Example usage for org.apache.lucene.index IndexWriterConfig setInfoStream

List of usage examples for org.apache.lucene.index IndexWriterConfig setInfoStream

Introduction

On this page you can find example usages of org.apache.lucene.index IndexWriterConfig setInfoStream.

Prototype

public IndexWriterConfig setInfoStream(PrintStream printStream) 

Source Link

Document

Convenience method that uses PrintStreamInfoStream.

Usage

From source file:IndexAndSearchOpenStreetMaps1D.java

License:Apache License

/**
 * Builds a one-dimensional Lucene index from the OpenStreetMap lat/lon text file.
 *
 * <p>Each input line is split on commas. The second column is parsed as a double, scaled by
 * 1,000,000 and truncated to an {@code int}. Depending on {@code USE_NF} the value is indexed
 * either as a {@code LegacyIntField} named {@code "latnum"} or as an {@code IntPoint} named
 * {@code "lat"}. The other columns are ignored. Progress is printed every million documents,
 * followed by the document count and the total build time.
 *
 * <p>The input stream, reader, directory and writer are all closed when the method exits,
 * whether it completes normally or with an exception.
 *
 * @throws IOException if the input cannot be read or is not valid UTF-8, or if the index
 *         cannot be written
 */
private static void createIndex() throws IOException {

    long t0 = System.nanoTime();

    // REPORT turns malformed or unmappable bytes into an exception instead of silently
    // substituting a replacement character.
    CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder().onMalformedInput(CodingErrorAction.REPORT)
            .onUnmappableCharacter(CodingErrorAction.REPORT);

    int bufferSize = 1 << 16; // 64K

    // No analyzer is supplied; the documents built below only carry numeric fields.
    IndexWriterConfig iwc = new IndexWriterConfig(null);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    iwc.setRAMBufferSizeMB(256.0);
    iwc.setMergePolicy(new LogDocMergePolicy());
    iwc.setMergeScheduler(new SerialMergeScheduler());
    iwc.setInfoStream(new PrintStreamInfoStream(System.out));

    try (InputStream is = Files
            .newInputStream(Paths.get("/lucenedata/open-street-maps/latlon.subsetPlusAllLondon.txt"));
            BufferedReader reader = new BufferedReader(new InputStreamReader(is, decoder), bufferSize);
            Directory dir = FSDirectory.open(Paths.get("/c/tmp/bkdtest1d" + (USE_NF ? "_nf" : "")));
            IndexWriter w = new IndexWriter(dir, iwc)) {

        int count = 0;
        String line;
        while ((line = reader.readLine()) != null) {
            String[] parts = line.split(",");
            // Fixed-point encoding: degrees * 1e6, truncated towards zero.
            int lat = (int) (1000000. * Double.parseDouble(parts[1]));

            Document doc = new Document();
            if (USE_NF) {
                doc.add(new LegacyIntField("latnum", lat, Field.Store.NO));
            } else {
                doc.add(new IntPoint("lat", lat));
            }
            w.addDocument(doc);

            count++;
            if (count % 1000000 == 0) {
                System.out.println(count + "...");
            }
        }

        w.commit();
        System.out.println(w.maxDoc() + " total docs");
    }

    long t1 = System.nanoTime();
    System.out.println(((t1 - t0) / 1000000000.0) + " sec to build index");
}

From source file:IndexTaxis.java

License:Apache License

/**
 * Indexes a CSV file into a freshly created Lucene index using several threads.
 *
 * <p>The first line of the CSV is read byte by byte and split on {@code COMMA} to obtain the
 * field names. The worker threads then repeatedly call {@code readChunk} on the shared input
 * stream and pass each chunk to {@code indexOneChunk} until {@code readChunk} returns
 * {@code null}.
 *
 * <p>An exception inside a worker is rethrown as a {@link RuntimeException} on that worker
 * thread only; the main thread still waits for all workers and then closes the resources.
 *
 * @param args {@code args[0]} index directory, {@code args[1]} number of indexing threads,
 *        {@code args[2]} path of the CSV file
 * @throws IllegalArgumentException if fewer than three arguments are given or the CSV ends
 *         before the header line is complete
 * @throws Exception if the index or the CSV file cannot be opened, read or written, or the
 *         main thread is interrupted while waiting for the workers
 */
public static void main(String[] args) throws Exception {
    if (args.length < 3) {
        throw new IllegalArgumentException(
                "expected 3 arguments: <indexPath> <threadCount> <docsPath>, got " + args.length);
    }
    Path indexPath = Paths.get(args[0]);
    int threadCount = Integer.parseInt(args[1]);
    Path docsPath = Paths.get(args[2]);

    IndexWriterConfig iwc = new IndexWriterConfig();
    iwc.setRAMBufferSizeMB(1024.);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    iwc.setInfoStream(new PrintStreamInfoStream(System.out));

    // Resources are closed in reverse order on every exit path: CSV stream, writer, directory.
    try (Directory dir = FSDirectory.open(indexPath);
            IndexWriter w = new IndexWriter(dir, iwc);
            BufferedInputStream docs = new BufferedInputStream(
                    Files.newInputStream(docsPath, StandardOpenOption.READ))) {

        // parse the header fields
        List<String> fieldsList = new ArrayList<>();
        StringBuilder builder = new StringBuilder();
        while (true) {
            int x = docs.read();
            if (x == -1) {
                throw new IllegalArgumentException(
                        "hit EOF while trying to read CSV header; are you sure you have the right CSV file!");
            }
            byte b = (byte) x;
            if (b == NEWLINE) {
                fieldsList.add(builder.toString());
                break;
            } else if (b == COMMA) {
                fieldsList.add(builder.toString());
                builder.setLength(0);
            } else {
                // this is OK because headers are all ascii:
                builder.append((char) b);
            }
        }

        final String[] fields = fieldsList.toArray(new String[0]);

        Thread[] threads = new Thread[threadCount];

        final AtomicInteger docCounter = new AtomicInteger();
        final AtomicLong bytesCounter = new AtomicLong();

        startNS = System.nanoTime();

        for (int i = 0; i < threadCount; i++) {
            threads[i] = new Thread() {
                @Override
                public void run() {
                    try {
                        _run();
                    } catch (Exception e) {
                        throw new RuntimeException(e);
                    }
                }

                // Pulls chunks from the shared stream until readChunk signals the end with null.
                private void _run() throws IOException {
                    while (true) {
                        Chunk chunk = readChunk(docs);
                        if (chunk == null) {
                            break;
                        }
                        indexOneChunk(fields, chunk, w, docCounter, bytesCounter);
                    }
                }
            };
            threads[i].start();
        }

        for (int i = 0; i < threadCount; i++) {
            threads[i].join();
        }
        System.out.println("Indexing done; now close");
    }
}

From source file:com.mathworks.xzheng.indexing.Fragments.java

License:Apache License

/**
 * Fragment showing how to send IndexWriter diagnostics to {@code System.out}.
 *
 * <p>{@code dir} and {@code analyzer} are {@code null} placeholders here; the fragment only
 * illustrates the order of the calls between the START and END markers.
 *
 * @throws Exception declared by the fragment; the IndexWriter constructor is the only call
 *         here that can fail
 */
public void setInfoStream() throws Exception {
    Directory dir = null;
    Analyzer analyzer = null;
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, analyzer);

    // START
    // Configure the info stream BEFORE constructing the writer: changes made to an
    // IndexWriterConfig after it has been handed to an IndexWriter do not affect that writer.
    config.setInfoStream(System.out);
    IndexWriter writer = new IndexWriter(dir, config);
    // END
}

From source file:com.mathworks.xzheng.indexing.VerboseIndexing.java

License:Apache License

/**
 * Indexes 100 identical single-field documents into an in-memory directory while printing
 * IndexWriter diagnostics to {@code System.out}.
 *
 * @throws IOException if adding a document, merging or closing the writer fails
 */
private void index() throws IOException {

    Directory dir = new RAMDirectory();
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46,
            new WhitespaceAnalyzer(Version.LUCENE_46));

    // Must be set before the writer is created; changes made to the config after it has
    // been handed to an IndexWriter do not affect that writer.
    config.setInfoStream(System.out);

    IndexWriter writer = new IndexWriter(dir, config);
    try {
        for (int i = 0; i < 100; i++) {
            Document doc = new Document();
            doc.add(new Field("keyword", "goober", Field.Store.YES, Field.Index.NOT_ANALYZED));
            writer.addDocument(doc);
        }
        // NOTE(review): with a segment limit this large the call presumably never has to
        // merge anything; it is kept as in the original example.
        writer.forceMerge(Integer.MAX_VALUE);
    } finally {
        // Close the writer even when indexing fails.
        writer.close();
    }
}

From source file:com.study.lucene.IndexFiles.java

License:Apache License

/**
 * Index all text files under a directory.
 *
 * <p>Recognised options: {@code -index INDEX_PATH} (defaults to {@code "index"}),
 * {@code -docs DOCS_PATH} (required) and {@code -update} (append to an existing index
 * instead of creating a new one). The usage text is printed and the JVM exits with status 1
 * when {@code -docs} is missing or an option that needs a value is the last argument. The
 * JVM also exits with status 1 when the documents directory is not readable.
 *
 * @param args command-line arguments as described above
 */
public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index"
            + " in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "index";
    String docsPath = null;
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
        String arg = args[i];
        if ("-update".equals(arg)) {
            create = false;
        } else if ("-index".equals(arg) || "-docs".equals(arg)) {
            // Both options consume the following argument as their value.
            if (i + 1 >= args.length) {
                System.err.println("Usage: " + usage);
                System.exit(1);
            }
            String value = args[++i];
            if ("-index".equals(arg)) {
                indexPath = value;
            } else {
                docsPath = value;
            }
        }
    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final Path docDir = Paths.get(docsPath);
    if (!Files.isReadable(docDir)) {
        System.out.println("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    long startMillis = System.currentTimeMillis();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setInfoStream(System.out);
        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer.  But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        // Directory and writer are closed on every exit path, writer first.
        try (Directory dir = FSDirectory.open(Paths.get(indexPath));
                IndexWriter writer = new IndexWriter(dir, iwc)) {
            indexDocs(writer, docDir);

            // NOTE: if you want to maximize search performance,
            // you can optionally call forceMerge here.  This can be
            // a terribly costly operation, so generally it's only
            // worth it when your index is relatively static (ie
            // you're done adding documents to it):
            //
            // writer.forceMerge(1);
        }

        long endMillis = System.currentTimeMillis();
        System.out.println(endMillis - startMillis + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:io.datalayer.lucene.helper.AosUtil.java

License:Apache License

/**
 * Fragment: builds an IndexWriterConfig that prints IndexWriter diagnostics to
 * {@code System.out} and constructs a writer from it.
 *
 * <p>The directory is a {@code null} placeholder in this fragment.
 */
public void setInfoStream() throws Exception {
    final Directory directory = null;
    // setInfoStream returns the config itself, so it can be chained onto the constructor.
    final IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_46,
            AosAnalyser.NO_LIMIT_TOKEN_COUNT_SIMPLE_ANALYSER).setInfoStream(System.out);
    final IndexWriter indexWriter = new IndexWriter(directory, writerConfig);
}

From source file:io.datalayer.lucene.index.VerboseIndexing.java

License:Apache License

/**
 * Writes 100 documents, each holding a single stored field {@code keyword=goober}, into an
 * in-memory directory with IndexWriter diagnostics echoed to {@code System.out}.
 *
 * @throws IOException if the writer cannot be created, a document cannot be added, or the
 *         writer cannot be closed
 */
private void index() throws IOException {
    final IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_46,
            AosAnalyser.NO_LIMIT_TOKEN_COUNT_SIMPLE_ANALYSER);
    // Diagnostics are switched on before the writer is built from this config.
    writerConfig.setInfoStream(System.out);

    final Directory ramDirectory = new RAMDirectory();
    final IndexWriter indexWriter = new IndexWriter(ramDirectory, writerConfig);

    int remaining = 100;
    while (remaining > 0) {
        final Document keywordDoc = new Document();
        keywordDoc.add(new StoredField("keyword", "goober"));
        indexWriter.addDocument(keywordDoc);
        remaining--;
    }

    indexWriter.close();
}

From source file:org.apache.blur.lucene.index.BlurIndexWriterTest.java

License:Apache License

/**
 * Runs two writers against the same directory, each with its own {@code BlurLockFactory}
 * ("node1" and "node2"), and checks that only the second writer's data ends up in the index.
 *
 * <p>Thread 1 adds index "1", calls {@code waitToLooseLock()} and then {@code prepareCommit()}.
 * If {@code prepareCommit()} returns without an {@link IOException}, {@code fail1} is set.
 * Thread 2 first calls {@code waitForDirInThread1ToBeAdded}, then installs its own lock
 * factory, calls {@code obtainLock()}, adds index "2" and commits. Any {@link IOException}
 * there sets {@code fail2}. Afterwards the index must contain exactly one segment with one
 * document whose field {@code f} is {@code "2"}.
 */
@Test
public void testIndexRelocationFencing()
        throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException {
    final AtomicBoolean fail1 = new AtomicBoolean();
    final AtomicBoolean fail2 = new AtomicBoolean();
    final Path hdfsDirPath = new Path(toUri("./target/tmp/BlurIndexWriterTest"));
    final Directory directory = new RAMDirectory();
    Thread thread1 = new Thread(new Runnable() {
        @Override
        public void run() {
            BlurIndexWriter writer = null;
            try {
                BlurLockFactory blurLockFactory = new BlurLockFactory(_configuration, hdfsDirPath, "node1",
                        "1");
                directory.setLockFactory(blurLockFactory);
                IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
                conf.setInfoStream(getInfoStream());
                writer = new BlurIndexWriter(directory, conf);
                writer.addIndexes(addDir("1"));
                waitToLooseLock();
                writer.prepareCommit();
                // Reaching this line means prepareCommit did not throw, which the test
                // treats as a failure.
                fail1.set(true);
            } catch (IOException e) {
                e.printStackTrace();
                if (writer != null) {
                    // Best-effort cleanup: roll back, then close; each step is attempted
                    // even if the other one fails.
                    try {
                        writer.rollback();
                    } catch (IOException e1) {
                        e1.printStackTrace();
                    }
                    try {
                        writer.close();
                    } catch (IOException e1) {
                        e1.printStackTrace();
                    }
                }
            }
        }
    });

    Thread thread2 = new Thread(new Runnable() {
        @Override
        public void run() {
            try {
                waitForDirInThread1ToBeAdded(directory);
                BlurLockFactory blurLockFactory = new BlurLockFactory(_configuration, hdfsDirPath, "node2",
                        "2");
                directory.setLockFactory(blurLockFactory);
                IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
                conf.setInfoStream(getInfoStream());
                BlurIndexWriter writer = new BlurIndexWriter(directory, conf);
                obtainLock();
                writer.addIndexes(addDir("2"));
                writer.commit();
                writer.close();
            } catch (IOException e) {
                e.printStackTrace();
                fail2.set(true);
            }
        }

    });
    thread1.start();
    thread2.start();
    thread1.join();
    thread2.join();

    if (fail1.get()) {
        fail("writer 1 (node1) completed prepareCommit without an IOException");
    }

    if (fail2.get()) {
        fail("writer 2 (node2) hit an IOException while adding or committing its index");
    }

    DirectoryReader reader = DirectoryReader.open(directory);
    try {
        List<AtomicReaderContext> leaves = reader.leaves();
        // Expected value first so that failure messages read correctly.
        assertEquals(1, leaves.size());
        assertEquals(1, reader.numDocs());
        Document document = reader.document(0);
        assertEquals("2", document.get("f"));
    } finally {
        // Close the reader even when an assertion above fails.
        reader.close();
    }
}

From source file:org.apache.solr.update.SolrIndexConfig.java

License:Apache License

/**
 * Translates this index configuration into a Lucene {@link IndexWriterConfig}.
 *
 * <p>Numeric settings whose value is {@code -1} are skipped and left at whatever the new
 * config starts with. Similarity, merge policy, merge scheduler, info stream and the
 * compound-file flag are always applied.
 *
 * @param schema supplies the similarity and resource loader, and is passed on to
 *        {@code buildMergePolicy} and {@code buildMergeScheduler}
 * @return a newly created writer configuration
 */
public IndexWriterConfig toIndexWriterConfig(IndexSchema schema) {
    // The default analyzer is deliberately null: the analyzer is handed to IndexWriter
    // explicitly on the relevant calls so that it can be swapped on core reload.
    final IndexWriterConfig config = new IndexWriterConfig(luceneVersion, null);

    if (maxBufferedDocs != -1) {
        config.setMaxBufferedDocs(maxBufferedDocs);
    }
    if (ramBufferSizeMB != -1) {
        config.setRAMBufferSizeMB(ramBufferSizeMB);
    }
    if (termIndexInterval != -1) {
        config.setTermIndexInterval(termIndexInterval);
    }
    if (writeLockTimeout != -1) {
        config.setWriteLockTimeout(writeLockTimeout);
    }

    config.setSimilarity(schema.getSimilarity());
    config.setMergePolicy(buildMergePolicy(schema));
    config.setMergeScheduler(buildMergeScheduler(schema));
    config.setInfoStream(infoStream);

    // Applied only after buildMergePolicy has run, because its back-compat handling may
    // change the effective useCompoundFile value.
    config.setUseCompoundFile(getUseCompoundFile());

    if (maxIndexingThreads != -1) {
        config.setMaxThreadStates(maxIndexingThreads);
    }

    if (mergedSegmentWarmerInfo != null) {
        // TODO: add infostream -> normal logging system (there is an issue somewhere)
        // The warmer class is instantiated with the config's info stream as its single
        // constructor argument.
        final IndexReaderWarmer segmentWarmer = schema.getResourceLoader().newInstance(
                mergedSegmentWarmerInfo.className,
                IndexReaderWarmer.class,
                null,
                new Class[] { InfoStream.class },
                new Object[] { config.getInfoStream() });
        config.setMergedSegmentWarmer(segmentWarmer);
    }

    return config;
}

From source file:org.elasticsearch.index.engine.internal.AsynchronousEngine.java

License:Apache License

/**
 * Builds the {@link IndexWriter} for this engine's store directory.
 *
 * <p>The writer is opened in CREATE mode when {@code Lucene.indexExists} reports no index in
 * the directory and in APPEND mode otherwise. A warmer hook is installed so that newly merged
 * segments are warmed at the end of the merge.
 *
 * @return the new writer
 * @throws LockObtainFailedException if the write lock cannot be obtained; the failure is
 *         logged together with the current lock state before being rethrown
 * @throws IOException on any other I/O failure while creating the writer
 */
private IndexWriter createWriter() throws IOException {
    try {
        final boolean indexMissing = !Lucene.indexExists(store.directory());
        final IndexWriterConfig writerConfig = new IndexWriterConfig(Lucene.VERSION,
                analysisService.defaultIndexAnalyzer());
        if (indexMissing) {
            writerConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        } else {
            writerConfig.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
        }
        writerConfig.setIndexDeletionPolicy(deletionPolicy);
        writerConfig.setInfoStream(new LoggerInfoStream(indexSettings, shardId));
        writerConfig.setMergeScheduler(mergeScheduler.newMergeScheduler());
        // The provider's policy is wrapped so that old segments can be upgraded while
        // background merges run.
        writerConfig.setMergePolicy(new ElasticsearchMergePolicy(mergePolicyProvider.getMergePolicy()));
        writerConfig.setSimilarity(similarityService.similarity());
        writerConfig.setRAMBufferSizeMB(indexingBufferSize.mbFrac());
        writerConfig.setMaxThreadStates(indexConcurrency);
        writerConfig.setCodec(codecService.codec(codecName));
        // A fairly high timeout works around the Lucene lock's default poll interval of
        // 1000ms: several polls may be needed, but with a 1s poll and the default
        // write-lock timeout it would run at most twice.
        writerConfig.setWriteLockTimeout(5000);
        writerConfig.setUseCompoundFile(this.compoundOnFlush);
        writerConfig.setCheckIntegrityAtMerge(checksumOnMerge);
        // Warming merged segments here happens at the end of the merge itself, so it does
        // not slow down _refresh.
        writerConfig.setMergedSegmentWarmer(new IndexReaderWarmer() {
            @Override
            public void warm(AtomicReader reader) throws IOException {
                try {
                    assert isMergedSegment(reader);
                    if (warmer == null) {
                        return;
                    }
                    final Engine.Searcher warmSearcher = new SimpleSearcher("warmer",
                            new IndexSearcher(reader));
                    warmer.warmNewReaders(new IndicesWarmer.WarmerContext(shardId, warmSearcher));
                } catch (Throwable failure) {
                    // A failed warm-up must not fail the merge.
                    if (!closed) {
                        logger.warn("Warm-up failed", failure);
                    }
                    // Errors (assertion failures, out-of-memory) are never swallowed.
                    if (failure instanceof Error) {
                        throw (Error) failure;
                    }
                }
            }
        });
        return new IndexWriter(store.directory(), writerConfig);
    } catch (LockObtainFailedException lockFailure) {
        final boolean lockedNow = IndexWriter.isLocked(store.directory());
        logger.warn("Could not lock IndexWriter isLocked [{}]", lockFailure, lockedNow);
        throw lockFailure;
    }
}