Example usage for the org.apache.lucene.index.IndexWriterConfig constructor IndexWriterConfig()

Introduction

On this page you can find example usages of the no-argument constructor IndexWriterConfig() of org.apache.lucene.index.IndexWriterConfig.

Prototype

public IndexWriterConfig()

Source Link

Document

Creates a new config, using StandardAnalyzer as the analyzer.

Usage

From source file:IndexTaxis.java

License:Apache License

/**
 * Indexes a CSV file into a Lucene index using a configurable number of threads.
 *
 * <p>The first line of the CSV is read byte-by-byte as a comma-separated header of field names.
 * Worker threads then repeatedly pull chunks from the shared input stream via
 * {@code readChunk} and hand them to {@code indexOneChunk} until {@code readChunk} returns
 * {@code null}.
 *
 * <p>The directory, the input stream and the writer are managed by try-with-resources, so they
 * are released even when header parsing or indexing fails. The writer is closed first, then the
 * input stream, then the directory.
 *
 * <p>NOTE(review): all workers share one {@code BufferedInputStream}; this presumably relies on
 * {@code readChunk} doing its own synchronization -- confirm against its definition.
 *
 * @param args {@code args[0]} index directory path, {@code args[1]} number of indexing threads,
 *             {@code args[2]} path of the CSV file to index
 * @throws IllegalArgumentException if the input ends before a complete header line was read
 * @throws Exception on any I/O failure or if the main thread is interrupted while joining
 */
public static void main(String[] args) throws Exception {
    Path indexPath = Paths.get(args[0]);
    int threadCount = Integer.parseInt(args[1]);
    Path docsPath = Paths.get(args[2]);

    IndexWriterConfig iwc = new IndexWriterConfig();
    iwc.setRAMBufferSizeMB(1024.);
    // CREATE mode: the writer is configured to create a new index rather than append.
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    iwc.setInfoStream(new PrintStreamInfoStream(System.out));
    //((ConcurrentMergeScheduler) iwc.getMergeScheduler()).disableAutoIOThrottle();

    // Resources are closed in reverse declaration order: writer, then input, then directory.
    // The CSV is opened before the writer so a missing input file fails before an index is created.
    try (Directory dir = FSDirectory.open(indexPath);
            BufferedInputStream docs = new BufferedInputStream(
                    Files.newInputStream(docsPath, StandardOpenOption.READ));
            IndexWriter w = new IndexWriter(dir, iwc)) {

        // parse the header fields
        List<String> fieldsList = new ArrayList<>();
        StringBuilder builder = new StringBuilder();
        while (true) {
            int x = docs.read();
            if (x == -1) {
                throw new IllegalArgumentException(
                        "hit EOF while trying to read CSV header; are you sure you have the right CSV file!");
            }
            byte b = (byte) x;
            if (b == NEWLINE) {
                fieldsList.add(builder.toString());
                break;
            } else if (b == COMMA) {
                fieldsList.add(builder.toString());
                builder.setLength(0);
            } else {
                // this is OK because headers are all ascii:
                builder.append((char) b);
            }
        }

        final String[] fields = fieldsList.toArray(new String[0]);

        Thread[] threads = new Thread[threadCount];

        final AtomicInteger docCounter = new AtomicInteger();
        final AtomicLong bytesCounter = new AtomicLong();

        startNS = System.nanoTime();

        for (int i = 0; i < threadCount; i++) {
            threads[i] = new Thread(() -> {
                try {
                    // Keep consuming chunks until the reader signals exhaustion with null.
                    Chunk chunk;
                    while ((chunk = readChunk(docs)) != null) {
                        indexOneChunk(fields, chunk, w, docCounter, bytesCounter);
                    }
                } catch (Exception e) {
                    // Surfaces on the worker thread only; the main thread still joins and proceeds.
                    throw new RuntimeException(e);
                }
            });
            threads[i].start();
        }

        for (Thread thread : threads) {
            thread.join();
        }
        System.out.println("Indexing done; now close");
    }
}

From source file:com.github.flaxsearch.testutil.Fixtures.java

License:Apache License

/**
 * Writes two fixture documents into the given directory using a default
 * {@link IndexWriterConfig}.
 *
 * <p>The first document is committed on its own so that the index ends up with more than one
 * segment; the second is added afterwards and the writer is then closed by the
 * try-with-resources block.
 *
 * @param directory the directory to write the fixture index into
 * @throws RuntimeException wrapping the underlying {@link IOException} if writing fails
 */
private static void populateIndex(Directory directory) {
    try (IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig())) {

        {
            Document doc = new Document();
            doc.add(new TextField("field2", "here is some text", Field.Store.YES));
            doc.add(new StringField("field1", "value1", Field.Store.YES));
            doc.add(new IntPoint("point", 2, 4));
            doc.add(new IntPoint("point", 0, 1));
            doc.add(new IntPoint("point", 2, 1));
            doc.add(new IntPoint("point", 14, 4));
            writer.addDocument(doc);
            // more than one segment
            writer.commit();
        }

        {
            Document doc = new Document();
            doc.add(new StringField("field1", "value2", Field.Store.YES));
            doc.add(new BinaryDocValuesField("field1", new BytesRef("some bytes")));
            doc.add(new TextField("field3",
                    "this is some more text in a different field value1 value11 value12 value21",
                    Field.Store.YES));
            writer.addDocument(doc);
        }

    } catch (IOException e) {
        // Keep the original exception as the cause so an "impossible" failure is still diagnosable.
        throw new RuntimeException("We're a RAMDirectory, this should never happen!", e);
    }
}

From source file:com.github.flaxsearch.testutil.GutenbergIndex.java

License:Apache License

/**
 * Rebuilds the test index under {@code src/test/resources/index} from the files under
 * {@code src/test/resources/gutenberg}.
 *
 * <p>The index directory is cleared first, then a writer with a default
 * {@link IndexWriterConfig} is opened on it and handed to {@code writeDocuments} together with
 * the source path. The writer is closed before the directory.
 *
 * @param args unused
 * @throws IOException if clearing the directory, opening the index, or writing fails
 */
public static void main(String... args) throws IOException {
    final Path gutenbergDir = Paths.get("src/test/resources/gutenberg");
    final Path indexDir = Paths.get("src/test/resources/index");

    clearDirectory(indexDir);

    try (Directory luceneDir = FSDirectory.open(indexDir)) {
        // Inner scope closes the writer before the outer scope closes the directory.
        try (IndexWriter indexWriter = new IndexWriter(luceneDir, new IndexWriterConfig())) {
            writeDocuments(indexWriter, gutenbergDir);
        }
    }
}