List of usage examples for the org.apache.lucene.index.IndexWriterConfig constructor IndexWriterConfig()
public IndexWriterConfig()
From source file:IndexTaxis.java
License:Apache License
/**
 * Indexes a CSV file into a freshly created Lucene index using several indexing threads.
 *
 * <p>The first line of the CSV file is read byte by byte as a comma-separated header of
 * field names. The rest of the stream is handed to the worker threads, which repeatedly
 * call {@code readChunk} and {@code indexOneChunk} until {@code readChunk} returns
 * {@code null}.
 *
 * @param args {@code args[0]} index directory path, {@code args[1]} number of indexing
 *     threads, {@code args[2]} path of the CSV file to index
 * @throws IllegalArgumentException if fewer than three arguments are given or the CSV
 *     file ends before the header line is complete
 * @throws Exception if a resource cannot be opened or closed, or if waiting for the
 *     worker threads is interrupted
 */
public static void main(String[] args) throws Exception {
  if (args.length < 3) {
    throw new IllegalArgumentException(
        "expected arguments: <indexPath> <threadCount> <docsPath>");
  }

  // Parse every argument before opening anything, so a bad argument cannot leak a resource.
  Path indexPath = Paths.get(args[0]);
  int threadCount = Integer.parseInt(args[1]);
  Path docsPath = Paths.get(args[2]);

  IndexWriterConfig iwc = new IndexWriterConfig();
  iwc.setRAMBufferSizeMB(1024.);
  iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
  iwc.setInfoStream(new PrintStreamInfoStream(System.out));

  // try-with-resources closes the input stream, the writer and the directory (in that
  // order) even when header parsing or join() throws.
  try (Directory dir = FSDirectory.open(indexPath);
      IndexWriter w = new IndexWriter(dir, iwc);
      BufferedInputStream docs =
          new BufferedInputStream(Files.newInputStream(docsPath, StandardOpenOption.READ))) {

    // parse the header fields
    List<String> fieldsList = new ArrayList<>();
    StringBuilder builder = new StringBuilder();
    while (true) {
      int x = docs.read();
      if (x == -1) {
        throw new IllegalArgumentException(
            "hit EOF while trying to read CSV header; are you sure you have the right CSV file!");
      }
      byte b = (byte) x;
      if (b == NEWLINE) {
        fieldsList.add(builder.toString());
        break;
      } else if (b == COMMA) {
        fieldsList.add(builder.toString());
        builder.setLength(0);
      } else {
        // this is OK because headers are all ascii:
        builder.append((char) b);
      }
    }

    final String[] fields = fieldsList.toArray(new String[fieldsList.size()]);

    Thread[] threads = new Thread[threadCount];
    final AtomicInteger docCounter = new AtomicInteger();
    final AtomicLong bytesCounter = new AtomicLong();

    startNS = System.nanoTime();

    for (int i = 0; i < threadCount; i++) {
      threads[i] = new Thread() {
        @Override
        public void run() {
          try {
            _run();
          } catch (Exception e) {
            throw new RuntimeException(e);
          }
        }

        // Pulls chunks from the shared stream until readChunk signals the end with null.
        private void _run() throws IOException {
          while (true) {
            Chunk chunk = readChunk(docs);
            if (chunk == null) {
              break;
            }
            indexOneChunk(fields, chunk, w, docCounter, bytesCounter);
          }
        }
      };
      threads[i].start();
    }

    for (int i = 0; i < threadCount; i++) {
      threads[i].join();
    }

    System.out.println("Indexing done; now close");
  }
}
From source file:com.github.flaxsearch.testutil.Fixtures.java
License:Apache License
private static void populateIndex(Directory directory) { try (IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig())) { {// w ww .j a v a 2 s .c o m Document doc = new Document(); doc.add(new TextField("field2", "here is some text", Field.Store.YES)); doc.add(new StringField("field1", "value1", Field.Store.YES)); doc.add(new IntPoint("point", 2, 4)); doc.add(new IntPoint("point", 0, 1)); doc.add(new IntPoint("point", 2, 1)); doc.add(new IntPoint("point", 14, 4)); writer.addDocument(doc); // more than one segment writer.commit(); } { Document doc = new Document(); doc.add(new StringField("field1", "value2", Field.Store.YES)); doc.add(new BinaryDocValuesField("field1", new BytesRef("some bytes"))); doc.add(new TextField("field3", "this is some more text in a different field value1 value11 value12 value21", Field.Store.YES)); writer.addDocument(doc); } } catch (IOException e) { throw new RuntimeException("We're a RAMDirectory, this should never happen!"); } }
From source file:com.github.flaxsearch.testutil.GutenbergIndex.java
License:Apache License
public static void main(String... args) throws IOException { Path source = Paths.get("src/test/resources/gutenberg"); Path index = Paths.get("src/test/resources/index"); clearDirectory(index);//from w w w. j av a 2 s. co m try (Directory directory = FSDirectory.open(index); IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig())) { writeDocuments(writer, source); } }