List of usage examples for org.apache.lucene.facet.taxonomy.TaxonomyWriter#getSize()
public int getSize();
From source file:com.fuerve.villageelder.indexing.IndexerTest.java
License:Apache License
/** * Test method for {@link com.fuerve.villageelder.indexing.Indexer#Indexer(org.apache.lucene.store.Directory, org.apache.lucene.store.Directory)}. *///w w w . jav a 2 s . com @Test public final void testIndexerDirectoryDirectory() throws Exception { RAMDirectory indexDirectory = new RAMDirectory(); RAMDirectory taxonomyDirectory = new RAMDirectory(); Field idField = IndexManager.class.getDeclaredField("indexDirectory"); Field tdField = IndexManager.class.getDeclaredField("taxonomyDirectory"); Field iwField = IndexManager.class.getDeclaredField("indexWriter"); Field twField = IndexManager.class.getDeclaredField("taxonomyWriter"); Field stField = IndexManager.class.getDeclaredField("stringDirectories"); Field initField = IndexManager.class.getDeclaredField("initialized"); Field imField = Indexer.class.getDeclaredField("indexManager"); idField.setAccessible(true); tdField.setAccessible(true); iwField.setAccessible(true); twField.setAccessible(true); stField.setAccessible(true); initField.setAccessible(true); imField.setAccessible(true); Indexer target = new Indexer(indexDirectory, taxonomyDirectory); IndexManager testManager = (IndexManager) imField.get(target); // TEST 1: A newly constructed Indexer believes itself // to be uninitialized, as indicated by the 'initialized' // field. boolean initActual = initField.getBoolean(testManager); assertFalse(initActual); target.initializeIndex(); Directory idActual = (Directory) idField.get(testManager); Directory tdActual = (Directory) tdField.get(testManager); IndexWriter iwActual = (IndexWriter) iwField.get(testManager); TaxonomyWriter twActual = (TaxonomyWriter) twField.get(testManager); boolean stActual = (Boolean) stField.get(testManager); initActual = initField.getBoolean(testManager); // TEST 2: The Indexer's index directory is what was passed in. assertEquals(indexDirectory, idActual); // TEST 3: The Indexer's taxonomy directory is what was passed in. 
assertEquals(taxonomyDirectory, tdActual); // TEST 4: The IndexWriter's directory is what was passed in. assertEquals(indexDirectory, iwActual.getDirectory()); // TEST 5: The taxonomy index is initialized afresh with no categories // in it. assertEquals(1, twActual.getSize()); // TEST 6: An Indexer constructed with Directories does not // believe that it needs to construct new Directories from string // pathnames. assertEquals(false, stActual); // TEST 7: The Indexer's initialized field is true after it // has been initialized. assertEquals(true, initActual); target.dispose(); // TEST 8: The Indexer's index writer is null after it has // been disposed. iwActual = (IndexWriter) iwField.get(testManager); assertEquals(null, iwActual); // TEST 9: The Indexer's taxonomy writer is null after it // has been disposed. twActual = (TaxonomyWriter) twField.get(testManager); assertEquals(null, twActual); // TEST 10: The Indexer's initialized flag is false after // it has been disposed. initActual = initField.getBoolean(testManager); assertEquals(false, initActual); }
From source file:com.fuerve.villageelder.indexing.IndexerTest.java
License:Apache License
/** * Test method for {@link com.fuerve.villageelder.indexing.Indexer#Indexer(org.apache.lucene.store.Directory, org.apache.lucene.store.Directory, org.apache.lucene.index.IndexWriterConfig.OpenMode)}. *///from w ww . j ava 2 s.c om @Test public final void testIndexerDirectoryDirectoryOpenMode() throws Exception { RAMDirectory indexDirectory = new RAMDirectory(); RAMDirectory taxonomyDirectory = new RAMDirectory(); Field idField = IndexManager.class.getDeclaredField("indexDirectory"); Field tdField = IndexManager.class.getDeclaredField("taxonomyDirectory"); Field iwField = IndexManager.class.getDeclaredField("indexWriter"); Field twField = IndexManager.class.getDeclaredField("taxonomyWriter"); Field stField = IndexManager.class.getDeclaredField("stringDirectories"); Field initField = IndexManager.class.getDeclaredField("initialized"); Field imField = Indexer.class.getDeclaredField("indexManager"); idField.setAccessible(true); tdField.setAccessible(true); iwField.setAccessible(true); twField.setAccessible(true); stField.setAccessible(true); initField.setAccessible(true); imField.setAccessible(true); Indexer target = new Indexer(indexDirectory, taxonomyDirectory, OpenMode.CREATE); target.initializeIndex(); IndexManager testManager = (IndexManager) imField.get(target); TaxonomyWriter tw = (TaxonomyWriter) twField.get(testManager); IndexWriter iw = (IndexWriter) iwField.get(testManager); tw.addCategory(new CategoryPath("test/stuff", '/')); Document doc = new Document(); doc.add(new LongField("testfield", 1000L, Store.YES)); iw.addDocument(doc); target.dispose(); // TEST: Initializing an index, disposing it and initializing another // index instance on the same Directories results in loading the same // index. 
Indexer target2 = new Indexer(indexDirectory, taxonomyDirectory, OpenMode.APPEND); target2.initializeIndex(); testManager = (IndexManager) imField.get(target2); iw = (IndexWriter) iwField.get(testManager); tw = (TaxonomyWriter) twField.get(testManager); assertEquals(1, iw.numDocs()); assertEquals(3, tw.getSize()); target2.dispose(); }
From source file:com.fuerve.villageelder.indexing.IndexerTest.java
License:Apache License
/**
 * Test method for {@link com.fuerve.villageelder.indexing.Indexer#indexRevision(com.fuerve.villageelder.sourcecontrol.RevisionInfo)}.
 *
 * <p>Indexes a single revision into a freshly created in-memory index and
 * checks the resulting document count and taxonomy size.
 *
 * @throws Exception if reflection or any index operation fails
 */
@Test
public final void testIndexRevision() throws Exception {
    RAMDirectory indexDir = new RAMDirectory();
    RAMDirectory taxoDir = new RAMDirectory();

    // Reflective handles onto the writers held by the IndexManager.
    Field indexWriterField = IndexManager.class.getDeclaredField("indexWriter");
    Field taxoWriterField = IndexManager.class.getDeclaredField("taxonomyWriter");
    Field managerField = Indexer.class.getDeclaredField("indexManager");

    indexWriterField.setAccessible(true);
    taxoWriterField.setAccessible(true);
    managerField.setAccessible(true);

    Indexer target = new Indexer(indexDir, taxoDir, OpenMode.CREATE);
    target.initializeIndex();
    IndexManager manager = (IndexManager) managerField.get(target);

    target.indexRevision(buildDumbRevisionInfo());

    TaxonomyWriter taxoWriter = (TaxonomyWriter) taxoWriterField.get(manager);
    IndexWriter indexWriter = (IndexWriter) indexWriterField.get(manager);

    // One revision yields one document; the taxonomy is expected to hold
    // 8 entries afterwards.
    assertEquals(1, indexWriter.numDocs());
    assertEquals(8, taxoWriter.getSize());

    target.dispose();
}
From source file:com.fuerve.villageelder.indexing.IndexerTest.java
License:Apache License
/**
 * Test method for {@link com.fuerve.villageelder.indexing.Indexer#indexRevisions(java.lang.Iterable)}.
 *
 * <p>Indexes two revisions built by the same helper into a freshly created
 * in-memory index and checks the resulting document count and taxonomy size.
 */
@Test
public final void testIndexRevisions() throws Exception {
    RAMDirectory indexDir = new RAMDirectory();
    RAMDirectory taxoDir = new RAMDirectory();

    // Reflective handles onto the writers held by the IndexManager.
    Field indexWriterField = IndexManager.class.getDeclaredField("indexWriter");
    Field taxoWriterField = IndexManager.class.getDeclaredField("taxonomyWriter");
    Field managerField = Indexer.class.getDeclaredField("indexManager");

    indexWriterField.setAccessible(true);
    taxoWriterField.setAccessible(true);
    managerField.setAccessible(true);

    Indexer target = new Indexer(indexDir, taxoDir, OpenMode.CREATE);
    target.initializeIndex();
    IndexManager manager = (IndexManager) managerField.get(target);

    List<RevisionInfo> batch = new ArrayList<RevisionInfo>();
    batch.add(buildDumbRevisionInfo());
    batch.add(buildDumbRevisionInfo());

    target.indexRevisions(batch);

    TaxonomyWriter taxoWriter = (TaxonomyWriter) taxoWriterField.get(manager);
    IndexWriter indexWriter = (IndexWriter) indexWriterField.get(manager);

    // Two revisions yield two documents; the expected taxonomy size is the
    // same 8 that the single-revision test expects.
    assertEquals(2, indexWriter.numDocs());
    assertEquals(8, taxoWriter.getSize());

    target.dispose();
}
From source file:com.fuerve.villageelder.indexing.IndexManagerTest.java
License:Apache License
/** * Test method for {@link com.fuerve.villageelder.indexing.IndexManager#IndexManager(org.apache.lucene.store.Directory, org.apache.lucene.store.Directory)}. * @throws Exception //from ww w . j av a2 s . co m */ @Test public final void testIndexManagerDirectoryDirectory() throws Exception { RAMDirectory indexDirectory = new RAMDirectory(); RAMDirectory taxonomyDirectory = new RAMDirectory(); Field idField = IndexManager.class.getDeclaredField("indexDirectory"); Field tdField = IndexManager.class.getDeclaredField("taxonomyDirectory"); Field iwField = IndexManager.class.getDeclaredField("indexWriter"); Field twField = IndexManager.class.getDeclaredField("taxonomyWriter"); Field stField = IndexManager.class.getDeclaredField("stringDirectories"); Field initField = IndexManager.class.getDeclaredField("initialized"); idField.setAccessible(true); tdField.setAccessible(true); iwField.setAccessible(true); twField.setAccessible(true); stField.setAccessible(true); initField.setAccessible(true); IndexManager target = new IndexManager(indexDirectory, taxonomyDirectory); // TEST 1: A newly constructed IndexManager believes itself // to be uninitialized, as indicated by the 'initialized' // field. boolean initActual = initField.getBoolean(target); assertFalse(initActual); target.initializeIndex(); Directory idActual = (Directory) idField.get(target); Directory tdActual = (Directory) tdField.get(target); IndexWriter iwActual = (IndexWriter) iwField.get(target); TaxonomyWriter twActual = (TaxonomyWriter) twField.get(target); boolean stActual = (Boolean) stField.get(target); initActual = initField.getBoolean(target); // TEST 2: The IndexManager's index directory is what was passed in. assertEquals(indexDirectory, idActual); // TEST 3: The IndexManager's taxonomy directory is what was passed in. assertEquals(taxonomyDirectory, tdActual); // TEST 4: The IndexWriter's directory is what was passed in. 
assertEquals(indexDirectory, iwActual.getDirectory()); // TEST 5: The taxonomy index is initialized afresh with no categories // in it. assertEquals(1, twActual.getSize()); // TEST 6: An IndexManager constructed with Directories does not // believe that it needs to construct new Directories from string // pathnames. assertEquals(false, stActual); // TEST 7: The IndexManager's initialized field is true after it // has been initialized. assertEquals(true, initActual); target.dispose(); // TEST 8: The IndexManager's index writer is null after it has // been disposed. iwActual = (IndexWriter) iwField.get(target); assertEquals(null, iwActual); // TEST 9: The IndexManager's taxonomy writer is null after it // has been disposed. twActual = (TaxonomyWriter) twField.get(target); assertEquals(null, twActual); // TEST 10: The IndexManager's initialized flag is false after // it has been disposed. initActual = initField.getBoolean(target); assertEquals(false, initActual); }
From source file:com.fuerve.villageelder.indexing.IndexManagerTest.java
License:Apache License
/** * Test method for {@link com.fuerve.villageelder.indexing.IndexManager#IndexManager(org.apache.lucene.store.Directory, org.apache.lucene.store.Directory, org.apache.lucene.index.IndexWriterConfig.OpenMode)}. *//*from w w w .jav a2 s . c o m*/ @Test public final void testIndexManagerDirectoryDirectoryOpenMode() throws Exception { RAMDirectory indexDirectory = new RAMDirectory(); RAMDirectory taxonomyDirectory = new RAMDirectory(); Field idField = IndexManager.class.getDeclaredField("indexDirectory"); Field tdField = IndexManager.class.getDeclaredField("taxonomyDirectory"); Field iwField = IndexManager.class.getDeclaredField("indexWriter"); Field twField = IndexManager.class.getDeclaredField("taxonomyWriter"); Field stField = IndexManager.class.getDeclaredField("stringDirectories"); Field initField = IndexManager.class.getDeclaredField("initialized"); idField.setAccessible(true); tdField.setAccessible(true); iwField.setAccessible(true); twField.setAccessible(true); stField.setAccessible(true); initField.setAccessible(true); IndexManager target = new IndexManager(indexDirectory, taxonomyDirectory, OpenMode.CREATE); target.initializeIndex(); TaxonomyWriter tw = (TaxonomyWriter) twField.get(target); IndexWriter iw = (IndexWriter) iwField.get(target); tw.addCategory(new CategoryPath("test/stuff", '/')); Document doc = new Document(); doc.add(new LongField("testfield", 1000L, Store.YES)); iw.addDocument(doc); target.dispose(); // TEST: Initializing an index, disposing it and initializing another // index instance on the same Directories results in loading the same // index. IndexManager target2 = new IndexManager(indexDirectory, taxonomyDirectory, OpenMode.APPEND); target2.initializeIndex(); iw = (IndexWriter) iwField.get(target2); tw = (TaxonomyWriter) twField.get(target2); assertEquals(1, iw.numDocs()); assertEquals(3, tw.getSize()); target2.dispose(); }
From source file:perf.Indexer.java
License:Apache License
private static void _main(String[] clArgs) throws Exception { Args args = new Args(clArgs); // EG: -facets Date -facets characterCount ... FacetsConfig facetsConfig = new FacetsConfig(); facetsConfig.setHierarchical("Date", true); final Set<String> facetFields = new HashSet<String>(); if (args.hasArg("-facets")) { for (String arg : args.getStrings("-facets")) { facetFields.add(arg);//w w w. j a v a 2s . c o m } } final String dirImpl = args.getString("-dirImpl"); final String dirPath = args.getString("-indexPath") + "/index"; final Directory dir; OpenDirectory od = OpenDirectory.get(dirImpl); dir = od.open(Paths.get(dirPath)); final String analyzer = args.getString("-analyzer"); final Analyzer a; if (analyzer.equals("EnglishAnalyzer")) { a = new EnglishAnalyzer(); } else if (analyzer.equals("StandardAnalyzer")) { a = new StandardAnalyzer(); } else if (analyzer.equals("StandardAnalyzerNoStopWords")) { a = new StandardAnalyzer(CharArraySet.EMPTY_SET); } else if (analyzer.equals("ShingleStandardAnalyzer")) { a = new ShingleAnalyzerWrapper(new StandardAnalyzer(), 2, 2); } else if (analyzer.equals("ShingleStandardAnalyzerNoStopWords")) { a = new ShingleAnalyzerWrapper(new StandardAnalyzer(CharArraySet.EMPTY_SET), 2, 2); } else { throw new RuntimeException("unknown analyzer " + analyzer); } final String lineFile = args.getString("-lineDocsFile"); // -1 means all docs in the line file: final int docCountLimit = args.getInt("-docCountLimit"); final int numThreads = args.getInt("-threadCount"); final boolean doForceMerge = args.getFlag("-forceMerge"); final boolean verbose = args.getFlag("-verbose"); String indexSortField = null; SortField.Type indexSortType = null; if (args.hasArg("-indexSort")) { indexSortField = args.getString("-indexSort"); int i = indexSortField.indexOf(':'); if (i == -1) { throw new IllegalArgumentException( "-indexSort should have form field:type; got: " + indexSortField); } String typeString = indexSortField.substring(i + 1, 
indexSortField.length()); if (typeString.equals("long")) { indexSortType = SortField.Type.LONG; } else if (typeString.equals("string")) { indexSortType = SortField.Type.STRING; } else { throw new IllegalArgumentException("-indexSort can only handle 'long' sort; got: " + typeString); } indexSortField = indexSortField.substring(0, i); } else { indexSortType = null; } final double ramBufferSizeMB = args.getDouble("-ramBufferMB"); final int maxBufferedDocs = args.getInt("-maxBufferedDocs"); final String defaultPostingsFormat = args.getString("-postingsFormat"); final boolean doDeletions = args.getFlag("-deletions"); final boolean printDPS = args.getFlag("-printDPS"); final boolean waitForMerges = args.getFlag("-waitForMerges"); final boolean waitForCommit = args.getFlag("-waitForCommit"); final String mergePolicy = args.getString("-mergePolicy"); final Mode mode; final boolean doUpdate = args.getFlag("-update"); if (doUpdate) { mode = Mode.UPDATE; } else { mode = Mode.valueOf(args.getString("-mode", "add").toUpperCase(Locale.ROOT)); } int randomDocIDMax; if (mode == Mode.UPDATE) { randomDocIDMax = args.getInt("-randomDocIDMax"); } else { randomDocIDMax = -1; } final String idFieldPostingsFormat = args.getString("-idFieldPostingsFormat"); final boolean addGroupingFields = args.getFlag("-grouping"); final boolean useCFS = args.getFlag("-cfs"); final boolean storeBody = args.getFlag("-store"); final boolean tvsBody = args.getFlag("-tvs"); final boolean bodyPostingsOffsets = args.getFlag("-bodyPostingsOffsets"); final int maxConcurrentMerges = args.getInt("-maxConcurrentMerges"); final boolean addDVFields = args.getFlag("-dvfields"); final boolean doRandomCommit = args.getFlag("-randomCommit"); final boolean useCMS = args.getFlag("-useCMS"); final boolean disableIOThrottle = args.getFlag("-disableIOThrottle"); if (waitForCommit == false && waitForMerges) { throw new RuntimeException("pass -waitForCommit if you pass -waitForMerges"); } if (waitForCommit == false && 
doForceMerge) { throw new RuntimeException("pass -waitForCommit if you pass -forceMerge"); } if (waitForCommit == false && doDeletions) { throw new RuntimeException("pass -waitForCommit if you pass -deletions"); } if (useCMS == false && disableIOThrottle) { throw new RuntimeException("-disableIOThrottle only makes sense with -useCMS"); } final double nrtEverySec; if (args.hasArg("-nrtEverySec")) { nrtEverySec = args.getDouble("-nrtEverySec"); } else { nrtEverySec = -1.0; } // True to start back at the beginning if we run out of // docs from the line file source: final boolean repeatDocs = args.getFlag("-repeatDocs"); final String facetDVFormatName; if (facetFields.isEmpty()) { facetDVFormatName = "Lucene54"; } else { facetDVFormatName = args.getString("-facetDVFormat"); } if (addGroupingFields && docCountLimit == -1) { a.close(); throw new RuntimeException("cannot add grouping fields unless docCount is set"); } args.check(); System.out.println("Dir: " + dirImpl); System.out.println("Index path: " + dirPath); System.out.println("Analyzer: " + analyzer); System.out.println("Line file: " + lineFile); System.out.println("Doc count limit: " + (docCountLimit == -1 ? "all docs" : "" + docCountLimit)); System.out.println("Threads: " + numThreads); System.out.println("Force merge: " + (doForceMerge ? "yes" : "no")); System.out.println("Verbose: " + (verbose ? "yes" : "no")); System.out.println("RAM Buffer MB: " + ramBufferSizeMB); System.out.println("Max buffered docs: " + maxBufferedDocs); System.out.println("Default postings format: " + defaultPostingsFormat); System.out.println("Do deletions: " + (doDeletions ? "yes" : "no")); System.out.println("Wait for merges: " + (waitForMerges ? "yes" : "no")); System.out.println("Wait for commit: " + (waitForCommit ? "yes" : "no")); System.out.println("IO throttle: " + (disableIOThrottle ? 
"no" : "yes")); System.out.println("Merge policy: " + mergePolicy); System.out.println("Mode: " + mode); if (mode == Mode.UPDATE) { System.out.println("DocIDMax: " + randomDocIDMax); } System.out.println("ID field postings format: " + idFieldPostingsFormat); System.out.println("Add grouping fields: " + (addGroupingFields ? "yes" : "no")); System.out.println("Compound file format: " + (useCFS ? "yes" : "no")); System.out.println("Store body field: " + (storeBody ? "yes" : "no")); System.out.println("Term vectors for body field: " + (tvsBody ? "yes" : "no")); System.out.println("Facet DV Format: " + facetDVFormatName); System.out.println("Facet fields: " + facetFields); System.out.println("Body postings offsets: " + (bodyPostingsOffsets ? "yes" : "no")); System.out.println("Max concurrent merges: " + maxConcurrentMerges); System.out.println("Add DocValues fields: " + addDVFields); System.out.println("Use ConcurrentMergeScheduler: " + useCMS); if (nrtEverySec > 0.0) { System.out.println("Open & close NRT reader every: " + nrtEverySec + " sec"); } else { System.out.println("Open & close NRT reader every: never"); } System.out.println("Repeat docs: " + repeatDocs); if (verbose) { InfoStream.setDefault(new PrintStreamInfoStream(System.out)); } final IndexWriterConfig iwc = new IndexWriterConfig(a); if (indexSortField != null) { iwc.setIndexSort(new Sort(new SortField(indexSortField, indexSortType))); } if (mode == Mode.UPDATE) { iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); } else { iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); } iwc.setMaxBufferedDocs(maxBufferedDocs); iwc.setRAMBufferSizeMB(ramBufferSizeMB); // So flushed segments do/don't use CFS: iwc.setUseCompoundFile(useCFS); final AtomicBoolean indexingFailed = new AtomicBoolean(); iwc.setMergeScheduler(getMergeScheduler(indexingFailed, useCMS, maxConcurrentMerges, disableIOThrottle)); iwc.setMergePolicy(getMergePolicy(mergePolicy, useCFS)); // Keep all commit points: if (doDeletions || 
doForceMerge) { iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE); } final Codec codec = new Lucene62Codec() { @Override public PostingsFormat getPostingsFormatForField(String field) { return PostingsFormat.forName(field.equals("id") ? idFieldPostingsFormat : defaultPostingsFormat); } private final DocValuesFormat facetsDVFormat = DocValuesFormat.forName(facetDVFormatName); //private final DocValuesFormat lucene42DVFormat = DocValuesFormat.forName("Lucene42"); //private final DocValuesFormat diskDVFormat = DocValuesFormat.forName("Disk"); // private final DocValuesFormat lucene45DVFormat = DocValuesFormat.forName("Lucene45"); private final DocValuesFormat directDVFormat = DocValuesFormat.forName("Direct"); @Override public DocValuesFormat getDocValuesFormatForField(String field) { if (facetFields.contains(field) || field.equals("$facets")) { return facetsDVFormat; //} else if (field.equals("$facets_sorted_doc_values")) { //return diskDVFormat; } else { // Use default DVFormat for all else: // System.out.println("DV: field=" + field + " format=" + super.getDocValuesFormatForField(field)); return super.getDocValuesFormatForField(field); } } }; iwc.setCodec(codec); System.out.println("IW config=" + iwc); IndexWriter w = new IndexWriter(dir, iwc); System.out.println("Index has " + w.maxDoc() + " docs"); final TaxonomyWriter taxoWriter; if (facetFields.isEmpty() == false) { taxoWriter = new DirectoryTaxonomyWriter(od.open(Paths.get(args.getString("-indexPath"), "facets")), IndexWriterConfig.OpenMode.CREATE); } else { taxoWriter = null; } // Fixed seed so group field values are always consistent: final Random random = new Random(17); LineFileDocs lineFileDocs = new LineFileDocs(lineFile, repeatDocs, storeBody, tvsBody, bodyPostingsOffsets, false, taxoWriter, facetFields, facetsConfig, addDVFields); float docsPerSecPerThread = -1f; //float docsPerSecPerThread = 100f; IndexThreads threads = new IndexThreads(random, w, indexingFailed, lineFileDocs, numThreads, 
docCountLimit, addGroupingFields, printDPS, mode, docsPerSecPerThread, null, nrtEverySec, randomDocIDMax); System.out.println("\nIndexer: start"); final long t0 = System.currentTimeMillis(); threads.start(); while (!threads.done() && indexingFailed.get() == false) { Thread.sleep(100); // Commits once per minute on average: if (doRandomCommit && random.nextInt(600) == 17) { System.out.println("Indexer: now commit"); long commitStartNS = System.nanoTime(); w.commit(); System.out.println(String.format(Locale.ROOT, "Indexer: commit took %.1f msec", (System.nanoTime() - commitStartNS) / 1000000.)); } } threads.stop(); final long t1 = System.currentTimeMillis(); System.out.println("\nIndexer: indexing done (" + (t1 - t0) + " msec); total " + w.maxDoc() + " docs"); // if we update we can not tell how many docs if (threads.failed.get()) { throw new RuntimeException("exceptions during indexing"); } if (mode != Mode.UPDATE && docCountLimit != -1 && w.maxDoc() != docCountLimit) { throw new RuntimeException("w.maxDoc()=" + w.maxDoc() + " but expected " + docCountLimit); } final Map<String, String> commitData = new HashMap<String, String>(); if (waitForMerges) { w.close(); IndexWriterConfig iwc2 = new IndexWriterConfig(a); iwc2.setMergeScheduler( getMergeScheduler(indexingFailed, useCMS, maxConcurrentMerges, disableIOThrottle)); iwc2.setMergePolicy(getMergePolicy(mergePolicy, useCFS)); iwc2.setCodec(codec); iwc2.setUseCompoundFile(useCFS); iwc2.setMaxBufferedDocs(maxBufferedDocs); iwc2.setRAMBufferSizeMB(ramBufferSizeMB); if (indexSortField != null) { iwc2.setIndexSort(new Sort(new SortField(indexSortField, indexSortType))); } w = new IndexWriter(dir, iwc2); long t2 = System.currentTimeMillis(); System.out.println("\nIndexer: waitForMerges done (" + (t2 - t1) + " msec)"); } if (waitForCommit) { commitData.put("userData", "multi"); w.setLiveCommitData(commitData.entrySet()); long t2 = System.currentTimeMillis(); w.commit(); long t3 = System.currentTimeMillis(); 
System.out.println("\nIndexer: commit multi (took " + (t3 - t2) + " msec)"); } else { w.rollback(); w = null; } if (doForceMerge) { long forceMergeStartMSec = System.currentTimeMillis(); w.forceMerge(1); long forceMergeEndMSec = System.currentTimeMillis(); System.out.println( "\nIndexer: force merge done (took " + (forceMergeEndMSec - forceMergeStartMSec) + " msec)"); commitData.put("userData", "single"); w.setLiveCommitData(commitData.entrySet()); w.commit(); final long t5 = System.currentTimeMillis(); System.out.println("\nIndexer: commit single done (took " + (t5 - forceMergeEndMSec) + " msec)"); } if (doDeletions) { final long t5 = System.currentTimeMillis(); // Randomly delete 5% of the docs final Set<Integer> deleted = new HashSet<Integer>(); final int maxDoc = w.maxDoc(); final int toDeleteCount = (int) (maxDoc * 0.05); System.out.println("\nIndexer: delete " + toDeleteCount + " docs"); while (deleted.size() < toDeleteCount) { final int id = random.nextInt(maxDoc); if (!deleted.contains(id)) { deleted.add(id); w.deleteDocuments(new Term("id", LineFileDocs.intToID(id))); } } final long t6 = System.currentTimeMillis(); System.out.println("\nIndexer: deletes done (took " + (t6 - t5) + " msec)"); commitData.put("userData", doForceMerge ? 
"delsingle" : "delmulti"); w.setLiveCommitData(commitData.entrySet()); w.commit(); final long t7 = System.currentTimeMillis(); System.out.println("\nIndexer: commit delmulti done (took " + (t7 - t6) + " msec)"); if (doUpdate || w.numDocs() != maxDoc - toDeleteCount) { throw new RuntimeException( "count mismatch: w.numDocs()=" + w.numDocs() + " but expected " + (maxDoc - toDeleteCount)); } } if (taxoWriter != null) { System.out.println("Taxonomy has " + taxoWriter.getSize() + " ords"); taxoWriter.commit(); taxoWriter.close(); } final long tCloseStart = System.currentTimeMillis(); if (w != null) { w.close(); w = null; } if (waitForCommit) { System.out.println("\nIndexer: at close: " + SegmentInfos.readLatestCommit(dir)); System.out.println("\nIndexer: close took " + (System.currentTimeMillis() - tCloseStart) + " msec"); } dir.close(); final long tFinal = System.currentTimeMillis(); System.out.println("\nIndexer: net bytes indexed " + threads.getBytesIndexed()); final long indexingTime; if (waitForCommit) { indexingTime = tFinal - t0; System.out.println("\nIndexer: finished (" + indexingTime + " msec)"); } else { indexingTime = t1 - t0; System.out.println("\nIndexer: finished (" + indexingTime + " msec), excluding commit"); } System.out.println( "\nIndexer: " + (threads.getBytesIndexed() / 1024. / 1024. / 1024. / (indexingTime / 3600000.)) + " GB/hour plain text"); }