List of usage examples for the org.apache.commons.configuration ConfigurationMap constructor
`public ConfigurationMap(Configuration configuration)`,
which creates a ConfigurationMap
that wraps the specified Configuration
instance. From source file: it.unimi.di.big.mg4j.tool.Combine.java
/**
 * Merges the input indices into a single output index.
 *
 * <p>Optionally combines document sizes first, then performs a multi-way merge of the
 * per-index term lists: terms are drawn from {@code termQueue} in order, equal terms from
 * different indices are grouped, and {@link #combine(int, long)} is invoked once per distinct
 * term with the set of indices (in {@code usedIndex[0..numUsedIndices)}) that contain it.
 * Finally, unless {@code metadataOnly} is set, closes all per-index readers, writes the
 * merged index properties, and emits a stats file.
 *
 * @throws ConfigurationException if saving the index properties fails.
 * @throws IOException if reading the inputs or writing the outputs fails.
 */
public void run() throws ConfigurationException, IOException {
    final ProgressLogger pl = new ProgressLogger(LOGGER, logInterval, TimeUnit.MILLISECONDS);
    pl.displayFreeMemory = true;
    final int maxDocSize;
    if (writeSizes) {
        // Merge the per-document size lists into a single sizes file before merging terms.
        LOGGER.info("Combining sizes...");
        final OutputBitStream sizesOutputBitStream = new OutputBitStream(
                ioFactory.getOutputStream(outputBasename + DiskBasedIndex.SIZES_EXTENSION), bufferSize, false);
        maxDocSize = combineSizes(sizesOutputBitStream);
        sizesOutputBitStream.close();
        LOGGER.info("Sizes combined.");
    } else
        maxDocSize = -1; // Sentinel: sizes were not combined, so no maximum is known.

    // Writer for the merged term list (one term per line, UTF-8).
    final PrintWriter termFile = new PrintWriter(new BufferedWriter(new OutputStreamWriter(
            ioFactory.getOutputStream(outputBasename + DiskBasedIndex.TERMS_EXTENSION), "UTF-8"), bufferSize));

    // The term currently being merged across indices.
    MutableString currTerm;
    long totalOccurrency = 0;
    // Progress is counted in occurrences when occurrency data is available, else in terms.
    pl.expectedUpdates = haveOccurrencies ? numberOfOccurrences : -1;
    pl.itemsName = haveOccurrencies ? "occurrences" : "terms";
    pl.logInterval = logInterval;
    pl.start("Combining lists...");

    int numUsedIndices, k;
    predictedSize = -1;
    predictedLengthNumBits = -1;

    // Discard first zero from offsets
    // (each offsets stream starts with a gamma-coded 0 that must be skipped once).
    if (p != 0)
        for (InputBitStream ibs : offsets)
            ibs.readGamma();

    // TODO: use the front of the queue?
    while (!termQueue.isEmpty()) {
        numUsedIndices = 0;
        // We read a new word from the queue, copy it and write it to the term file.
        // Note: this single statement also records the winning index in usedIndex[0] and in k.
        currTerm = term[k = usedIndex[numUsedIndices++] = termQueue.first()].copy();
        if (DEBUG)
            System.err.println("Merging term " + currTerm);
        currTerm.println(termFile);
        // Advance the winning index's term reader; drop it from the queue when exhausted.
        if (termReader[k].readLine(term[k]) == null)
            termQueue.dequeue();
        else
            termQueue.changed();

        // Then, we extract all equal words from the queue, accumulating the set of
        // indices that contain currTerm into usedIndex[0..numUsedIndices).
        while (!termQueue.isEmpty() && term[termQueue.first()].equals(currTerm)) {
            k = usedIndex[numUsedIndices++] = termQueue.first();
            if (termReader[k].readLine(term[k]) == null)
                termQueue.dequeue();
            else
                termQueue.changed();
        }
        // Queue extraction order is not sorted; combine() presumably expects ascending
        // index order — sort only when more than one index contributed.
        if (numUsedIndices > 1)
            Arrays.sort(usedIndex, 0, numUsedIndices);

        // Load index iterators for every index that contains the current term.
        for (int i = numUsedIndices; i-- != 0;)
            indexIterator[usedIndex[i]] = indexReader[usedIndex[i]].nextIterator();

        if (haveOccurrencies) {
            // Compute and write the total occurrency. This works for any type of combination.
            totalOccurrency = 0;
            for (int i = numUsedIndices; i-- != 0;)
                totalOccurrency += occurrencies[usedIndex[i]].readLongGamma();
        }

        if (p != 0) {
            // Predict the size (in bits) of the merged posting list by summing the
            // per-index gamma-coded offsets; position bits are tracked separately.
            predictedSize = 0;
            predictedLengthNumBits = 0;
            for (int i = numUsedIndices; i-- != 0;) {
                if (index[usedIndex[i]] instanceof BitStreamHPIndex) {
                    // High-performance index: offsets and position bits are stored separately.
                    predictedSize += offsets[usedIndex[i]].readLongGamma();
                    if (hasPositions)
                        predictedLengthNumBits += posNumBits[usedIndex[i]].readLongGamma();
                } else {
                    // Interleaved index: we must subtract the number of bits used for
                    // positions from the length of the overall inverted list.
                    final long t = hasPositions ? posNumBits[usedIndex[i]].readLongGamma() : 0;
                    predictedSize += offsets[usedIndex[i]].readLongGamma() - t;
                    predictedLengthNumBits += t;
                }
            }
        }

        combine(numUsedIndices, totalOccurrency);

        /* A trick to get a correct prediction: pl.update() adds 1, so pre-add
         * totalOccurrency - 1 to count each occurrence exactly once. */
        if (haveOccurrencies)
            pl.count += totalOccurrency - 1;
        pl.update();
    }
    pl.done();
    termFile.close();

    if (!metadataOnly) {
        // Close every per-index resource (readers and the optional side streams).
        for (int i = numIndices; i-- != 0;) {
            indexReader[i].close();
            if (haveOccurrencies)
                occurrencies[i].close();
            if (sumsMaxPos[i] != null)
                sumsMaxPos[i].close();
            if (p != 0) {
                offsets[i].close();
                if (posNumBits[i] != null)
                    posNumBits[i].close();
            }
            termReader[i].close();
        }
        // writtenBits() must be read before close(); properties() is read after.
        final long indexSize = indexWriter.writtenBits();
        indexWriter.close();
        final Properties properties = indexWriter.properties();
        additionalProperties.setProperty(Index.PropertyKeys.SIZE, indexSize);
        additionalProperties.setProperty(Index.PropertyKeys.MAXDOCSIZE, maxDocSize);
        additionalProperties.setProperty(Index.PropertyKeys.MAXCOUNT, maxCount);
        additionalProperties.setProperty(Index.PropertyKeys.OCCURRENCES, numberOfOccurrences);
        properties.addAll(additionalProperties);
        // ConfigurationMap gives a readable Map view of the properties for logging.
        LOGGER.debug("Post-merge properties: " + new ConfigurationMap(properties));
        Scan.saveProperties(ioFactory, properties, outputBasename + DiskBasedIndex.PROPERTIES_EXTENSION);
    }

    // Stats file is always created; its content is written only when a merge was performed.
    final PrintStream stats = new PrintStream(
            ioFactory.getOutputStream(outputBasename + DiskBasedIndex.STATS_EXTENSION));
    if (!metadataOnly)
        indexWriter.printStats(stats);
    stats.close();
}