Example usage for org.apache.commons.configuration ConfigurationMap ConfigurationMap

List of usage examples for org.apache.commons.configuration ConfigurationMap ConfigurationMap

Introduction

On this page you can find an example usage for org.apache.commons.configuration ConfigurationMap ConfigurationMap.

Prototype

public ConfigurationMap(Configuration configuration) 

Source Link

Document

Creates a new instance of a ConfigurationMap that wraps the specified Configuration instance.

Usage

From source file:it.unimi.di.big.mg4j.tool.Combine.java

/**
 * Merges the input indices into a single output index: optionally combines
 * document sizes, then merges the (sorted) term lists of all inputs, combining
 * the inverted lists of equal terms, and finally writes the merged term file,
 * index properties and statistics.
 *
 * <p>NOTE(review): this method depends heavily on state initialised by the
 * enclosing class ({@code termQueue}, {@code term}, {@code termReader},
 * {@code indexReader}, {@code occurrencies}, {@code offsets}, {@code posNumBits},
 * {@code p}, {@code numIndices}, {@code indexWriter}, ...) that is not visible
 * in this excerpt; the comments below describe only what this code demonstrably
 * does with those fields.
 *
 * @throws ConfigurationException if saving the index properties fails.
 * @throws IOException if an I/O error occurs while reading inputs or writing
 *         the combined index, term file or statistics.
 */
public void run() throws ConfigurationException, IOException {
    final ProgressLogger pl = new ProgressLogger(LOGGER, logInterval, TimeUnit.MILLISECONDS);
    pl.displayFreeMemory = true;

    // Maximum document size seen while combining sizes; -1 when sizes are not written.
    final int maxDocSize;

    if (writeSizes) {
        LOGGER.info("Combining sizes...");
        final OutputBitStream sizesOutputBitStream = new OutputBitStream(
                ioFactory.getOutputStream(outputBasename + DiskBasedIndex.SIZES_EXTENSION), bufferSize, false);
        maxDocSize = combineSizes(sizesOutputBitStream);
        sizesOutputBitStream.close();
        LOGGER.info("Sizes combined.");
    } else
        maxDocSize = -1;

    // To write the new term list (one term per line, UTF-8).
    final PrintWriter termFile = new PrintWriter(new BufferedWriter(new OutputStreamWriter(
            ioFactory.getOutputStream(outputBasename + DiskBasedIndex.TERMS_EXTENSION), "UTF-8"), bufferSize));

    // The current term being merged across all indices that contain it.
    MutableString currTerm;

    long totalOccurrency = 0;
    // Progress is tracked in occurrences when occurrency streams are available,
    // otherwise in terms.
    pl.expectedUpdates = haveOccurrencies ? numberOfOccurrences : -1;
    pl.itemsName = haveOccurrencies ? "occurrences" : "terms";
    pl.logInterval = logInterval;
    pl.start("Combining lists...");

    int numUsedIndices, k;
    predictedSize = -1;
    predictedLengthNumBits = -1;

    // Discard first zero from offsets (each offsets stream starts with a
    // leading zero gamma that does not correspond to a list length).
    // NOTE(review): p != 0 appears to gate the presence of offset/position-size
    // streams — confirm against the enclosing class.
    if (p != 0)
        for (InputBitStream ibs : offsets)
            ibs.readGamma();

    // TODO: use the front of the queue?
    // Main merge loop: termQueue is an indirect priority queue over the input
    // indices, ordered by their current term; each iteration processes one
    // distinct term and the set of indices that contain it.
    while (!termQueue.isEmpty()) {
        numUsedIndices = 0;
        // We read a new word from the queue, copy it and write it to the term file
        currTerm = term[k = usedIndex[numUsedIndices++] = termQueue.first()].copy();

        if (DEBUG)
            System.err.println("Merging term " + currTerm);

        currTerm.println(termFile);
        // Advance the index we just consumed; drop it from the queue when its
        // term list is exhausted, otherwise re-heapify on its new current term.
        if (termReader[k].readLine(term[k]) == null)
            termQueue.dequeue();
        else
            termQueue.changed();

        // Then, we extract all equal words from the queue, accumulating the set of indices in inIndex and currIndex
        while (!termQueue.isEmpty() && term[termQueue.first()].equals(currTerm)) {
            k = usedIndex[numUsedIndices++] = termQueue.first();
            if (termReader[k].readLine(term[k]) == null)
                termQueue.dequeue();
            else
                termQueue.changed();
        }

        // Combination must see the indices in ascending order.
        if (numUsedIndices > 1)
            Arrays.sort(usedIndex, 0, numUsedIndices);

        // Load index iterators
        for (int i = numUsedIndices; i-- != 0;)
            indexIterator[usedIndex[i]] = indexReader[usedIndex[i]].nextIterator();

        if (haveOccurrencies) {
            // Compute and write the total occurrency. This works for any type of combination.
            totalOccurrency = 0;
            for (int i = numUsedIndices; i-- != 0;)
                totalOccurrency += occurrencies[usedIndex[i]].readLongGamma();
        }

        if (p != 0) {
            // Predict the size (in bits) of the combined list from the
            // per-index offset streams, and separately accumulate the bits
            // spent on positions.
            predictedSize = 0;
            predictedLengthNumBits = 0;

            for (int i = numUsedIndices; i-- != 0;) {
                if (index[usedIndex[i]] instanceof BitStreamHPIndex) {
                    // High-performance index: offsets and position sizes are
                    // kept in separate streams, so both are read independently.
                    predictedSize += offsets[usedIndex[i]].readLongGamma();
                    if (hasPositions)
                        predictedLengthNumBits += posNumBits[usedIndex[i]].readLongGamma();
                } else {
                    // Interleaved index: we must subtract the number of bits used for positions from the length of the overall inverted list
                    final long t = hasPositions ? posNumBits[usedIndex[i]].readLongGamma() : 0;
                    predictedSize += offsets[usedIndex[i]].readLongGamma() - t;
                    predictedLengthNumBits += t;
                }
            }
        }

        combine(numUsedIndices, totalOccurrency);
        /* A trick to get a correct prediction: pl.update() below counts 1,
         * so when progress is measured in occurrences we pre-add the rest. */
        if (haveOccurrencies)
            pl.count += totalOccurrency - 1;
        pl.update();
    }
    pl.done();

    termFile.close();

    if (!metadataOnly) {
        // Close all per-index input resources (readers, occurrency/offset/
        // position-size streams, term readers).
        for (int i = numIndices; i-- != 0;) {
            indexReader[i].close();
            if (haveOccurrencies)
                occurrencies[i].close();
            if (sumsMaxPos[i] != null)
                sumsMaxPos[i].close();
            if (p != 0) {
                offsets[i].close();
                if (posNumBits[i] != null)
                    posNumBits[i].close();
            }
            termReader[i].close();
        }
        // The bit count must be sampled before close(); properties() is
        // deliberately queried after close(), once the writer is finalised.
        final long indexSize = indexWriter.writtenBits();
        indexWriter.close();
        final Properties properties = indexWriter.properties();
        additionalProperties.setProperty(Index.PropertyKeys.SIZE, indexSize);
        additionalProperties.setProperty(Index.PropertyKeys.MAXDOCSIZE, maxDocSize);
        additionalProperties.setProperty(Index.PropertyKeys.MAXCOUNT, maxCount);
        additionalProperties.setProperty(Index.PropertyKeys.OCCURRENCES, numberOfOccurrences);
        properties.addAll(additionalProperties);
        // ConfigurationMap only provides a Map view for logging the merged properties.
        LOGGER.debug("Post-merge properties: " + new ConfigurationMap(properties));
        Scan.saveProperties(ioFactory, properties, outputBasename + DiskBasedIndex.PROPERTIES_EXTENSION);
    }

    // The stats file is always created; it is left empty in metadata-only mode.
    final PrintStream stats = new PrintStream(
            ioFactory.getOutputStream(outputBasename + DiskBasedIndex.STATS_EXTENSION));
    if (!metadataOnly)
        indexWriter.printStats(stats);
    stats.close();
}