List of usage examples for the org.apache.commons.configuration ConfigurationMap constructor
`public ConfigurationMap(Configuration configuration)`,
which creates a ConfigurationMap
that wraps the specified Configuration
instance. From source file: it.unimi.di.big.mg4j.tool.Combine.java
/**
 * Merges the input indices into a single output index.
 *
 * <p>Optionally combines document sizes first, then performs a multi-way merge of the
 * per-index term lists: terms are drawn from {@code termQueue} in order, equal terms from
 * different indices are grouped, and {@link #combine(int, long)} is invoked once per distinct
 * term with the set of indices (in {@code usedIndex[0..numUsedIndices)}) that contain it.
 * Finally, unless {@code metadataOnly} is set, closes all per-index readers, writes the
 * merged index properties, and emits a stats file.
 *
 * @throws ConfigurationException if saving the index properties fails.
 * @throws IOException if reading the inputs or writing the outputs fails.
 */
public void run() throws ConfigurationException, IOException {
    final ProgressLogger pl = new ProgressLogger(LOGGER, logInterval, TimeUnit.MILLISECONDS);
    pl.displayFreeMemory = true;
    final int maxDocSize;
    if (writeSizes) {
        // Merge the per-document size lists into a single sizes file before merging terms.
        LOGGER.info("Combining sizes...");
        final OutputBitStream sizesOutputBitStream = new OutputBitStream(
                ioFactory.getOutputStream(outputBasename + DiskBasedIndex.SIZES_EXTENSION), bufferSize, false);
        maxDocSize = combineSizes(sizesOutputBitStream);
        sizesOutputBitStream.close();
        LOGGER.info("Sizes combined.");
    } else
        maxDocSize = -1; // Sentinel: sizes were not combined, so no maximum is known.

    // Writer for the merged term list (one term per line, UTF-8).
    final PrintWriter termFile = new PrintWriter(new BufferedWriter(new OutputStreamWriter(
            ioFactory.getOutputStream(outputBasename + DiskBasedIndex.TERMS_EXTENSION), "UTF-8"), bufferSize));

    // The term currently being merged across indices.
    MutableString currTerm;
    long totalOccurrency = 0;
    // Progress is counted in occurrences when occurrency data is available, else in terms.
    pl.expectedUpdates = haveOccurrencies ? numberOfOccurrences : -1;
    pl.itemsName = haveOccurrencies ? "occurrences" : "terms";
    pl.logInterval = logInterval;
    pl.start("Combining lists...");

    int numUsedIndices, k;
    predictedSize = -1;
    predictedLengthNumBits = -1;

    // Discard first zero from offsets
    // (each offsets stream starts with a gamma-coded 0 that must be skipped once).
    if (p != 0)
        for (InputBitStream ibs : offsets)
            ibs.readGamma();

    // TODO: use the front of the queue?
    while (!termQueue.isEmpty()) {
        numUsedIndices = 0;
        // We read a new word from the queue, copy it and write it to the term file.
        // Note: this single statement also records the winning index in usedIndex[0] and in k.
        currTerm = term[k = usedIndex[numUsedIndices++] = termQueue.first()].copy();
        if (DEBUG)
            System.err.println("Merging term " + currTerm);
        currTerm.println(termFile);
        // Advance the winning index's term reader; drop it from the queue when exhausted.
        if (termReader[k].readLine(term[k]) == null)
            termQueue.dequeue();
        else
            termQueue.changed();

        // Then, we extract all equal words from the queue, accumulating the set of
        // indices that contain currTerm into usedIndex[0..numUsedIndices).
        while (!termQueue.isEmpty() && term[termQueue.first()].equals(currTerm)) {
            k = usedIndex[numUsedIndices++] = termQueue.first();
            if (termReader[k].readLine(term[k]) == null)
                termQueue.dequeue();
            else
                termQueue.changed();
        }
        // Queue extraction order is not sorted; combine() presumably expects ascending
        // index order — sort only when more than one index contributed.
        if (numUsedIndices > 1)
            Arrays.sort(usedIndex, 0, numUsedIndices);

        // Load index iterators for every index that contains the current term.
        for (int i = numUsedIndices; i-- != 0;)
            indexIterator[usedIndex[i]] = indexReader[usedIndex[i]].nextIterator();

        if (haveOccurrencies) {
            // Compute and write the total occurrency. This works for any type of combination.
            totalOccurrency = 0;
            for (int i = numUsedIndices; i-- != 0;)
                totalOccurrency += occurrencies[usedIndex[i]].readLongGamma();
        }

        if (p != 0) {
            // Predict the size (in bits) of the merged posting list by summing the
            // per-index gamma-coded offsets; position bits are tracked separately.
            predictedSize = 0;
            predictedLengthNumBits = 0;
            for (int i = numUsedIndices; i-- != 0;) {
                if (index[usedIndex[i]] instanceof BitStreamHPIndex) {
                    // High-performance index: offsets and position bits are stored separately.
                    predictedSize += offsets[usedIndex[i]].readLongGamma();
                    if (hasPositions)
                        predictedLengthNumBits += posNumBits[usedIndex[i]].readLongGamma();
                } else {
                    // Interleaved index: we must subtract the number of bits used for
                    // positions from the length of the overall inverted list.
                    final long t = hasPositions ? posNumBits[usedIndex[i]].readLongGamma() : 0;
                    predictedSize += offsets[usedIndex[i]].readLongGamma() - t;
                    predictedLengthNumBits += t;
                }
            }
        }

        combine(numUsedIndices, totalOccurrency);

        /* A trick to get a correct prediction: pl.update() adds 1, so pre-add
         * totalOccurrency - 1 to count each occurrence exactly once. */
        if (haveOccurrencies)
            pl.count += totalOccurrency - 1;
        pl.update();
    }
    pl.done();
    termFile.close();

    if (!metadataOnly) {
        // Close every per-index resource (readers and the optional side streams).
        for (int i = numIndices; i-- != 0;) {
            indexReader[i].close();
            if (haveOccurrencies)
                occurrencies[i].close();
            if (sumsMaxPos[i] != null)
                sumsMaxPos[i].close();
            if (p != 0) {
                offsets[i].close();
                if (posNumBits[i] != null)
                    posNumBits[i].close();
            }
            termReader[i].close();
        }
        // writtenBits() must be read before close(); properties() is read after.
        final long indexSize = indexWriter.writtenBits();
        indexWriter.close();
        final Properties properties = indexWriter.properties();
        additionalProperties.setProperty(Index.PropertyKeys.SIZE, indexSize);
        additionalProperties.setProperty(Index.PropertyKeys.MAXDOCSIZE, maxDocSize);
        additionalProperties.setProperty(Index.PropertyKeys.MAXCOUNT, maxCount);
        additionalProperties.setProperty(Index.PropertyKeys.OCCURRENCES, numberOfOccurrences);
        properties.addAll(additionalProperties);
        // ConfigurationMap gives a readable Map view of the properties for logging.
        LOGGER.debug("Post-merge properties: " + new ConfigurationMap(properties));
        Scan.saveProperties(ioFactory, properties, outputBasename + DiskBasedIndex.PROPERTIES_EXTENSION);
    }

    // Stats file is always created; its content is written only when a merge was performed.
    final PrintStream stats = new PrintStream(
            ioFactory.getOutputStream(outputBasename + DiskBasedIndex.STATS_EXTENSION));
    if (!metadataOnly)
        indexWriter.printStats(stats);
    stats.close();
}