Example usage for org.apache.commons.configuration ConfigurationMap ConfigurationMap

List of usage examples for org.apache.commons.configuration ConfigurationMap ConfigurationMap

Introduction

On this page you can find example usages of the org.apache.commons.configuration ConfigurationMap(Configuration) constructor.

Prototype

public ConfigurationMap(Configuration configuration) 

Source Link

Document

Creates a new instance of a ConfigurationMap that wraps the specified Configuration instance.

Usage

From source file:com.linkedin.pinot.common.segment.fetcher.SegmentFetcherFactory.java

/**
 * Initializes the segment fetcher factory from the server configuration: for every
 * protocol that appears as the first dotted component of a key under the segment
 * fetcher factory prefix, registers a fetcher configured from that protocol's
 * config subset (unless one is already registered).
 *
 * @param pinotHelixProperties the full server configuration
 */
public static void initSegmentFetcherFactory(Configuration pinotHelixProperties) {
    Configuration segmentFetcherFactoryConfig = pinotHelixProperties
            .subset(CommonConstants.Server.PREFIX_OF_CONFIG_OF_SEGMENT_FETCHER_FACTORY);

    Iterator segmentFetcherFactoryConfigIterator = segmentFetcherFactoryConfig.getKeys();
    while (segmentFetcherFactoryConfigIterator.hasNext()) {
        Object configKeyObject = segmentFetcherFactoryConfigIterator.next();
        try {
            String segmentFetcherConfigKey = configKeyObject.toString();
            // String.split takes a regex: an unescaped "." matches ANY character, which
            // would always yield an empty first element. Escape it to split on a literal dot.
            String protocol = segmentFetcherConfigKey.split("\\.", 2)[0];
            if (!SegmentFetcherFactory.containsProtocol(protocol)) {
                SegmentFetcherFactory
                        .initSegmentFetcher(new ConfigurationMap(segmentFetcherFactoryConfig.subset(protocol)));
            }
        } catch (Exception e) {
            // Pass the exception to the logger so the failure cause is not lost.
            LOGGER.error("Got exception to process the key: " + configKeyObject, e);
        }
    }
}

From source file:com.linkedin.pinot.routing.PercentageBasedRoutingTableSelector.java

/**
 * Reads the per-table LLC routing percentages configured under {@code TABLE_KEY}
 * and caches them in {@code _percentMap}. If no table-specific configuration is
 * present, nothing is cached and 0% LLC routing applies to every table.
 *
 * @param configuration the routing table selector configuration
 */
public void init(Configuration configuration) {
    try {
        Configuration tablesConfig = configuration.subset(TABLE_KEY);
        if (tablesConfig == null || tablesConfig.isEmpty()) {
            LOGGER.info("No specific table configuration. Using 0% LLC for all tables");
            return;
        }
        // Expose the table subset through the Map interface and cache each percentage.
        ConfigurationMap tableConfigMap = new ConfigurationMap(tablesConfig);
        Set<Map.Entry<String, Integer>> tableEntries = tableConfigMap.entrySet();
        for (Map.Entry<String, Integer> tableEntry : tableEntries) {
            LOGGER.info("Using {} percent LLC routing for table {}", tableEntry.getValue(), tableEntry.getKey());
            _percentMap.put(tableEntry.getKey(), tableEntry.getValue());
        }
    } catch (Exception e) {
        LOGGER.warn("Could not parse get config for {}. Using no LLC routing", TABLE_KEY, e);
    }
}

From source file:com.bitcup.configurator.FileConfigMap.java

/**
 * Loads configuration properties file at the local, host, env and base levels
 * and exposes them as a {@link java.util.Map}.
 * <p/>
 * Configuration is refreshed at the default refresh delay value of
 * {@value #DEFAULT_REFRESH_DELAY_IN_SECONDS}.
 *
 * @param filename name of the properties file to load
 */
public FileConfigMap(String filename) {
    super(filename);
    // Wrap the configuration loaded by the superclass so callers can read it
    // through the java.util.Map interface.
    this.map = new ConfigurationMap(this.configuration);
}

From source file:com.bitcup.configurator.FileConfigMap.java

/**
 * Loads configuration properties file at the local, host, env and base levels
 * and exposes them as a {@link java.util.Map}.  Sets the refresh delay on the
 * {@link org.apache.commons.configuration.reloading.FileChangedReloadingStrategy}.
 *
 * @param filename         name of the properties file to load
 * @param refreshDelaySecs refresh delay in seconds
 */
public FileConfigMap(String filename, int refreshDelaySecs) {
    super(filename, refreshDelaySecs);
    // Wrap the configuration loaded by the superclass so callers can read it
    // through the java.util.Map interface.
    this.map = new ConfigurationMap(this.configuration);
}

From source file:it.unimi.dsi.util.Properties.java

/** Adds all properties from the given configuration.
 *
 * <p>Properties from the new configuration will overwrite properties with the
 * same key already present in this object ({@link java.util.Map#putAll(java.util.Map)}
 * semantics).
 *
 * @param configuration a configuration.
 * */
@SuppressWarnings("unchecked")
public void addAll(final Configuration configuration) {
    // Wrap both sides as Maps and bulk-copy the entries across.
    new ConfigurationMap(this).putAll(new ConfigurationMap(configuration));
}

From source file:com.wrmsr.neurosis.util.Configs.java

/**
 * Removes the configuration subtree rooted at {@code prefix} from {@code properties}
 * (the given map is mutated in place) and returns the removed entries as an
 * immutable map. Returns an empty map, leaving {@code properties} untouched, when
 * no such prefix exists.
 *
 * @param properties the flat property map to strip (mutated)
 * @param prefix     the dotted prefix identifying the subtree to remove
 * @return the stripped-out entries, keyed relative to {@code prefix}
 */
public static Map<String, String> stripSubconfig(Map<String, String> properties, String prefix) {
    HierarchicalConfiguration hierarchical = toHierarchical(properties);
    final Configuration subconfig;
    try {
        subconfig = hierarchical.configurationAt(prefix);
    } catch (IllegalArgumentException e) {
        // No subtree under this prefix: nothing to strip.
        return ImmutableMap.of();
    }

    // Copy the subtree's entries into an immutable snapshot before deleting it.
    ImmutableMap.Builder<String, String> strippedBuilder = ImmutableMap.builder();
    for (Map.Entry<Object, Object> entry : new ConfigurationMap(subconfig).entrySet()) {
        strippedBuilder.put(checkNotNull(entry.getKey()).toString(), checkNotNull(entry.getValue()).toString());
    }
    Map<String, String> subproperties = strippedBuilder.build();

    hierarchical.clearTree(prefix);
    // Drop every key that no longer exists in the hierarchical view.
    properties.keySet().removeIf(key -> !hierarchical.containsKey(key));

    return subproperties;
}

From source file:com.ikanow.aleph2.graph.titan.services.TitanGraphService.java

/**
 * Builds the configuration handed to remote processes by folding the live Titan
 * configuration into {@code local_config} under the config-override path.
 * When Titan is disabled ({@code _titan == null}) the local config is returned
 * unchanged.
 */
@SuppressWarnings("unchecked")
@Override
public Config createRemoteConfig(Optional<DataBucketBean> maybe_bucket, Config local_config) {
    if (null == _titan)
        return local_config; // (titan is disabled, just pass through)

    // ConfigurationMap is effectively raw; the double cast re-types it as
    // AbstractMap<String, ?> so ConfigFactory.parseMap accepts it (unchecked).
    final Config distributed_config = ConfigFactory.parseMap(
            (AbstractMap<String, ?>) (AbstractMap<?, ?>) new ConfigurationMap(_titan.configuration()));

    // Attach the distributed settings under <PROPERTIES_ROOT>.<config_override>.
    return local_config.withValue(TitanGraphConfigBean.PROPERTIES_ROOT + "."
            + BeanTemplateUtils.from(TitanGraphConfigBean.class).field(TitanGraphConfigBean::config_override),
            distributed_config.root());
}

From source file:it.unimi.di.big.mg4j.tool.PartitionLexically.java

/**
 * Lexically partitions a disk-based index into {@code numIndices} local indices:
 * each term is routed to a local index by {@code strategy}, and its postings,
 * offsets, frequency, occurrency and term string are copied verbatim into the
 * corresponding per-index files. Supports plain bitstream, high-performance and
 * quasi-succinct input indices. Finally writes a global cluster property file and
 * one property file per local index.
 *
 * <p>Fixes over the previous revision: removed a duplicated
 * {@code setProperty(Index.PropertyKeys.POSTINGS, ...)} call and a redundant
 * second {@code close()} of the global positions stream.
 *
 * @throws ConfigurationException if a property file cannot be read or written.
 * @throws IOException if an I/O error occurs.
 * @throws ClassNotFoundException if the index class named in the properties cannot be loaded.
 */
@SuppressWarnings("resource")
public void run() throws ConfigurationException, IOException, ClassNotFoundException {
    final ProgressLogger pl = new ProgressLogger(LOGGER, logInterval, TimeUnit.MILLISECONDS);
    final byte[] buffer = new byte[bufferSize];

    // Metadata of the global (input) index determines which file layout we read.
    final Properties properties = new Properties(inputBasename + DiskBasedIndex.PROPERTIES_EXTENSION);
    final long numberOfTerms = properties.getLong(Index.PropertyKeys.TERMS);
    final Class<?> indexClass = Class.forName(properties.getString(Index.PropertyKeys.INDEXCLASS));
    final boolean isHighPerformance = BitStreamHPIndex.class.isAssignableFrom(indexClass);
    final boolean isQuasiSuccinct = QuasiSuccinctIndex.class.isAssignableFrom(indexClass);
    final ByteOrder byteOrder = isQuasiSuccinct
            ? DiskBasedIndex.byteOrder(properties.getString(QuasiSuccinctIndex.PropertyKeys.BYTEORDER))
            : null;

    // Per-local-index output streams; entries stay null when not applicable.
    final OutputBitStream[] localIndex = new OutputBitStream[numIndices];
    final OutputBitStream[] localPositions = new OutputBitStream[numIndices];
    final OutputBitStream[] localOffsets = new OutputBitStream[numIndices];

    final LongWordOutputBitStream[] localQSPointers = isQuasiSuccinct ? new LongWordOutputBitStream[numIndices]
            : null;
    final LongWordOutputBitStream[] localQSCounts = isQuasiSuccinct ? new LongWordOutputBitStream[numIndices]
            : null;
    final LongWordOutputBitStream[] localQSPositions = isQuasiSuccinct ? new LongWordOutputBitStream[numIndices]
            : null;
    final OutputBitStream[] localCountsOffsets = isQuasiSuccinct ? new OutputBitStream[numIndices] : null;
    final OutputBitStream[] localPositionsOffsets = isQuasiSuccinct ? new OutputBitStream[numIndices] : null;

    final OutputBitStream[] localPosNumBits = new OutputBitStream[numIndices];
    final OutputBitStream[] localSumsMaxPos = new OutputBitStream[numIndices];
    final OutputBitStream[] localFrequencies = new OutputBitStream[numIndices];
    final OutputBitStream[] localOccurrencies = new OutputBitStream[numIndices];
    final PrintWriter[] localTerms = new PrintWriter[numIndices];
    final long[] numTerms = new long[numIndices];
    final long[] localOccurrences = new long[numIndices];
    final long[] numberOfPostings = new long[numIndices];

    // Global (input) streams; several are only present for particular index layouts.
    final InputBitStream globalIndex = isQuasiSuccinct ? null
            : new InputBitStream(inputBasename + DiskBasedIndex.INDEX_EXTENSION, bufferSize);
    final long globalPositionsLength = new File(inputBasename + DiskBasedIndex.POSITIONS_EXTENSION).length();
    final InputBitStream globalPositions = isHighPerformance
            ? new InputBitStream(inputBasename + DiskBasedIndex.POSITIONS_EXTENSION, bufferSize)
            : null;
    final FastBufferedReader terms = new FastBufferedReader(new InputStreamReader(
            new FileInputStream(inputBasename + DiskBasedIndex.TERMS_EXTENSION), "UTF-8"));
    final InputBitStream globalOffsets = new InputBitStream(inputBasename
            + (isQuasiSuccinct ? DiskBasedIndex.POINTERS_EXTENSIONS + DiskBasedIndex.OFFSETS_POSTFIX
                    : DiskBasedIndex.OFFSETS_EXTENSION));
    final InputBitStream globalPositionsOffsets = isQuasiSuccinct ? new InputBitStream(
            inputBasename + DiskBasedIndex.POSITIONS_EXTENSION + DiskBasedIndex.OFFSETS_POSTFIX, bufferSize)
            : null;
    final InputBitStream globalCountsOffsets = isQuasiSuccinct ? new InputBitStream(
            inputBasename + DiskBasedIndex.COUNTS_EXTENSION + DiskBasedIndex.OFFSETS_POSTFIX, bufferSize)
            : null;

    final LongWordInputBitStream globalPointersIbs = isQuasiSuccinct
            ? new LongWordInputBitStream(inputBasename + DiskBasedIndex.POINTERS_EXTENSIONS, bufferSize,
                    byteOrder)
            : null;
    final LongWordInputBitStream globalCountsIbs = isQuasiSuccinct
            ? new LongWordInputBitStream(inputBasename + DiskBasedIndex.COUNTS_EXTENSION, bufferSize, byteOrder)
            : null;
    final LongWordInputBitStream globalPositionsIbs = isQuasiSuccinct
            ? new LongWordInputBitStream(inputBasename + DiskBasedIndex.POSITIONS_EXTENSION, bufferSize,
                    byteOrder)
            : null;

    final File posNumBitsFile = new File(inputBasename + DiskBasedIndex.POSITIONS_NUMBER_OF_BITS_EXTENSION);
    final InputBitStream posNumBits = posNumBitsFile.exists()
            ? new InputBitStream(inputBasename + DiskBasedIndex.POSITIONS_NUMBER_OF_BITS_EXTENSION)
            : null;
    final File sumsMaxPosFile = new File(inputBasename + DiskBasedIndex.SUMS_MAX_POSITION_EXTENSION);
    final InputBitStream sumsMaxPos = sumsMaxPosFile.exists() ? new InputBitStream(sumsMaxPosFile) : null;
    final InputBitStream frequencies = new InputBitStream(inputBasename + DiskBasedIndex.FREQUENCIES_EXTENSION);
    final InputBitStream occurrencies = new InputBitStream(
            inputBasename + DiskBasedIndex.OCCURRENCIES_EXTENSION);
    // Skip the initial gamma-coded zero that offset files start with.
    globalOffsets.readGamma();
    if (globalCountsOffsets != null)
        globalCountsOffsets.readGamma();
    if (globalPositionsOffsets != null)
        globalPositionsOffsets.readGamma();

    // Open the output files of each local index and write the leading zero offsets.
    for (int i = 0; i < numIndices; i++) {
        if (!isQuasiSuccinct)
            localIndex[i] = new OutputBitStream(localBasename[i] + DiskBasedIndex.INDEX_EXTENSION, bufferSize);
        if (isHighPerformance)
            localPositions[i] = new OutputBitStream(localBasename[i] + DiskBasedIndex.POSITIONS_EXTENSION,
                    bufferSize);
        if (isQuasiSuccinct) {
            localQSPointers[i] = new LongWordOutputBitStream(
                    new FileOutputStream(localBasename[i] + DiskBasedIndex.POINTERS_EXTENSIONS).getChannel(),
                    byteOrder);
            localQSCounts[i] = new LongWordOutputBitStream(
                    new FileOutputStream(localBasename[i] + DiskBasedIndex.COUNTS_EXTENSION).getChannel(),
                    byteOrder);
            localQSPositions[i] = new LongWordOutputBitStream(
                    new FileOutputStream(localBasename[i] + DiskBasedIndex.POSITIONS_EXTENSION).getChannel(),
                    byteOrder);
        }
        localFrequencies[i] = new OutputBitStream(localBasename[i] + DiskBasedIndex.FREQUENCIES_EXTENSION);
        localOccurrencies[i] = new OutputBitStream(localBasename[i] + DiskBasedIndex.OCCURRENCIES_EXTENSION);
        localTerms[i] = new PrintWriter(
                new OutputStreamWriter(
                        new FastBufferedOutputStream(
                                new FileOutputStream(localBasename[i] + DiskBasedIndex.TERMS_EXTENSION)),
                        "UTF-8"));
        localOffsets[i] = new OutputBitStream(localBasename[i]
                + (isQuasiSuccinct ? DiskBasedIndex.POINTERS_EXTENSIONS + DiskBasedIndex.OFFSETS_POSTFIX
                        : DiskBasedIndex.OFFSETS_EXTENSION));
        if (posNumBits != null)
            localPosNumBits[i] = new OutputBitStream(
                    localBasename[i] + DiskBasedIndex.POSITIONS_NUMBER_OF_BITS_EXTENSION);
        if (sumsMaxPos != null)
            localSumsMaxPos[i] = new OutputBitStream(
                    localBasename[i] + DiskBasedIndex.SUMS_MAX_POSITION_EXTENSION);
        localOffsets[i].writeGamma(0);
        if (isQuasiSuccinct) {
            localCountsOffsets[i] = new OutputBitStream(
                    localBasename[i] + DiskBasedIndex.COUNTS_EXTENSION + DiskBasedIndex.OFFSETS_POSTFIX);
            localPositionsOffsets[i] = new OutputBitStream(
                    localBasename[i] + DiskBasedIndex.POSITIONS_EXTENSION + DiskBasedIndex.OFFSETS_POSTFIX);
            localCountsOffsets[i].writeGamma(0);
            localPositionsOffsets[i].writeGamma(0);
        }
    }

    // The current term
    final MutableString currTerm = new MutableString();

    pl.expectedUpdates = numberOfTerms;
    pl.itemsName = "bits";
    pl.logInterval = logInterval;
    pl.start("Partitioning index...");

    long termNumber = 0;
    int k, prevK = -1;
    long previousHeaderLength = 0, newHeaderLength = 0;
    long length, positionsOffset = 0;

    // Main loop: route each term's data to the local index chosen by the strategy.
    while (terms.readLine(currTerm) != null) {
        k = strategy.localIndex(termNumber); // The local index for this term
        if (numTerms[k] != strategy.localNumber(termNumber))
            throw new IllegalStateException();
        numTerms[k]++;

        if (isHighPerformance) {
            // HP indices keep positions in a separate file addressed by a per-term
            // header; rewrite the header and pour the previous term's positions.
            final long temp = globalIndex.readBits();
            positionsOffset = globalIndex.readLongDelta();
            previousHeaderLength = (int) (globalIndex.readBits() - temp);
            if (prevK != -1) {
                length = positionsOffset - globalPositions.readBits();
                copy(buffer, globalPositions, localPositions[prevK], length);
            }
            newHeaderLength = localIndex[k].writeLongDelta(localPositions[k].writtenBits());
        }

        final long frequency = frequencies.readLongGamma();
        localFrequencies[k].writeLongGamma(frequency);
        numberOfPostings[k] += frequency;

        if (posNumBits != null)
            localPosNumBits[k].writeLongGamma(posNumBits.readLongGamma());
        if (sumsMaxPos != null)
            localSumsMaxPos[k].writeLongDelta(sumsMaxPos.readLongDelta());

        final long occurrency = occurrencies.readLongGamma();
        localOccurrences[k] += occurrency;
        localOccurrencies[k].writeLongGamma(occurrency);

        currTerm.println(localTerms[k]);

        if (isQuasiSuccinct) {
            // Copy pointers, counts and positions bit-by-bit using the offset lists.
            localOffsets[k].writeLongGamma(length = globalOffsets.readLongGamma());
            copy(globalPointersIbs, localQSPointers[k], length);
            localCountsOffsets[k].writeLongGamma(length = globalCountsOffsets.readLongGamma());
            copy(globalCountsIbs, localQSCounts[k], length);
            localPositionsOffsets[k].writeLongGamma(length = globalPositionsOffsets.readLongGamma());
            copy(globalPositionsIbs, localQSPositions[k], length);
        } else {
            length = globalOffsets.readLongGamma() - previousHeaderLength;
            localOffsets[k].writeLongGamma(length + newHeaderLength);

            copy(buffer, globalIndex, localIndex[k], length);
        }

        pl.update();
        prevK = k;
        termNumber++;
    }

    // We pour the last piece of positions
    if (isHighPerformance) {
        if (prevK != -1) {
            length = globalPositionsLength * 8 - globalPositions.readBits();
            copy(buffer, globalPositions, localPositions[prevK], length);
        }
    }

    pl.done();

    // Close all global input streams exactly once.
    terms.close();
    globalOffsets.close();
    if (globalIndex != null)
        globalIndex.close();
    if (globalPointersIbs != null)
        globalPointersIbs.close();
    if (globalCountsIbs != null)
        globalCountsIbs.close();
    if (globalCountsOffsets != null)
        globalCountsOffsets.close();
    if (globalPositionsIbs != null)
        globalPositionsIbs.close();
    if (globalPositionsOffsets != null)
        globalPositionsOffsets.close();

    if (globalPositions != null)
        globalPositions.close();
    frequencies.close();
    occurrencies.close();
    if (posNumBits != null)
        posNumBits.close();
    if (sumsMaxPos != null)
        sumsMaxPos.close();

    // We copy the relevant properties from the original
    Properties globalProperties = new Properties();
    if (strategyFilename != null)
        globalProperties.setProperty(IndexCluster.PropertyKeys.STRATEGY, strategyFilename);
    globalProperties.setProperty(DocumentalCluster.PropertyKeys.BLOOM, false);
    globalProperties.setProperty(Index.PropertyKeys.INDEXCLASS, LexicalCluster.class.getName());
    for (int i = 0; i < numIndices; i++)
        globalProperties.addProperty(IndexCluster.PropertyKeys.LOCALINDEX, localBasename[i]);
    globalProperties.setProperty(Index.PropertyKeys.FIELD, properties.getProperty(Index.PropertyKeys.FIELD));
    globalProperties.setProperty(Index.PropertyKeys.POSTINGS,
            properties.getProperty(Index.PropertyKeys.POSTINGS));
    globalProperties.setProperty(Index.PropertyKeys.OCCURRENCES,
            properties.getProperty(Index.PropertyKeys.OCCURRENCES));
    globalProperties.setProperty(Index.PropertyKeys.DOCUMENTS,
            properties.getProperty(Index.PropertyKeys.DOCUMENTS));
    globalProperties.setProperty(Index.PropertyKeys.TERMS, properties.getProperty(Index.PropertyKeys.TERMS));
    globalProperties.setProperty(Index.PropertyKeys.TERMPROCESSOR,
            properties.getProperty(Index.PropertyKeys.TERMPROCESSOR));
    globalProperties.setProperty(Index.PropertyKeys.MAXCOUNT,
            properties.getProperty(Index.PropertyKeys.MAXCOUNT));
    globalProperties.setProperty(Index.PropertyKeys.MAXDOCSIZE,
            properties.getProperty(Index.PropertyKeys.MAXDOCSIZE));
    globalProperties.save(outputBasename + DiskBasedIndex.PROPERTIES_EXTENSION);
    LOGGER.debug(
            "Properties for clustered index " + outputBasename + ": " + new ConfigurationMap(globalProperties));

    // Close all local output streams and write the per-index property files.
    for (int i = 0; i < numIndices; i++) {
        if (isQuasiSuccinct) {
            localQSPointers[i].close();
            if (localQSCounts != null) {
                localCountsOffsets[i].close();
                localQSCounts[i].close();
            }
            if (localQSPositions != null) {
                localPositionsOffsets[i].close();
                localQSPositions[i].close();
            }
        } else {
            localIndex[i].close();
            if (isHighPerformance)
                localPositions[i].close();
        }

        localOffsets[i].close();

        if (posNumBits != null)
            localPosNumBits[i].close();
        if (sumsMaxPos != null)
            localSumsMaxPos[i].close();

        localFrequencies[i].close();
        localOccurrencies[i].close();
        localTerms[i].close();
        // Every local index shares the global size list: copy it verbatim.
        final InputStream input = new FileInputStream(inputBasename + DiskBasedIndex.SIZES_EXTENSION);
        final OutputStream output = new FileOutputStream(localBasename[i] + DiskBasedIndex.SIZES_EXTENSION);
        IOUtils.copy(input, output);
        input.close();
        output.close();
        Properties localProperties = new Properties();
        localProperties.addAll(globalProperties);
        localProperties.setProperty(Index.PropertyKeys.TERMS, numTerms[i]);
        localProperties.setProperty(Index.PropertyKeys.OCCURRENCES, localOccurrences[i]);
        localProperties.setProperty(Index.PropertyKeys.POSTINGS, numberOfPostings[i]);
        localProperties.setProperty(Index.PropertyKeys.INDEXCLASS,
                properties.getProperty(Index.PropertyKeys.INDEXCLASS));
        localProperties.setProperty(QuasiSuccinctIndex.PropertyKeys.BYTEORDER,
                properties.getProperty(QuasiSuccinctIndex.PropertyKeys.BYTEORDER));
        localProperties.addProperties(Index.PropertyKeys.CODING,
                properties.getStringArray(Index.PropertyKeys.CODING));
        localProperties.setProperty(BitStreamIndex.PropertyKeys.SKIPQUANTUM,
                properties.getProperty(BitStreamIndex.PropertyKeys.SKIPQUANTUM));
        localProperties.setProperty(BitStreamIndex.PropertyKeys.SKIPHEIGHT,
                properties.getProperty(BitStreamIndex.PropertyKeys.SKIPHEIGHT));
        if (strategyProperties != null && strategyProperties[i] != null)
            localProperties.addAll(strategyProperties[i]);
        localProperties.save(localBasename[i] + DiskBasedIndex.PROPERTIES_EXTENSION);
        LOGGER.debug("Post-partitioning properties for index " + localBasename[i] + ": "
                + new ConfigurationMap(localProperties));
    }
}

From source file:it.unimi.di.big.mg4j.tool.PartitionDocumentally.java

/**
 * Documentally partitions the global index: each posting is routed to the local
 * index chosen by {@code strategy} from its document pointer. Posting data is
 * buffered per term in temporary per-index bitstreams, then flushed through the
 * per-index writers with document pointers remapped to local pointers. Finally
 * writes per-index and global cluster property files.
 */
public void run() throws Exception {
    final ProgressLogger pl = new ProgressLogger(LOGGER, logInterval, TimeUnit.MILLISECONDS);
    final IntBigList sizeList = globalIndex.sizes;
    partitionSizes();

    // Per-term scratch state, reused across terms.
    final int[] position = new int[Math.max(0, globalIndex.maxCount)];
    final long[] localFrequency = new long[numIndices];
    final long[] sumMaxPos = new long[numIndices];
    final int[] usedIndex = new int[numIndices];
    final InputBitStream[] direct = new InputBitStream[numIndices];
    final InputBitStream[] indirect = new InputBitStream[numIndices];
    @SuppressWarnings("unchecked")
    final BloomFilter<Void>[] bloomFilter = bloomFilterPrecision != 0 ? new BloomFilter[numIndices] : null;
    final File[] tempFile = new File[numIndices];
    final CachingOutputBitStream[] temp = new CachingOutputBitStream[numIndices];
    IndexIterator indexIterator;

    // One temporary buffer per local index; `direct` reads from the in-memory
    // buffer, `indirect` from the backing file when the buffer overflowed.
    for (int i = 0; i < numIndices; i++) {
        tempFile[i] = new File(localBasename[i] + ".temp");
        temp[i] = new CachingOutputBitStream(tempFile[i], bufferSize);
        direct[i] = new InputBitStream(temp[i].buffer());
        indirect[i] = new InputBitStream(tempFile[i]);
        if (bloomFilterPrecision != 0)
            bloomFilter[i] = BloomFilter.create(globalIndex.numberOfTerms, bloomFilterPrecision);
    }
    int usedIndices;
    MutableString currentTerm = new MutableString();
    Payload payload = null;
    long frequency, globalPointer, localPointer;
    int localIndex, count = -1;

    pl.expectedUpdates = globalIndex.numberOfPostings;
    pl.itemsName = "postings";
    pl.logInterval = logInterval;
    pl.start("Partitioning index...");

    for (long t = 0; t < globalIndex.numberOfTerms; t++) {
        terms.readLine(currentTerm);
        indexIterator = indexReader.nextIterator();
        usedIndices = 0;
        frequency = indexIterator.frequency();

        // Phase 1: scatter this term's postings into the per-index temp buffers.
        for (long j = 0; j < frequency; j++) {
            globalPointer = indexIterator.nextDocument();
            localIndex = strategy.localIndex(globalPointer);

            if (localFrequency[localIndex] == 0) {
                // First time we see a document for this index.
                currentTerm.println(localTerms[localIndex]);
                numTerms[localIndex]++;
                usedIndex[usedIndices++] = localIndex;
                if (bloomFilterPrecision != 0)
                    bloomFilter[localIndex].add(currentTerm);
            }

            /* Store temporarily posting data; note that we save the global pointer as we
             * will have to access the size list. */

            localFrequency[localIndex]++;
            numPostings[localIndex]++;
            temp[localIndex].writeLongGamma(globalPointer);

            // NOTE(review): payload is only (re)assigned when globalIndex.hasPayloads is
            // true, but written when havePayloads is true — confirm the two flags always
            // agree, otherwise payload may be null or stale here.
            if (globalIndex.hasPayloads)
                payload = indexIterator.payload();
            if (havePayloads)
                payload.write(temp[localIndex]);

            if (haveCounts) {
                count = indexIterator.count();
                temp[localIndex].writeGamma(count);
                occurrencies[localIndex] += count;
                if (maxDocPos[localIndex] < count)
                    maxDocPos[localIndex] = count;
                if (havePositions) {
                    // Positions are delta-gap encoded (difference minus one).
                    int pos = indexIterator.nextPosition(), prevPos = pos;
                    temp[localIndex].writeDelta(pos);
                    for (int p = 1; p < count; p++) {
                        temp[localIndex].writeDelta((pos = indexIterator.nextPosition()) - prevPos - 1);
                        prevPos = pos;
                    }
                    sumMaxPos[localIndex] += pos;
                }
            }
        }

        // We now run through the indices used by this term and copy from the temporary buffer.

        OutputBitStream obs;

        // Phase 2: flush each touched index's buffered postings through its writer.
        for (int k = 0; k < usedIndices; k++) {
            final int i = usedIndex[k];

            if (haveCounts)
                numOccurrences[i] += occurrencies[i];
            InputBitStream ibs;
            if (quasiSuccinctIndexWriter[i] != null)
                quasiSuccinctIndexWriter[i].newInvertedList(localFrequency[i], occurrencies[i], sumMaxPos[i]);
            else
                indexWriter[i].newInvertedList();
            occurrencies[i] = 0;

            temp[i].align();
            if (temp[i].buffer() != null)
                ibs = direct[i];
            else {
                // We cannot read directly from the internal buffer.
                ibs = indirect[i];
                ibs.flush();
                temp[i].flush();
            }

            ibs.position(0);

            indexWriter[i].writeFrequency(localFrequency[i]);
            for (long j = 0; j < localFrequency[i]; j++) {
                obs = indexWriter[i].newDocumentRecord();
                globalPointer = ibs.readLongGamma();
                // Remap the stored global pointer to the local document space.
                localPointer = strategy.localPointer(globalPointer);
                indexWriter[i].writeDocumentPointer(obs, localPointer);
                if (havePayloads) {
                    payload.read(ibs);
                    indexWriter[i].writePayload(obs, payload);
                }
                if (haveCounts)
                    indexWriter[i].writePositionCount(obs, count = ibs.readGamma());
                if (havePositions) {
                    // Undo the gap encoding before handing positions to the writer.
                    ibs.readDeltas(position, count);
                    for (int p = 1; p < count; p++)
                        position[p] += position[p - 1] + 1;
                    indexWriter[i].writeDocumentPositions(obs, position, 0, count,
                            sizeList != null ? sizeList.getInt(globalPointer) : -1);
                }

            }
            // Reset this index's temp buffer and per-term counters for the next term.
            temp[i].position(0);
            temp[i].writtenBits(0);
            localFrequency[i] = 0;
            sumMaxPos[i] = 0;
        }

        usedIndices = 0;
        pl.count += frequency - 1;
        pl.update();
    }

    pl.done();

    Properties globalProperties = new Properties();
    globalProperties.setProperty(Index.PropertyKeys.FIELD,
            inputProperties.getProperty(Index.PropertyKeys.FIELD));
    globalProperties.setProperty(Index.PropertyKeys.TERMPROCESSOR,
            inputProperties.getProperty(Index.PropertyKeys.TERMPROCESSOR));

    // Close per-index writers, persist Bloom filters, delete temp files, and save
    // per-index property files.
    for (int i = 0; i < numIndices; i++) {
        localTerms[i].close();
        indexWriter[i].close();
        if (bloomFilterPrecision != 0)
            BinIO.storeObject(bloomFilter[i], localBasename[i] + DocumentalCluster.BLOOM_EXTENSION);
        temp[i].close();
        tempFile[i].delete();

        Properties localProperties = indexWriter[i].properties();
        localProperties.addAll(globalProperties);
        localProperties.setProperty(Index.PropertyKeys.MAXCOUNT, String.valueOf(maxDocPos[i]));
        localProperties.setProperty(Index.PropertyKeys.MAXDOCSIZE, maxDocSize[i]);
        localProperties.setProperty(Index.PropertyKeys.FIELD,
                globalProperties.getProperty(Index.PropertyKeys.FIELD));
        localProperties.setProperty(Index.PropertyKeys.OCCURRENCES, haveCounts ? numOccurrences[i] : -1);
        localProperties.setProperty(Index.PropertyKeys.POSTINGS, numPostings[i]);
        localProperties.setProperty(Index.PropertyKeys.TERMS, numTerms[i]);
        if (havePayloads)
            localProperties.setProperty(Index.PropertyKeys.PAYLOADCLASS, payload.getClass().getName());
        if (strategyProperties[i] != null)
            localProperties.addAll(strategyProperties[i]);
        localProperties.save(localBasename[i] + DiskBasedIndex.PROPERTIES_EXTENSION);
    }

    if (strategyFilename != null)
        globalProperties.setProperty(IndexCluster.PropertyKeys.STRATEGY, strategyFilename);
    for (int i = 0; i < numIndices; i++)
        globalProperties.addProperty(IndexCluster.PropertyKeys.LOCALINDEX, localBasename[i]);
    globalProperties.setProperty(DocumentalCluster.PropertyKeys.BLOOM, bloomFilterPrecision != 0);
    // If we partition an index with a single term, by definition we have a flat cluster
    globalProperties.setProperty(DocumentalCluster.PropertyKeys.FLAT,
            inputProperties.getLong(Index.PropertyKeys.TERMS) <= 1);
    globalProperties.setProperty(Index.PropertyKeys.MAXCOUNT,
            inputProperties.getProperty(Index.PropertyKeys.MAXCOUNT));
    globalProperties.setProperty(Index.PropertyKeys.MAXDOCSIZE,
            inputProperties.getProperty(Index.PropertyKeys.MAXDOCSIZE));
    globalProperties.setProperty(Index.PropertyKeys.POSTINGS,
            inputProperties.getProperty(Index.PropertyKeys.POSTINGS));
    globalProperties.setProperty(Index.PropertyKeys.OCCURRENCES,
            inputProperties.getProperty(Index.PropertyKeys.OCCURRENCES));
    globalProperties.setProperty(Index.PropertyKeys.DOCUMENTS,
            inputProperties.getProperty(Index.PropertyKeys.DOCUMENTS));
    globalProperties.setProperty(Index.PropertyKeys.TERMS,
            inputProperties.getProperty(Index.PropertyKeys.TERMS));
    if (havePayloads)
        globalProperties.setProperty(Index.PropertyKeys.PAYLOADCLASS, payload.getClass().getName());

    /* For the general case, we must rely on a merged cluster. However, if we detect a contiguous
     * strategy we can optimise a bit. */

    globalProperties.setProperty(Index.PropertyKeys.INDEXCLASS,
            strategy instanceof ContiguousDocumentalStrategy ? DocumentalConcatenatedCluster.class.getName()
                    : DocumentalMergedCluster.class.getName());

    globalProperties.save(outputBasename + DiskBasedIndex.PROPERTIES_EXTENSION);
    LOGGER.debug(
            "Properties for clustered index " + outputBasename + ": " + new ConfigurationMap(globalProperties));

}

From source file:edu.nyu.tandon.tool.PrunedPartition.java

/**
 * Partitions the global index into a single pruned local index (index 0) according to
 * {@code strategy}: document-level pruning ({@code docPruning}) keeps whole documents,
 * posting-level pruning keeps individual (term, document) pairs.
 *
 * <p>For every surviving term the posting list is buffered in memory, re-sorted by the
 * strategy-assigned local document IDs, and re-emitted through {@code indexWriter[0]}
 * (or {@code quasiSuccinctIndexWriter[0]}). Original global term frequencies are saved
 * to a {@code .globaltermfreq} side file, and both local and global property files are
 * written at the end.
 *
 * @throws Exception if reading the global index or writing the local index fails.
 */
public void run() throws Exception {

    final ProgressLogger pl = new ProgressLogger(LOGGER, logInterval, TimeUnit.MILLISECONDS);
    final IntBigList sizeList = globalIndex.sizes;

    partitionSizes();

    long localFrequency = 0;
    // Accumulated position total passed to the quasi-succinct writer as an upper bound.
    // NOTE(review): despite the name this sums *every* position, not per-document maxima —
    // preserved as-is since the QS writer only needs an upper bound; verify against writer docs.
    long sumMaxPos = 0;

    // Optional Bloom filter over the terms kept in the pruned index, so queries for
    // absent terms can be short-circuited.
    @SuppressWarnings("unchecked")
    final BloomFilter<Void> bloomFilter = (bloomFilterPrecision != 0)
            ? BloomFilter.create(globalIndex.numberOfTerms, bloomFilterPrecision)
            : null;

    final MutableString currentTerm = new MutableString();
    Payload payload = null;
    long frequency, globalPointer, termID;
    int localIndex, count = -1;

    IndexIterator indexIterator;

    pl.expectedUpdates = globalIndex.numberOfPostings;
    pl.itemsName = "postings";
    pl.logInterval = logInterval;
    pl.start("Partitioning index...");

    // Side file: for each retained term, its original (global) document frequency.
    final OutputBitStream globalFrequencies = new OutputBitStream(localBasename[0] + ".globaltermfreq");

    // In-memory posting for one (term, document) pair. The current term's whole posting
    // list is buffered so it can be re-emitted in local-docID order.
    // TODO: fix so any size list is possible (currently the list must fit in RAM).
    class DocEntry {
        long docID;      // global document pointer
        Payload payload; // payload, when the index has payloads
        int count;       // within-document count, when the index has counts
        int[] pos;       // positions, when the index has positions
    }
    final Long2ObjectOpenHashMap<DocEntry> list = new Long2ObjectOpenHashMap<DocEntry>();

    for (long t = 0; t < globalIndex.numberOfTerms; t++) {

        terms.readLine(currentTerm);

        indexIterator = indexReader.nextIterator();
        frequency = indexIterator.frequency();
        termID = indexIterator.termNumber();
        assert termID == t;

        localFrequency = 0;

        // Posting pruning: skip terms that never made it into the pruned index.
        if (!docPruning && ((PostingPruningStrategy) strategy).localTermId(termID) == -1)
            continue;

        for (long j = 0; j < frequency; j++) {

            globalPointer = indexIterator.nextDocument();

            // Prune according to the strategy type: document-level strategies decide on the
            // document alone, posting-level strategies on the (term, document) pair.
            localIndex = (docPruning) ? strategy.localIndex(globalPointer)
                    : ((PostingPruningStrategy) strategy).localIndex(termID, globalPointer);

            // localIndex == 0 means the posting survives into the pruned index.
            if (localIndex == 0) {

                // First surviving posting of this term: register the term itself.
                if (localFrequency == 0) {
                    numTerms[0]++;
                    currentTerm.println(localTerms[localIndex]); // save term
                    globalFrequencies.writeLongGamma(frequency); // save original term frequency
                    if (bloomFilterPrecision != 0)
                        bloomFilter.add(currentTerm);
                }

                /* Buffer the posting data; note that we keep the global pointer, as we
                 * will have to access the size list when writing positions. The local
                 * docID is assigned when the list is flushed below. */
                final DocEntry d = new DocEntry();
                d.docID = globalPointer;
                if (globalIndex.hasPayloads)
                    payload = indexIterator.payload();
                d.payload = (havePayloads) ? payload : null;
                count = (haveCounts) ? indexIterator.count() : 0;
                d.count = count;

                numPostings[0]++;

                if (haveCounts) {
                    occurrencies[localIndex] += count;
                    if (maxDocPos[localIndex] < count)
                        maxDocPos[localIndex] = count;
                    if (havePositions) {
                        d.pos = new int[count];
                        for (int p = 0; p < count; p++) {
                            final int pos = indexIterator.nextPosition();
                            d.pos[p] = pos;
                            sumMaxPos += pos;
                        }
                    }
                }

                localFrequency++;
                list.put(strategy.localPointer(globalPointer), d);
            } else {
                // Posting is pruned away: still consume payload/counts/positions so the
                // underlying input streams stay in sync.
                if (globalIndex.hasPayloads)
                    payload = indexIterator.payload();
                if (haveCounts) {
                    count = indexIterator.count();
                    if (havePositions)
                        for (int p = 0; p < count; p++)
                            indexIterator.nextPosition();
                }
            }
        }

        /* Flush the buffered posting list. The buffer is keyed by local docID, which the
         * strategy may have assigned in a different order than the global one (e.g. by
         * hit count), so we sort before emitting. */
        OutputBitStream obs;

        if (localFrequency > 0) {

            if (haveCounts)
                numOccurrences[0] += occurrencies[0];

            // Open a new posting list in whichever writer is active.
            if (quasiSuccinctIndexWriter[0] != null)
                quasiSuccinctIndexWriter[0].newInvertedList(localFrequency, occurrencies[0], sumMaxPos);
            else
                indexWriter[0].newInvertedList();

            occurrencies[0] = 0;

            indexWriter[0].writeFrequency(localFrequency);

            // Emit in increasing local-docID order.
            final long[] docs = list.keySet().toLongArray();
            Arrays.sort(docs);
            for (final long localID : docs) {

                final DocEntry d = list.get(localID);
                globalPointer = d.docID;
                if (havePayloads)
                    payload = d.payload;
                if (haveCounts)
                    count = d.count;

                // TODO: support positions

                obs = indexWriter[0].newDocumentRecord();

                // The map key already is the local docID (strategy.localPointer).
                indexWriter[0].writeDocumentPointer(obs, localID);

                if (havePayloads)
                    indexWriter[0].writePayload(obs, payload);

                if (haveCounts)
                    indexWriter[0].writePositionCount(obs, count);
                if (havePositions)
                    indexWriter[0].writeDocumentPositions(obs, d.pos, 0, count,
                            sizeList != null ? sizeList.getInt(globalPointer) : -1);
            }
        }

        // Reset per-term accumulators.
        sumMaxPos = 0;
        localFrequency = 0;
        pl.count += frequency - 1;
        pl.update();
        list.clear();
    }
    globalFrequencies.close();

    pl.done();

    // Cluster-level properties shared by the local and global property files.
    final Properties globalProperties = new Properties();
    globalProperties.setProperty(Index.PropertyKeys.FIELD,
            inputProperties.getProperty(Index.PropertyKeys.FIELD));
    globalProperties.setProperty(Index.PropertyKeys.TERMPROCESSOR,
            inputProperties.getProperty(Index.PropertyKeys.TERMPROCESSOR));

    localTerms[0].close();
    indexWriter[0].close();
    if (bloomFilterPrecision != 0)
        BinIO.storeObject(bloomFilter, localBasename[0] + DocumentalCluster.BLOOM_EXTENSION);

    // Local index properties: writer-generated values plus statistics gathered above.
    final Properties localProperties = indexWriter[0].properties();
    localProperties.addAll(globalProperties);
    localProperties.setProperty(Index.PropertyKeys.MAXCOUNT, String.valueOf(maxDocPos[0]));
    localProperties.setProperty(Index.PropertyKeys.MAXDOCSIZE, maxDocSize[0]);
    localProperties.setProperty(Index.PropertyKeys.FIELD,
            globalProperties.getProperty(Index.PropertyKeys.FIELD));
    localProperties.setProperty(Index.PropertyKeys.OCCURRENCES, haveCounts ? numOccurrences[0] : -1);
    localProperties.setProperty(Index.PropertyKeys.POSTINGS, numPostings[0]);
    localProperties.setProperty(Index.PropertyKeys.TERMS, numTerms[0]);
    if (havePayloads)
        localProperties.setProperty(Index.PropertyKeys.PAYLOADCLASS, payload.getClass().getName());
    if (strategyProperties != null && strategyProperties[0] != null)
        localProperties.addAll(strategyProperties[0]);
    // Record the original (global) index statistics alongside the local ones.
    localProperties.addProperty(globalPropertyKeys.G_MAXCOUNT,
            inputProperties.getProperty(Index.PropertyKeys.MAXCOUNT));
    localProperties.addProperty(globalPropertyKeys.G_MAXDOCSIZE,
            inputProperties.getProperty(Index.PropertyKeys.MAXDOCSIZE));
    localProperties.addProperty(globalPropertyKeys.G_POSTINGS,
            inputProperties.getProperty(Index.PropertyKeys.POSTINGS));
    localProperties.addProperty(globalPropertyKeys.G_OCCURRENCES,
            inputProperties.getProperty(Index.PropertyKeys.OCCURRENCES));
    localProperties.addProperty(globalPropertyKeys.G_DOCUMENTS,
            inputProperties.getProperty(Index.PropertyKeys.DOCUMENTS));
    localProperties.addProperty(globalPropertyKeys.G_TERMS,
            inputProperties.getProperty(Index.PropertyKeys.TERMS));

    localProperties.save(localBasename[0] + DiskBasedIndex.PROPERTIES_EXTENSION);

    if (strategyFilename != null)
        globalProperties.setProperty(IndexCluster.PropertyKeys.STRATEGY, strategyFilename);
    globalProperties.addProperty(IndexCluster.PropertyKeys.LOCALINDEX, localBasename[0]);
    globalProperties.setProperty(DocumentalCluster.PropertyKeys.BLOOM, bloomFilterPrecision != 0);
    // If we partition an index with a single term, by definition we have a flat cluster.
    globalProperties.setProperty(DocumentalCluster.PropertyKeys.FLAT,
            inputProperties.getLong(Index.PropertyKeys.TERMS) <= 1);
    globalProperties.setProperty(Index.PropertyKeys.MAXCOUNT,
            inputProperties.getProperty(Index.PropertyKeys.MAXCOUNT));
    globalProperties.setProperty(Index.PropertyKeys.MAXDOCSIZE,
            inputProperties.getProperty(Index.PropertyKeys.MAXDOCSIZE));
    globalProperties.setProperty(Index.PropertyKeys.POSTINGS,
            inputProperties.getProperty(Index.PropertyKeys.POSTINGS));
    globalProperties.setProperty(Index.PropertyKeys.OCCURRENCES,
            inputProperties.getProperty(Index.PropertyKeys.OCCURRENCES));
    globalProperties.setProperty(Index.PropertyKeys.DOCUMENTS,
            inputProperties.getProperty(Index.PropertyKeys.DOCUMENTS));
    globalProperties.setProperty(Index.PropertyKeys.TERMS,
            inputProperties.getProperty(Index.PropertyKeys.TERMS));
    if (havePayloads)
        globalProperties.setProperty(Index.PropertyKeys.PAYLOADCLASS, payload.getClass().getName());

    /* For the general case, we must rely on a merged cluster. However, if we detect a
     * contiguous strategy we can optimise a bit. */
    globalProperties.setProperty(Index.PropertyKeys.INDEXCLASS,
            strategy instanceof ContiguousDocumentalStrategy ? DocumentalConcatenatedCluster.class.getName()
                    : DocumentalMergedCluster.class.getName());

    globalProperties.save(outputBasename + DiskBasedIndex.PROPERTIES_EXTENSION);
    LOGGER.debug(
            "Properties for clustered index " + outputBasename + ": " + new ConfigurationMap(globalProperties));

}