List of usage examples for the org.apache.commons.configuration ConfigurationMap constructor:

    public ConfigurationMap(Configuration configuration)

Creates a ConfigurationMap that wraps the specified Configuration instance.

From source file: com.linkedin.pinot.common.segment.fetcher.SegmentFetcherFactory.java
/**
 * Initializes the segment fetcher factory from the server's Helix properties.
 * <p>
 * Reads the subset of the configuration under the segment-fetcher-factory prefix,
 * extracts the protocol (the first dotted component of each config key), and
 * registers a fetcher for each protocol not yet registered, configured with the
 * protocol's own configuration subset.
 *
 * @param pinotHelixProperties the full Pinot Helix configuration
 */
public static void initSegmentFetcherFactory(Configuration pinotHelixProperties) {
    Configuration segmentFetcherFactoryConfig = pinotHelixProperties
            .subset(CommonConstants.Server.PREFIX_OF_CONFIG_OF_SEGMENT_FETCHER_FACTORY);
    Iterator segmentFetcherFactoryConfigIterator = segmentFetcherFactoryConfig.getKeys();
    while (segmentFetcherFactoryConfigIterator.hasNext()) {
        Object configKeyObject = segmentFetcherFactoryConfigIterator.next();
        try {
            String segmentFetcherConfigKey = configKeyObject.toString();
            // BUG FIX: String.split() takes a regex, and an unescaped "." matches any
            // character, so the original split(".", 2)[0] always yielded the empty
            // string as the protocol. Escape the dot to split on a literal '.'.
            String protocol = segmentFetcherConfigKey.split("\\.", 2)[0];
            if (!SegmentFetcherFactory.containsProtocol(protocol)) {
                SegmentFetcherFactory
                        .initSegmentFetcher(new ConfigurationMap(segmentFetcherFactoryConfig.subset(protocol)));
            }
        } catch (Exception e) {
            // Pass the exception to the logger so the stack trace is not silently lost.
            LOGGER.error("Got exception to process the key: " + configKeyObject, e);
        }
    }
}
From source file:com.linkedin.pinot.routing.PercentageBasedRoutingTableSelector.java
/**
 * Loads the per-table LLC routing percentages from the "table" subset of the
 * given configuration into {@code _percentMap}. If no table-specific
 * configuration exists, or parsing fails, no entries are added and 0% LLC
 * routing is used for all tables.
 *
 * @param configuration the routing-table-selector configuration
 */
public void init(Configuration configuration) {
    try {
        final Configuration tableSubset = configuration.subset(TABLE_KEY);
        if (tableSubset == null || tableSubset.isEmpty()) {
            LOGGER.info("No specific table configuration. Using 0% LLC for all tables");
            return;
        }
        // Expose the configuration subset as a Map and cache each table's percentage.
        Set<Map.Entry<String, Integer>> entries = new ConfigurationMap(tableSubset).entrySet();
        for (Map.Entry<String, Integer> tableEntry : entries) {
            LOGGER.info("Using {} percent LLC routing for table {}", tableEntry.getValue(),
                    tableEntry.getKey());
            _percentMap.put(tableEntry.getKey(), tableEntry.getValue());
        }
    } catch (Exception e) {
        LOGGER.warn("Could not parse get config for {}. Using no LLC routing", TABLE_KEY, e);
    }
}
From source file:com.bitcup.configurator.FileConfigMap.java
/**
 * Loads the configuration properties file at the local, host, env and base
 * levels and exposes the combined configuration as a {@link java.util.Map}.
 * <p/>
 * The configuration is refreshed at the default refresh delay of
 * {@value #DEFAULT_REFRESH_DELAY_IN_SECONDS} seconds.
 *
 * @param filename name of the properties file to load
 */
public FileConfigMap(String filename) {
    super(filename);
    // Wrap the loaded (auto-refreshing) configuration with a Map view.
    map = new ConfigurationMap(configuration);
}
From source file:com.bitcup.configurator.FileConfigMap.java
/** * Loads configuration properties file at the local, host, env and base levels * and exposes them as a {@link java.util.Map}. Sets the refresh delay on the * {@link org.apache.commons.configuration.reloading.FileChangedReloadingStrategy}. * * @param filename name of the properties file to load * @param refreshDelaySecs refresh delay in seconds *///from ww w. j a v a 2s. c om public FileConfigMap(String filename, int refreshDelaySecs) { super(filename, refreshDelaySecs); this.map = new ConfigurationMap(this.configuration); }
From source file:it.unimi.dsi.util.Properties.java
/**
 * Adds all properties from the given configuration.
 *
 * <p>Properties from the new configuration will clear properties from the
 * first one (setting a key replaces any existing values for that key).
 *
 * @param configuration a configuration.
 */
@SuppressWarnings("unchecked")
public void addAll(final Configuration configuration) {
    // View both this object and the source configuration as maps, then copy
    // every entry across; ConfigurationMap.put() clears before it sets.
    final ConfigurationMap target = new ConfigurationMap(this);
    final ConfigurationMap source = new ConfigurationMap(configuration);
    target.putAll(source);
}
From source file:com.wrmsr.neurosis.util.Configs.java
/**
 * Extracts the configuration subtree under {@code prefix} from {@code properties},
 * removes it from {@code properties}, and returns the extracted entries as a flat
 * string-to-string map. Returns an empty map (and leaves {@code properties}
 * untouched) when the prefix does not exist.
 * <p>
 * NOTE(review): this mutates the supplied {@code properties} map in place — after
 * the call, every key that belonged to the {@code prefix} subtree has been removed.
 * The key-set copy via {@code Sets.newHashSet} exists to avoid concurrent
 * modification while removing.
 *
 * @param properties flat key/value properties to strip (modified in place)
 * @param prefix     the dotted configuration prefix identifying the subtree
 * @return the stripped subtree as an immutable flat map, or an empty map if absent
 */
public static Map<String, String> stripSubconfig(Map<String, String> properties, String prefix) { HierarchicalConfiguration hierarchicalProperties = toHierarchical(properties); Configuration subconfig; try { subconfig = hierarchicalProperties.configurationAt(prefix); } catch (IllegalArgumentException e) { return ImmutableMap.of(); } Map<String, String> subproperties = new ConfigurationMap(subconfig).entrySet().stream() .collect(ImmutableCollectors.toImmutableMap(e -> checkNotNull(e.getKey()).toString(), e -> checkNotNull(e.getValue()).toString())); hierarchicalProperties.clearTree(prefix); for (String key : Sets.newHashSet(properties.keySet())) { if (!hierarchicalProperties.containsKey(key)) { properties.remove(key); } } return subproperties; }
From source file:com.ikanow.aleph2.graph.titan.services.TitanGraphService.java
/**
 * Builds the remote (distributed) configuration by merging Titan's live
 * configuration into the local config under the Titan config-override path.
 * When Titan is disabled ({@code _titan == null}) the local config is passed
 * through unchanged.
 * <p>
 * The double cast {@code (AbstractMap<String, ?>) (AbstractMap<?, ?>)} exists
 * only to coerce the raw {@code ConfigurationMap} into the generic signature
 * {@code ConfigFactory.parseMap} expects; it performs no runtime conversion.
 *
 * @param maybe_bucket the bucket for which the config is built (unused here)
 * @param local_config the node-local configuration to augment
 * @return the local config with Titan's configuration spliced in, or the local
 *         config unchanged when Titan is disabled
 */
@SuppressWarnings("unchecked") @Override public Config createRemoteConfig(Optional<DataBucketBean> maybe_bucket, Config local_config) { if (null == _titan) return local_config; // (titan is disabled, just pass through) final Config distributed_config = ConfigFactory.parseMap( (AbstractMap<String, ?>) (AbstractMap<?, ?>) new ConfigurationMap(_titan.configuration())); return local_config.withValue(TitanGraphConfigBean.PROPERTIES_ROOT + "." + BeanTemplateUtils.from(TitanGraphConfigBean.class).field(TitanGraphConfigBean::config_override), distributed_config.root()); }
From source file:it.unimi.di.big.mg4j.tool.PartitionLexically.java
/**
 * Partitions an on-disk index lexically: streams every term of the global index,
 * routes it to a local sub-index chosen by {@code strategy.localIndex(term)}, and
 * copies the corresponding postings/offsets/frequencies/occurrencies/terms data
 * bit-by-bit into per-index output streams. Handles three index layouts
 * (standard bitstream, high-performance with separate positions, quasi-succinct
 * with pointers/counts/positions streams), then writes global cluster properties
 * and per-index local properties.
 * <p>
 * NOTE(review): {@code globalPositions} is closed twice when
 * {@code isHighPerformance} is true — once under the {@code != null} guard and
 * again under the {@code isHighPerformance} guard near the end of the close
 * sequence; the second close is redundant.
 * <p>
 * NOTE(review): in the per-index properties loop,
 * {@code localProperties.setProperty(Index.PropertyKeys.POSTINGS, numberOfPostings[i])}
 * appears twice in a row; the duplicate is harmless but should be removed.
 * <p>
 * NOTE(review): statement order is load-bearing throughout (gamma/delta reads
 * must stay aligned with the corresponding writes), so no restructuring is done
 * here — documentation only.
 */
@SuppressWarnings("resource") public void run() throws ConfigurationException, IOException, ClassNotFoundException { final ProgressLogger pl = new ProgressLogger(LOGGER, logInterval, TimeUnit.MILLISECONDS); final byte[] buffer = new byte[bufferSize]; final Properties properties = new Properties(inputBasename + DiskBasedIndex.PROPERTIES_EXTENSION); final long numberOfTerms = properties.getLong(Index.PropertyKeys.TERMS); final Class<?> indexClass = Class.forName(properties.getString(Index.PropertyKeys.INDEXCLASS)); final boolean isHighPerformance = BitStreamHPIndex.class.isAssignableFrom(indexClass); final boolean isQuasiSuccinct = QuasiSuccinctIndex.class.isAssignableFrom(indexClass); final ByteOrder byteOrder = isQuasiSuccinct ? DiskBasedIndex.byteOrder(properties.getString(QuasiSuccinctIndex.PropertyKeys.BYTEORDER)) : null; final OutputBitStream[] localIndex = new OutputBitStream[numIndices]; final OutputBitStream[] localPositions = new OutputBitStream[numIndices]; final OutputBitStream[] localOffsets = new OutputBitStream[numIndices]; final LongWordOutputBitStream[] localQSPointers = isQuasiSuccinct ? new LongWordOutputBitStream[numIndices] : null; final LongWordOutputBitStream[] localQSCounts = isQuasiSuccinct ? new LongWordOutputBitStream[numIndices] : null; final LongWordOutputBitStream[] localQSPositions = isQuasiSuccinct ? new LongWordOutputBitStream[numIndices] : null; final OutputBitStream[] localCountsOffsets = isQuasiSuccinct ? new OutputBitStream[numIndices] : null; final OutputBitStream[] localPositionsOffsets = isQuasiSuccinct ? 
new OutputBitStream[numIndices] : null; final OutputBitStream[] localPosNumBits = new OutputBitStream[numIndices]; final OutputBitStream[] localSumsMaxPos = new OutputBitStream[numIndices]; final OutputBitStream[] localFrequencies = new OutputBitStream[numIndices]; final OutputBitStream[] localOccurrencies = new OutputBitStream[numIndices]; final PrintWriter[] localTerms = new PrintWriter[numIndices]; final long numTerms[] = new long[numIndices]; final long localOccurrences[] = new long[numIndices]; final long numberOfPostings[] = new long[numIndices]; final InputBitStream globalIndex = isQuasiSuccinct ? null : new InputBitStream(inputBasename + DiskBasedIndex.INDEX_EXTENSION, bufferSize); final long globalPositionsLength = new File(inputBasename + DiskBasedIndex.POSITIONS_EXTENSION).length(); final InputBitStream globalPositions = isHighPerformance ? new InputBitStream(inputBasename + DiskBasedIndex.POSITIONS_EXTENSION, bufferSize) : null; final FastBufferedReader terms = new FastBufferedReader(new InputStreamReader( new FileInputStream(inputBasename + DiskBasedIndex.TERMS_EXTENSION), "UTF-8")); final InputBitStream globalOffsets = new InputBitStream(inputBasename + (isQuasiSuccinct ? DiskBasedIndex.POINTERS_EXTENSIONS + DiskBasedIndex.OFFSETS_POSTFIX : DiskBasedIndex.OFFSETS_EXTENSION)); final InputBitStream globalPositionsOffsets = isQuasiSuccinct ? new InputBitStream( inputBasename + DiskBasedIndex.POSITIONS_EXTENSION + DiskBasedIndex.OFFSETS_POSTFIX, bufferSize) : null; final InputBitStream globalCountsOffsets = isQuasiSuccinct ? new InputBitStream( inputBasename + DiskBasedIndex.COUNTS_EXTENSION + DiskBasedIndex.OFFSETS_POSTFIX, bufferSize) : null; final LongWordInputBitStream globalPointersIbs = isQuasiSuccinct ? new LongWordInputBitStream(inputBasename + DiskBasedIndex.POINTERS_EXTENSIONS, bufferSize, byteOrder) : null; final LongWordInputBitStream globalCountsIbs = isQuasiSuccinct ? 
new LongWordInputBitStream(inputBasename + DiskBasedIndex.COUNTS_EXTENSION, bufferSize, byteOrder) : null; final LongWordInputBitStream globalPositionsIbs = isQuasiSuccinct ? new LongWordInputBitStream(inputBasename + DiskBasedIndex.POSITIONS_EXTENSION, bufferSize, byteOrder) : null; final File posNumBitsFile = new File(inputBasename + DiskBasedIndex.POSITIONS_NUMBER_OF_BITS_EXTENSION); final InputBitStream posNumBits = posNumBitsFile.exists() ? new InputBitStream(inputBasename + DiskBasedIndex.POSITIONS_NUMBER_OF_BITS_EXTENSION) : null; final File sumsMaxPosFile = new File(inputBasename + DiskBasedIndex.SUMS_MAX_POSITION_EXTENSION); final InputBitStream sumsMaxPos = sumsMaxPosFile.exists() ? new InputBitStream(sumsMaxPosFile) : null; final InputBitStream frequencies = new InputBitStream(inputBasename + DiskBasedIndex.FREQUENCIES_EXTENSION); final InputBitStream occurrencies = new InputBitStream( inputBasename + DiskBasedIndex.OCCURRENCIES_EXTENSION); globalOffsets.readGamma(); if (globalCountsOffsets != null) globalCountsOffsets.readGamma(); if (globalPositionsOffsets != null) globalPositionsOffsets.readGamma(); for (int i = 0; i < numIndices; i++) { if (!isQuasiSuccinct) localIndex[i] = new OutputBitStream(localBasename[i] + DiskBasedIndex.INDEX_EXTENSION, bufferSize); if (isHighPerformance) localPositions[i] = new OutputBitStream(localBasename[i] + DiskBasedIndex.POSITIONS_EXTENSION, bufferSize); if (isQuasiSuccinct) { localQSPointers[i] = new LongWordOutputBitStream( new FileOutputStream(localBasename[i] + DiskBasedIndex.POINTERS_EXTENSIONS).getChannel(), byteOrder); localQSCounts[i] = new LongWordOutputBitStream( new FileOutputStream(localBasename[i] + DiskBasedIndex.COUNTS_EXTENSION).getChannel(), byteOrder); localQSPositions[i] = new LongWordOutputBitStream( new FileOutputStream(localBasename[i] + DiskBasedIndex.POSITIONS_EXTENSION).getChannel(), byteOrder); } localFrequencies[i] = new OutputBitStream(localBasename[i] + DiskBasedIndex.FREQUENCIES_EXTENSION); 
localOccurrencies[i] = new OutputBitStream(localBasename[i] + DiskBasedIndex.OCCURRENCIES_EXTENSION); localTerms[i] = new PrintWriter( new OutputStreamWriter( new FastBufferedOutputStream( new FileOutputStream(localBasename[i] + DiskBasedIndex.TERMS_EXTENSION)), "UTF-8")); localOffsets[i] = new OutputBitStream(localBasename[i] + (isQuasiSuccinct ? DiskBasedIndex.POINTERS_EXTENSIONS + DiskBasedIndex.OFFSETS_POSTFIX : DiskBasedIndex.OFFSETS_EXTENSION)); if (posNumBits != null) localPosNumBits[i] = new OutputBitStream( localBasename[i] + DiskBasedIndex.POSITIONS_NUMBER_OF_BITS_EXTENSION); if (sumsMaxPos != null) localSumsMaxPos[i] = new OutputBitStream( localBasename[i] + DiskBasedIndex.SUMS_MAX_POSITION_EXTENSION); localOffsets[i].writeGamma(0); if (isQuasiSuccinct) { localCountsOffsets[i] = new OutputBitStream( localBasename[i] + DiskBasedIndex.COUNTS_EXTENSION + DiskBasedIndex.OFFSETS_POSTFIX); localPositionsOffsets[i] = new OutputBitStream( localBasename[i] + DiskBasedIndex.POSITIONS_EXTENSION + DiskBasedIndex.OFFSETS_POSTFIX); localCountsOffsets[i].writeGamma(0); localPositionsOffsets[i].writeGamma(0); } } // The current term final MutableString currTerm = new MutableString(); pl.expectedUpdates = numberOfTerms; pl.itemsName = "bits"; pl.logInterval = logInterval; pl.start("Partitioning index..."); long termNumber = 0; int k, prevK = -1; long previousHeaderLength = 0, newHeaderLength = 0; long length, positionsOffset = 0; while (terms.readLine(currTerm) != null) { k = strategy.localIndex(termNumber); // The local index for this term if (numTerms[k] != strategy.localNumber(termNumber)) throw new IllegalStateException(); numTerms[k]++; if (isHighPerformance) { final long temp = globalIndex.readBits(); positionsOffset = globalIndex.readLongDelta(); previousHeaderLength = (int) (globalIndex.readBits() - temp); if (prevK != -1) { length = positionsOffset - globalPositions.readBits(); copy(buffer, globalPositions, localPositions[prevK], length); } newHeaderLength = 
localIndex[k].writeLongDelta(localPositions[k].writtenBits()); } final long frequency = frequencies.readLongGamma(); localFrequencies[k].writeLongGamma(frequency); numberOfPostings[k] += frequency; if (posNumBits != null) localPosNumBits[k].writeLongGamma(posNumBits.readLongGamma()); if (sumsMaxPos != null) localSumsMaxPos[k].writeLongDelta(sumsMaxPos.readLongDelta()); final long occurrency = occurrencies.readLongGamma(); localOccurrences[k] += occurrency; localOccurrencies[k].writeLongGamma(occurrency); currTerm.println(localTerms[k]); if (isQuasiSuccinct) { localOffsets[k].writeLongGamma(length = globalOffsets.readLongGamma()); copy(globalPointersIbs, localQSPointers[k], length); localCountsOffsets[k].writeLongGamma(length = globalCountsOffsets.readLongGamma()); copy(globalCountsIbs, localQSCounts[k], length); localPositionsOffsets[k].writeLongGamma(length = globalPositionsOffsets.readLongGamma()); copy(globalPositionsIbs, localQSPositions[k], length); } else { length = globalOffsets.readLongGamma() - previousHeaderLength; localOffsets[k].writeLongGamma(length + newHeaderLength); copy(buffer, globalIndex, localIndex[k], length); } pl.update(); prevK = k; termNumber++; } // We pour the last piece of positions if (isHighPerformance) { if (prevK != -1) { length = globalPositionsLength * 8 - globalPositions.readBits(); copy(buffer, globalPositions, localPositions[prevK], length); } } pl.done(); terms.close(); globalOffsets.close(); if (globalIndex != null) globalIndex.close(); if (globalPointersIbs != null) globalPointersIbs.close(); if (globalCountsIbs != null) globalCountsIbs.close(); if (globalCountsOffsets != null) globalCountsOffsets.close(); if (globalPositionsIbs != null) globalPositionsIbs.close(); if (globalPositionsOffsets != null) globalPositionsOffsets.close(); if (globalPositions != null) globalPositions.close(); frequencies.close(); occurrencies.close(); if (posNumBits != null) posNumBits.close(); if (sumsMaxPos != null) sumsMaxPos.close(); if 
(isHighPerformance) globalPositions.close(); // We copy the relevant properties from the original Properties globalProperties = new Properties(); if (strategyFilename != null) globalProperties.setProperty(IndexCluster.PropertyKeys.STRATEGY, strategyFilename); globalProperties.setProperty(DocumentalCluster.PropertyKeys.BLOOM, false); globalProperties.setProperty(Index.PropertyKeys.INDEXCLASS, LexicalCluster.class.getName()); for (int i = 0; i < numIndices; i++) globalProperties.addProperty(IndexCluster.PropertyKeys.LOCALINDEX, localBasename[i]); globalProperties.setProperty(Index.PropertyKeys.FIELD, properties.getProperty(Index.PropertyKeys.FIELD)); globalProperties.setProperty(Index.PropertyKeys.POSTINGS, properties.getProperty(Index.PropertyKeys.POSTINGS)); globalProperties.setProperty(Index.PropertyKeys.OCCURRENCES, properties.getProperty(Index.PropertyKeys.OCCURRENCES)); globalProperties.setProperty(Index.PropertyKeys.DOCUMENTS, properties.getProperty(Index.PropertyKeys.DOCUMENTS)); globalProperties.setProperty(Index.PropertyKeys.TERMS, properties.getProperty(Index.PropertyKeys.TERMS)); globalProperties.setProperty(Index.PropertyKeys.TERMPROCESSOR, properties.getProperty(Index.PropertyKeys.TERMPROCESSOR)); globalProperties.setProperty(Index.PropertyKeys.MAXCOUNT, properties.getProperty(Index.PropertyKeys.MAXCOUNT)); globalProperties.setProperty(Index.PropertyKeys.MAXDOCSIZE, properties.getProperty(Index.PropertyKeys.MAXDOCSIZE)); globalProperties.save(outputBasename + DiskBasedIndex.PROPERTIES_EXTENSION); LOGGER.debug( "Properties for clustered index " + outputBasename + ": " + new ConfigurationMap(globalProperties)); for (int i = 0; i < numIndices; i++) { if (isQuasiSuccinct) { localQSPointers[i].close(); if (localQSCounts != null) { localCountsOffsets[i].close(); localQSCounts[i].close(); } if (localQSPositions != null) { localPositionsOffsets[i].close(); localQSPositions[i].close(); } } else { localIndex[i].close(); if (isHighPerformance) 
localPositions[i].close(); } localOffsets[i].close(); if (posNumBits != null) localPosNumBits[i].close(); if (sumsMaxPos != null) localSumsMaxPos[i].close(); localFrequencies[i].close(); localOccurrencies[i].close(); localTerms[i].close(); final InputStream input = new FileInputStream(inputBasename + DiskBasedIndex.SIZES_EXTENSION); final OutputStream output = new FileOutputStream(localBasename[i] + DiskBasedIndex.SIZES_EXTENSION); IOUtils.copy(input, output); input.close(); output.close(); Properties localProperties = new Properties(); localProperties.addAll(globalProperties); localProperties.setProperty(Index.PropertyKeys.TERMS, numTerms[i]); localProperties.setProperty(Index.PropertyKeys.OCCURRENCES, localOccurrences[i]); localProperties.setProperty(Index.PropertyKeys.POSTINGS, numberOfPostings[i]); localProperties.setProperty(Index.PropertyKeys.POSTINGS, numberOfPostings[i]); localProperties.setProperty(Index.PropertyKeys.INDEXCLASS, properties.getProperty(Index.PropertyKeys.INDEXCLASS)); localProperties.setProperty(QuasiSuccinctIndex.PropertyKeys.BYTEORDER, properties.getProperty(QuasiSuccinctIndex.PropertyKeys.BYTEORDER)); localProperties.addProperties(Index.PropertyKeys.CODING, properties.getStringArray(Index.PropertyKeys.CODING)); localProperties.setProperty(BitStreamIndex.PropertyKeys.SKIPQUANTUM, properties.getProperty(BitStreamIndex.PropertyKeys.SKIPQUANTUM)); localProperties.setProperty(BitStreamIndex.PropertyKeys.SKIPHEIGHT, properties.getProperty(BitStreamIndex.PropertyKeys.SKIPHEIGHT)); if (strategyProperties != null && strategyProperties[i] != null) localProperties.addAll(strategyProperties[i]); localProperties.save(localBasename[i] + DiskBasedIndex.PROPERTIES_EXTENSION); LOGGER.debug("Post-partitioning properties for index " + localBasename[i] + ": " + new ConfigurationMap(localProperties)); } }
From source file:it.unimi.di.big.mg4j.tool.PartitionDocumentally.java
/**
 * Partitions an index documentally: for each term, iterates over its postings,
 * routes each document to the local sub-index chosen by
 * {@code strategy.localIndex(pointer)}, buffers the posting data (pointer,
 * payload, count, gap-coded positions) in a per-index temporary bitstream, then
 * flushes each term's buffered postings into the corresponding index writer with
 * remapped local pointers. Optionally populates per-index Bloom filters, then
 * writes global cluster and per-index properties.
 * <p>
 * NOTE(review): {@code payload} is assigned only when
 * {@code globalIndex.hasPayloads} but dereferenced when {@code havePayloads} —
 * presumably these flags always agree; TODO confirm against the enclosing class.
 * <p>
 * NOTE(review): gamma/delta read/write order in the buffer-and-replay protocol
 * is load-bearing, so the code below is left byte-identical — documentation only.
 */
public void run() throws Exception { final ProgressLogger pl = new ProgressLogger(LOGGER, logInterval, TimeUnit.MILLISECONDS); final IntBigList sizeList = globalIndex.sizes; partitionSizes(); final int[] position = new int[Math.max(0, globalIndex.maxCount)]; final long[] localFrequency = new long[numIndices]; final long[] sumMaxPos = new long[numIndices]; final int[] usedIndex = new int[numIndices]; final InputBitStream[] direct = new InputBitStream[numIndices]; final InputBitStream[] indirect = new InputBitStream[numIndices]; @SuppressWarnings("unchecked") final BloomFilter<Void>[] bloomFilter = bloomFilterPrecision != 0 ? new BloomFilter[numIndices] : null; final File[] tempFile = new File[numIndices]; final CachingOutputBitStream[] temp = new CachingOutputBitStream[numIndices]; IndexIterator indexIterator; for (int i = 0; i < numIndices; i++) { tempFile[i] = new File(localBasename[i] + ".temp"); temp[i] = new CachingOutputBitStream(tempFile[i], bufferSize); direct[i] = new InputBitStream(temp[i].buffer()); indirect[i] = new InputBitStream(tempFile[i]); if (bloomFilterPrecision != 0) bloomFilter[i] = BloomFilter.create(globalIndex.numberOfTerms, bloomFilterPrecision); } int usedIndices; MutableString currentTerm = new MutableString(); Payload payload = null; long frequency, globalPointer, localPointer; int localIndex, count = -1; pl.expectedUpdates = globalIndex.numberOfPostings; pl.itemsName = "postings"; pl.logInterval = logInterval; pl.start("Partitioning index..."); for (long t = 0; t < globalIndex.numberOfTerms; t++) { terms.readLine(currentTerm); indexIterator = indexReader.nextIterator(); usedIndices = 0; frequency = indexIterator.frequency(); for (long j = 0; j < frequency; j++) { globalPointer = indexIterator.nextDocument(); localIndex = strategy.localIndex(globalPointer); if (localFrequency[localIndex] == 0) { // First time we see a document for this index. 
currentTerm.println(localTerms[localIndex]); numTerms[localIndex]++; usedIndex[usedIndices++] = localIndex; if (bloomFilterPrecision != 0) bloomFilter[localIndex].add(currentTerm); } /* Store temporarily posting data; note that we save the global pointer as we * will have to access the size list. */ localFrequency[localIndex]++; numPostings[localIndex]++; temp[localIndex].writeLongGamma(globalPointer); if (globalIndex.hasPayloads) payload = indexIterator.payload(); if (havePayloads) payload.write(temp[localIndex]); if (haveCounts) { count = indexIterator.count(); temp[localIndex].writeGamma(count); occurrencies[localIndex] += count; if (maxDocPos[localIndex] < count) maxDocPos[localIndex] = count; if (havePositions) { int pos = indexIterator.nextPosition(), prevPos = pos; temp[localIndex].writeDelta(pos); for (int p = 1; p < count; p++) { temp[localIndex].writeDelta((pos = indexIterator.nextPosition()) - prevPos - 1); prevPos = pos; } sumMaxPos[localIndex] += pos; } } } // We now run through the indices used by this term and copy from the temporary buffer. OutputBitStream obs; for (int k = 0; k < usedIndices; k++) { final int i = usedIndex[k]; if (haveCounts) numOccurrences[i] += occurrencies[i]; InputBitStream ibs; if (quasiSuccinctIndexWriter[i] != null) quasiSuccinctIndexWriter[i].newInvertedList(localFrequency[i], occurrencies[i], sumMaxPos[i]); else indexWriter[i].newInvertedList(); occurrencies[i] = 0; temp[i].align(); if (temp[i].buffer() != null) ibs = direct[i]; else { // We cannot read directly from the internal buffer. 
ibs = indirect[i]; ibs.flush(); temp[i].flush(); } ibs.position(0); indexWriter[i].writeFrequency(localFrequency[i]); for (long j = 0; j < localFrequency[i]; j++) { obs = indexWriter[i].newDocumentRecord(); globalPointer = ibs.readLongGamma(); localPointer = strategy.localPointer(globalPointer); indexWriter[i].writeDocumentPointer(obs, localPointer); if (havePayloads) { payload.read(ibs); indexWriter[i].writePayload(obs, payload); } if (haveCounts) indexWriter[i].writePositionCount(obs, count = ibs.readGamma()); if (havePositions) { ibs.readDeltas(position, count); for (int p = 1; p < count; p++) position[p] += position[p - 1] + 1; indexWriter[i].writeDocumentPositions(obs, position, 0, count, sizeList != null ? sizeList.getInt(globalPointer) : -1); } } temp[i].position(0); temp[i].writtenBits(0); localFrequency[i] = 0; sumMaxPos[i] = 0; } usedIndices = 0; pl.count += frequency - 1; pl.update(); } pl.done(); Properties globalProperties = new Properties(); globalProperties.setProperty(Index.PropertyKeys.FIELD, inputProperties.getProperty(Index.PropertyKeys.FIELD)); globalProperties.setProperty(Index.PropertyKeys.TERMPROCESSOR, inputProperties.getProperty(Index.PropertyKeys.TERMPROCESSOR)); for (int i = 0; i < numIndices; i++) { localTerms[i].close(); indexWriter[i].close(); if (bloomFilterPrecision != 0) BinIO.storeObject(bloomFilter[i], localBasename[i] + DocumentalCluster.BLOOM_EXTENSION); temp[i].close(); tempFile[i].delete(); Properties localProperties = indexWriter[i].properties(); localProperties.addAll(globalProperties); localProperties.setProperty(Index.PropertyKeys.MAXCOUNT, String.valueOf(maxDocPos[i])); localProperties.setProperty(Index.PropertyKeys.MAXDOCSIZE, maxDocSize[i]); localProperties.setProperty(Index.PropertyKeys.FIELD, globalProperties.getProperty(Index.PropertyKeys.FIELD)); localProperties.setProperty(Index.PropertyKeys.OCCURRENCES, haveCounts ? 
numOccurrences[i] : -1); localProperties.setProperty(Index.PropertyKeys.POSTINGS, numPostings[i]); localProperties.setProperty(Index.PropertyKeys.TERMS, numTerms[i]); if (havePayloads) localProperties.setProperty(Index.PropertyKeys.PAYLOADCLASS, payload.getClass().getName()); if (strategyProperties[i] != null) localProperties.addAll(strategyProperties[i]); localProperties.save(localBasename[i] + DiskBasedIndex.PROPERTIES_EXTENSION); } if (strategyFilename != null) globalProperties.setProperty(IndexCluster.PropertyKeys.STRATEGY, strategyFilename); for (int i = 0; i < numIndices; i++) globalProperties.addProperty(IndexCluster.PropertyKeys.LOCALINDEX, localBasename[i]); globalProperties.setProperty(DocumentalCluster.PropertyKeys.BLOOM, bloomFilterPrecision != 0); // If we partition an index with a single term, by definition we have a flat cluster globalProperties.setProperty(DocumentalCluster.PropertyKeys.FLAT, inputProperties.getLong(Index.PropertyKeys.TERMS) <= 1); globalProperties.setProperty(Index.PropertyKeys.MAXCOUNT, inputProperties.getProperty(Index.PropertyKeys.MAXCOUNT)); globalProperties.setProperty(Index.PropertyKeys.MAXDOCSIZE, inputProperties.getProperty(Index.PropertyKeys.MAXDOCSIZE)); globalProperties.setProperty(Index.PropertyKeys.POSTINGS, inputProperties.getProperty(Index.PropertyKeys.POSTINGS)); globalProperties.setProperty(Index.PropertyKeys.OCCURRENCES, inputProperties.getProperty(Index.PropertyKeys.OCCURRENCES)); globalProperties.setProperty(Index.PropertyKeys.DOCUMENTS, inputProperties.getProperty(Index.PropertyKeys.DOCUMENTS)); globalProperties.setProperty(Index.PropertyKeys.TERMS, inputProperties.getProperty(Index.PropertyKeys.TERMS)); if (havePayloads) globalProperties.setProperty(Index.PropertyKeys.PAYLOADCLASS, payload.getClass().getName()); /* For the general case, we must rely on a merged cluster. However, if we detect a contiguous * strategy we can optimise a bit. 
*/ globalProperties.setProperty(Index.PropertyKeys.INDEXCLASS, strategy instanceof ContiguousDocumentalStrategy ? DocumentalConcatenatedCluster.class.getName() : DocumentalMergedCluster.class.getName()); globalProperties.save(outputBasename + DiskBasedIndex.PROPERTIES_EXTENSION); LOGGER.debug( "Properties for clustered index " + outputBasename + ": " + new ConfigurationMap(globalProperties)); }
From source file:edu.nyu.tandon.tool.PrunedPartition.java
/**
 * Builds a single pruned index (sub-index 0) from the global index. For each
 * term kept by the pruning strategy, collects the surviving postings into an
 * in-memory map keyed by local document id, then emits them in local-id order
 * (local ids are assigned by the strategy, e.g. by hit count, so the temporary
 * map is required to re-sort). Also records each kept term's original global
 * frequency in a ".globaltermfreq" side file, optionally fills a Bloom filter,
 * and finally writes local and global cluster properties.
 * <p>
 * NOTE(review): several declarations are never used in the visible body —
 * {@code documents}, {@code tempFile}, {@code temp}, {@code orderFile},
 * {@code order}, {@code direct}, {@code indirect}, {@code payload1} — they look
 * like leftovers from the class this was adapted from; candidates for removal.
 * <p>
 * NOTE(review): buffering every surviving posting of a term in
 * {@code Long2ObjectOpenHashMap<DocEntry>} trades memory for the re-ordering
 * (the inline TODO acknowledges this); positions are read but discarded for
 * postings that are pruned, to keep the iterator in sync.
 */
public void run() throws Exception { final ProgressLogger pl = new ProgressLogger(LOGGER, logInterval, TimeUnit.MILLISECONDS); final IntBigList sizeList = globalIndex.sizes; partitionSizes(); final Long2LongOpenHashMap documents = new Long2LongOpenHashMap(); long localFrequency = 0; long sumMaxPos = 0; InputBitStream direct; InputBitStream indirect; @SuppressWarnings("unchecked") BloomFilter<Void> bloomFilter; final File tempFile; final CachingOutputBitStream temp; final File orderFile; final CachingOutputBitStream order; long lID; IndexIterator indexIterator; bloomFilter = (bloomFilterPrecision != 0) ? BloomFilter.create(globalIndex.numberOfTerms, bloomFilterPrecision) : null; MutableString currentTerm = new MutableString(); Payload payload = null; long frequency, globalPointer, localPointer, termID; int localIndex, count = -1; pl.expectedUpdates = globalIndex.numberOfPostings; pl.itemsName = "postings"; pl.logInterval = logInterval; pl.start("Partitioning index..."); final OutputBitStream globalFrequencies = new OutputBitStream(localBasename[0] + ".globaltermfreq"); // for now, we rebuild the list in memory : TODO: fix so any size list is possible class DocEntry { long docID; Payload payload; int count; int[] pos; } Long2ObjectOpenHashMap<DocEntry> list = new Long2ObjectOpenHashMap<DocEntry>(); list.clear(); for (long t = 0; t < globalIndex.numberOfTerms; t++) { terms.readLine(currentTerm); indexIterator = indexReader.nextIterator(); frequency = indexIterator.frequency(); termID = indexIterator.termNumber(); assert termID == t; localFrequency = 0; IntegerPayload payload1; // if posting pruning, and the term never made it to the pruned index; skip it if (!docPruning && (lID = ((PostingPruningStrategy) strategy).localTermId(termID)) == -1) continue; for (long j = 0; j < frequency; j++) { globalPointer = indexIterator.nextDocument(); // prune accoring to type localIndex = (docPruning) ? 
strategy.localIndex(globalPointer) : ((PostingPruningStrategy) strategy).localIndex(termID, globalPointer); // (term,doc) or doc in the pruned index? if (localIndex == 0) { // First time this term is seen if (localFrequency == 0) { // assert numTerms[0] == ((PostingPruningStrategy) strategy).localTermId(termID); numTerms[0]++; currentTerm.println(localTerms[localIndex]); // save term globalFrequencies.writeLongGamma(frequency); // save original term size if (bloomFilterPrecision != 0) bloomFilter.add(currentTerm); } /* Store temporarily posting data; note that we save the global pointer as we * will have to access the size list. */ // local docID is written in later... // if (globalIndex.hasPayloads) payload = indexIterator.payload(); DocEntry d = new DocEntry(); d.docID = globalPointer; if (globalIndex.hasPayloads) payload = indexIterator.payload(); d.payload = (havePayloads) ? payload : null; count = (haveCounts) ? indexIterator.count() : 0; d.count = count; numPostings[0]++; if (haveCounts) { occurrencies[localIndex] += count; if (maxDocPos[localIndex] < count) maxDocPos[localIndex] = count; if (havePositions) { d.pos = new int[count]; for (int p = 0; p < count; p++) { int pos = indexIterator.nextPosition(); d.pos[p] = pos; sumMaxPos += pos; } } } localFrequency++; list.put(strategy.localPointer(globalPointer), d); } else { // synchronize aux files if (globalIndex.hasPayloads) payload = indexIterator.payload(); if (haveCounts) { count = indexIterator.count(); if (havePositions) { for (int p = 0; p < count; p++) { int pos = indexIterator.nextPosition(); } } } } } // We now run through the pruned index and copy from the temporary buffer. OutputBitStream obs; // list will not be ordered anymore, since we will remap to local docIDs. 
// and the local docIDs were assigned by the strategy based on the strategy order (hits, etc) if (localFrequency > 0) { if (haveCounts) numOccurrences[0] += occurrencies[0]; // create a post list if (quasiSuccinctIndexWriter[0] != null) quasiSuccinctIndexWriter[0].newInvertedList(localFrequency, occurrencies[0], sumMaxPos); else indexWriter[0].newInvertedList(); occurrencies[0] = 0; indexWriter[0].writeFrequency(localFrequency); // we want the index list in local docID order long[] docs = list.keySet().toLongArray(); Arrays.sort(docs); for (long localID : docs) { DocEntry d = list.get(localID); globalPointer = d.docID; if (havePayloads) payload = d.payload; if (haveCounts) count = d.count; // TODO: support positions // at the position we need obs = indexWriter[0].newDocumentRecord(); // map from global docID to local docID // localPointer = strategy.localPointer(globalPointer); // assert localID == localPointer; indexWriter[0].writeDocumentPointer(obs, localID); if (havePayloads) { indexWriter[0].writePayload(obs, payload); } if (haveCounts) indexWriter[0].writePositionCount(obs, count); if (havePositions) { indexWriter[0].writeDocumentPositions(obs, d.pos, 0, count, sizeList != null ? 
sizeList.getInt(globalPointer) : -1); } } sumMaxPos = 0; } else { sumMaxPos = 0; } localFrequency = 0; pl.count += frequency - 1; pl.update(); list.clear(); } globalFrequencies.close(); pl.done(); Properties globalProperties = new Properties(); globalProperties.setProperty(Index.PropertyKeys.FIELD, inputProperties.getProperty(Index.PropertyKeys.FIELD)); globalProperties.setProperty(Index.PropertyKeys.TERMPROCESSOR, inputProperties.getProperty(Index.PropertyKeys.TERMPROCESSOR)); localTerms[0].close(); indexWriter[0].close(); if (bloomFilterPrecision != 0) BinIO.storeObject(bloomFilter, localBasename[0] + DocumentalCluster.BLOOM_EXTENSION); Properties localProperties = indexWriter[0].properties(); localProperties.addAll(globalProperties); localProperties.setProperty(Index.PropertyKeys.MAXCOUNT, String.valueOf(maxDocPos[0])); localProperties.setProperty(Index.PropertyKeys.MAXDOCSIZE, maxDocSize[0]); localProperties.setProperty(Index.PropertyKeys.FIELD, globalProperties.getProperty(Index.PropertyKeys.FIELD)); localProperties.setProperty(Index.PropertyKeys.OCCURRENCES, haveCounts ? 
numOccurrences[0] : -1); localProperties.setProperty(Index.PropertyKeys.POSTINGS, numPostings[0]); localProperties.setProperty(Index.PropertyKeys.TERMS, numTerms[0]); if (havePayloads) localProperties.setProperty(Index.PropertyKeys.PAYLOADCLASS, payload.getClass().getName()); if (strategyProperties != null && strategyProperties[0] != null) localProperties.addAll(strategyProperties[0]); // add global properties localProperties.addProperty(globalPropertyKeys.G_MAXCOUNT, inputProperties.getProperty(Index.PropertyKeys.MAXCOUNT)); localProperties.addProperty(globalPropertyKeys.G_MAXDOCSIZE, inputProperties.getProperty(Index.PropertyKeys.MAXDOCSIZE)); localProperties.addProperty(globalPropertyKeys.G_POSTINGS, inputProperties.getProperty(Index.PropertyKeys.POSTINGS)); localProperties.addProperty(globalPropertyKeys.G_OCCURRENCES, inputProperties.getProperty(Index.PropertyKeys.OCCURRENCES)); localProperties.addProperty(globalPropertyKeys.G_DOCUMENTS, inputProperties.getProperty(Index.PropertyKeys.DOCUMENTS)); localProperties.addProperty(globalPropertyKeys.G_TERMS, inputProperties.getProperty(Index.PropertyKeys.TERMS)); localProperties.save(localBasename[0] + DiskBasedIndex.PROPERTIES_EXTENSION); if (strategyFilename != null) globalProperties.setProperty(IndexCluster.PropertyKeys.STRATEGY, strategyFilename); globalProperties.addProperty(IndexCluster.PropertyKeys.LOCALINDEX, localBasename[0]); globalProperties.setProperty(DocumentalCluster.PropertyKeys.BLOOM, bloomFilterPrecision != 0); // If we partition an index with a single term, by definition we have a flat cluster globalProperties.setProperty(DocumentalCluster.PropertyKeys.FLAT, inputProperties.getLong(Index.PropertyKeys.TERMS) <= 1); globalProperties.setProperty(Index.PropertyKeys.MAXCOUNT, inputProperties.getProperty(Index.PropertyKeys.MAXCOUNT)); globalProperties.setProperty(Index.PropertyKeys.MAXDOCSIZE, inputProperties.getProperty(Index.PropertyKeys.MAXDOCSIZE)); 
globalProperties.setProperty(Index.PropertyKeys.POSTINGS, inputProperties.getProperty(Index.PropertyKeys.POSTINGS)); globalProperties.setProperty(Index.PropertyKeys.OCCURRENCES, inputProperties.getProperty(Index.PropertyKeys.OCCURRENCES)); globalProperties.setProperty(Index.PropertyKeys.DOCUMENTS, inputProperties.getProperty(Index.PropertyKeys.DOCUMENTS)); globalProperties.setProperty(Index.PropertyKeys.TERMS, inputProperties.getProperty(Index.PropertyKeys.TERMS)); if (havePayloads) globalProperties.setProperty(Index.PropertyKeys.PAYLOADCLASS, payload.getClass().getName()); /* For the general case, we must rely on a merged cluster. However, if we detect a contiguous * strategy we can optimise a bit. */ globalProperties.setProperty(Index.PropertyKeys.INDEXCLASS, strategy instanceof ContiguousDocumentalStrategy ? DocumentalConcatenatedCluster.class.getName() : DocumentalMergedCluster.class.getName()); globalProperties.save(outputBasename + DiskBasedIndex.PROPERTIES_EXTENSION); LOGGER.debug( "Properties for clustered index " + outputBasename + ": " + new ConfigurationMap(globalProperties)); }