Example usage for java.util.stream Collectors mapping

Introduction

This page collects example usage of java.util.stream.Collectors.mapping from open source projects.

Prototype

public static <T, U, A, R> Collector<T, ?, R> mapping(Function<? super T, ? extends U> mapper,
        Collector<? super U, A, R> downstream) 

Document

Adapts a Collector accepting elements of type U to one accepting elements of type T by applying a mapping function to each input element before accumulation.
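
For quick orientation before the real-world usages, here is a minimal, self-contained sketch of the collector in isolation (the word list is invented):

import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

public class MappingBasics {
    public static void main(String[] args) {
        List<String> words = Arrays.asList("alpha", "beta", "gamma");

        // mapping() applies String::length to each element before the
        // downstream toList() accumulates the results.
        List<Integer> lengths = words.stream()
                .collect(Collectors.mapping(String::length, Collectors.toList()));

        System.out.println(lengths); // [5, 4, 5]
    }
}

Used on its own like this, mapping(f, downstream) behaves like map(f) followed by collect(downstream); the collector form earns its keep as the downstream of groupingBy or partitioningBy, which is how nearly every example below uses it.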

Usage

From source file:com.uber.hoodie.index.bloom.TestHoodieBloomIndex.java

@Test
public void testRangePruning() {

    HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).build();
    HoodieBloomIndex index = new HoodieBloomIndex(config);

    final Map<String, List<BloomIndexFileInfo>> partitionToFileIndexInfo = new HashMap<>();
    partitionToFileIndexInfo.put("2017/10/22",
            Arrays.asList(new BloomIndexFileInfo("f1"), new BloomIndexFileInfo("f2", "000", "000"),
                    new BloomIndexFileInfo("f3", "001", "003"), new BloomIndexFileInfo("f4", "002", "007"),
                    new BloomIndexFileInfo("f5", "009", "010")));

    JavaPairRDD<String, String> partitionRecordKeyPairRDD = jsc
            .parallelize(Arrays.asList(new Tuple2<>("2017/10/22", "003"), new Tuple2<>("2017/10/22", "002"),
                    new Tuple2<>("2017/10/22", "005"), new Tuple2<>("2017/10/22", "004")))
            .mapToPair(t -> t);

    List<Tuple2<String, Tuple2<String, HoodieKey>>> comparisonKeyList = index
            .explodeRecordRDDWithFileComparisons(partitionToFileIndexInfo, partitionRecordKeyPairRDD).collect();

    assertEquals(10, comparisonKeyList.size());
    Map<String, List<String>> recordKeyToFileComps = comparisonKeyList.stream()
            .collect(Collectors.groupingBy(t -> t._2()._2().getRecordKey(),
                    Collectors.mapping(t -> t._2()._1().split("#")[0], Collectors.toList())));

    assertEquals(4, recordKeyToFileComps.size());
    assertEquals(Arrays.asList("f1", "f3", "f4"), recordKeyToFileComps.get("002"));
    assertEquals(Arrays.asList("f1", "f3", "f4"), recordKeyToFileComps.get("003"));
    assertEquals(Arrays.asList("f1", "f4"), recordKeyToFileComps.get("004"));
    assertEquals(Arrays.asList("f1", "f4"), recordKeyToFileComps.get("005"));
}
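
Stripped of the Hudi specifics, the collect call above shows the pattern that recurs throughout this page: groupingBy chooses the map key, and mapping transforms each grouped element before the downstream toList() gathers it. A minimal sketch with invented record-key/file pairs:

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class GroupAndMapSketch {
    public static void main(String[] args) {
        // Each pair is { recordKey, fileName }.
        Map<String, List<String>> filesByRecordKey = Stream.of(
                new String[] { "002", "f1" }, new String[] { "002", "f3" }, new String[] { "004", "f1" })
                .collect(Collectors.groupingBy(pair -> pair[0],
                        Collectors.mapping(pair -> pair[1], Collectors.toList())));

        System.out.println(filesByRecordKey); // e.g. {002=[f1, f3], 004=[f1]}
    }
}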

From source file:com.ikanow.aleph2.graph.titan.utils.TitanGraphBuildingUtils.java

/** Utility to get the vertices in the DB matching the specified keys. TODO: move to intermediate utils
 * @param keys the keys to match vertices against
 * @param tx the current Titan transaction
 * @param key_fields the fields that together form a vertex key
 * @param vertex_filter predicate used to drop unauthorized vertices
 * @return the matching vertices, grouped by key
 */
@SuppressWarnings("unchecked")
public static final Map<JsonNode, List<Vertex>> getGroupedVertices(final Collection<ObjectNode> keys,
        final TitanTransaction tx, final List<String> key_fields, final Predicate<Vertex> vertex_filter) {
    final Stream<TitanVertex> dups = Lambdas.get(() -> {
        final Map<String, Set<Object>> dedup_query_builder = keys.stream()
                .flatMap(j -> Optionals.streamOf(j.fields(), false))
                .collect(Collectors.groupingBy(kv -> kv.getKey(),
                        Collectors.mapping(kv -> jsonNodeToObject(kv.getValue()), Collectors.toSet())));

        //TODO (ALEPH-15): would be nice to support custom "fuzzier" queries, since we're doing a dedup stage to pick the actual winning vertices anyway
        // that way you could say query on tokenized-version of name and get anyone with the same first or last name (say) and then pick the most likely
        // one based on the graph ... of course you'd probably want the full graph for that, so it might end up being better served as a "self-analytic" to do is part
        // of post processing?
        // (NOTE: same remarks apply for edges)
        // (NOTE: currently I've been going in the opposite direction, ie enforcing only one vertex per keyset per bucket ... otherwise it's going to get really 
        //  confusing when you try to merge all the different versions that Titan creates because of the lack of an upsert function....)

        final TitanGraphQuery<?> matching_nodes_query = dedup_query_builder.entrySet().stream().reduce(
                tx.query(), (query, kv) -> query.has(kv.getKey(), Contain.IN, kv.getValue()),
                (query1, query2) -> query1 // (can't occur since reduce not parallel)
        );

        return Optionals.streamOf(matching_nodes_query.vertices(), false);
    });

    // Remove false positives, un-authorized nodes, and group by key

    final Map<JsonNode, List<Vertex>> grouped_vertices = dups
            .map(vertex -> Tuples._2T((Vertex) vertex, getElementProperties(vertex, key_fields)))
            .filter(vertex_key -> keys.contains(vertex_key._2())) // (remove false positives)
            .filter(vertex_key -> vertex_filter.test(vertex_key._1())) // (remove un-authorized nodes)
            .collect(Collectors.groupingBy(t2 -> (JsonNode) t2._2(), // (group by key)
                    Collectors.mapping(t2 -> t2._1(), Collectors.toList())));

    return grouped_vertices;
}

From source file:org.dataconservancy.packaging.tool.integration.PackageGenerationTest.java

/**
 * Ensures the models from ModelResources are included in the final package.
 * Currently every model exposed by {@code ModelResources#RESOURCE_MAP}
 * should have a serialization in the final package under the ONT directory
 * per our spec.
 *
 * @throws Exception
 */
@Test
public void testOntologiesIncluded() throws Exception {
    PackageState state = initializer.initialize(DCS_PROFILE);
    OpenedPackage openedPackage = packager.createPackage(state, folder.getRoot());
    List<File> models = new ArrayList<>();
    OntDirectoryWalker walker = new OntDirectoryWalker();

    walker.doWalk(openedPackage.getBaseDirectory(), models);

    assertTrue(ModelResources.RESOURCE_MAP.size() > 0);
    assertEquals(ModelResources.RESOURCE_MAP.size(), models.size());
    List<String> packageModelNames = models.stream()
            .collect(Collectors.mapping(File::getName, Collectors.toList()));
    ModelResources.RESOURCE_MAP.values().stream().forEach(resource -> {
        if (resource.startsWith("/")) {
            resource = resource.substring(1, resource.length());
        }
        assertTrue(packageModelNames.contains(resource));
    });

}
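
One note on the collect call above: on a plain stream, Collectors.mapping(File::getName, Collectors.toList()) is interchangeable with mapping first and collecting second, so the same list could also be built as:

    List<String> packageModelNames = models.stream()
            .map(File::getName)
            .collect(Collectors.toList());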

From source file:org.apache.hadoop.hbase.client.RawAsyncHBaseAdmin.java

@Override
public CompletableFuture<CacheEvictionStats> clearBlockCache(TableName tableName) {
    CompletableFuture<CacheEvictionStats> future = new CompletableFuture<>();
    addListener(getTableHRegionLocations(tableName), (locations, err) -> {
        if (err != null) {
            future.completeExceptionally(err);
            return;
        }
        Map<ServerName, List<RegionInfo>> regionInfoByServerName = locations.stream()
                .filter(l -> l.getRegion() != null).filter(l -> !l.getRegion().isOffline())
                .filter(l -> l.getServerName() != null).collect(Collectors.groupingBy(l -> l.getServerName(),
                        Collectors.mapping(l -> l.getRegion(), Collectors.toList())));
        List<CompletableFuture<CacheEvictionStats>> futures = new ArrayList<>();
        CacheEvictionStatsAggregator aggregator = new CacheEvictionStatsAggregator();
        for (Map.Entry<ServerName, List<RegionInfo>> entry : regionInfoByServerName.entrySet()) {
            futures.add(clearBlockCache(entry.getKey(), entry.getValue()).whenComplete((stats, err2) -> {
                if (err2 != null) {
                    future.completeExceptionally(unwrapCompletionException(err2));
                } else {
                    aggregator.append(stats);
                }
            }));
        }
        addListener(CompletableFuture.allOf(futures.toArray(new CompletableFuture[futures.size()])),
                (ret, err3) -> {
                    if (err3 != null) {
                        future.completeExceptionally(unwrapCompletionException(err3));
                    } else {
                        future.complete(aggregator.sum());
                    }
                });
    });
    return future;
}

From source file:org.apache.hadoop.hbase.quotas.SnapshotQuotaObserverChore.java

/**
 * Sums the snapshot sizes for each namespace.
 */
Map<String, Long> groupSnapshotSizesByNamespace(Multimap<TableName, SnapshotWithSize> snapshotsWithSize) {
    return snapshotsWithSize.entries().stream().collect(Collectors.groupingBy(
            // Convert TableName into the namespace string
            (e) -> e.getKey().getNamespaceAsString(),
            // Sum the values for namespace
            Collectors.mapping(Map.Entry::getValue, Collectors.summingLong((sws) -> sws.getSize()))));
}
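
Here the downstream is an aggregation rather than a container: mapping extracts the SnapshotWithSize from each entry, and summingLong reduces the sizes within each namespace. The same shape, reduced to a self-contained sketch (names and numbers invented):

import java.util.AbstractMap.SimpleEntry;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class SumByGroupSketch {
    public static void main(String[] args) {
        Map<String, Long> bytesByNamespace = Stream.of(
                new SimpleEntry<>("ns1", 10L), new SimpleEntry<>("ns1", 5L), new SimpleEntry<>("ns2", 7L))
                // Map each entry to its size, then sum the sizes per namespace.
                .collect(Collectors.groupingBy(SimpleEntry::getKey,
                        Collectors.mapping(SimpleEntry::getValue, Collectors.summingLong(Long::longValue))));

        System.out.println(bytesByNamespace); // e.g. {ns1=15, ns2=7}
    }
}

Since summingLong already takes an extraction function, the mapping step could be folded into it; the split form reads well when extraction and reduction are logically separate steps, as in the chore above.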

From source file:org.apache.nifi.remote.PeerDescriptionModifier.java

public PeerDescriptionModifier(final NiFiProperties properties) {
    final Map<Tuple<String, String>, List<Tuple<String, String>>> routeDefinitions = properties
            .getPropertyKeys().stream().filter(propertyKey -> propertyKey.startsWith(PROPERTY_PREFIX))
            .map(propertyKey -> {
                final Matcher matcher = PROPERTY_REGEX.matcher(propertyKey);
                if (!matcher.matches()) {
                    throw new IllegalArgumentException(format(
                            "Found an invalid Site-to-Site route definition property '%s'."
                                    + " Routing property keys should be formatted as 'nifi.remote.route.{protocol}.{name}.{routingConfigName}'."
                                    + " Where {protocol} is 'raw' or 'http', and {routingConfigName} is 'when', 'hostname', 'port' or 'secure'.",
                            propertyKey));
                }
                return matcher;
            })
            .collect(Collectors.groupingBy(matcher -> new Tuple<>(matcher.group(1), matcher.group(2)),
                    Collectors.mapping(matcher -> new Tuple<>(matcher.group(3), matcher.group(0)),
                            Collectors.toList())));

    routes = routeDefinitions.entrySet().stream().map(routeDefinition -> {
        final Route route = new Route();
        // E.g. [raw, example1], [http, example2]
        final Tuple<String, String> protocolAndRoutingName = routeDefinition.getKey();
        route.protocol = SiteToSiteTransportProtocol.valueOf(protocolAndRoutingName.getKey().toUpperCase());
        route.name = protocolAndRoutingName.getValue();
        routeDefinition.getValue().forEach(routingConfigNameAndPropertyKey -> {
            final String routingConfigName = routingConfigNameAndPropertyKey.getKey();
            final String propertyKey = routingConfigNameAndPropertyKey.getValue();
            final String routingConfigValue = properties.getProperty(propertyKey);
            try {
                switch (routingConfigName) {
                case "when":
                    route.predicate = Query.prepare(routingConfigValue);
                    break;
                case "hostname":
                    route.hostname = Query.prepare(routingConfigValue);
                    break;
                case "port":
                    route.port = Query.prepare(routingConfigValue);
                    break;
                case "secure":
                    route.secure = Query.prepare(routingConfigValue);
                    break;
                }
            } catch (AttributeExpressionLanguageParsingException e) {
                throw new IllegalArgumentException(format(
                        "Failed to parse NiFi expression language configured"
                                + " for Site-to-Site routing property at '%s' due to '%s'",
                        propertyKey, e.getMessage()), e);
            }
        });
        return route;
    }).map(Route::validate).collect(Collectors.groupingBy(r -> r.protocol));

}

From source file:org.codelibs.fess.app.web.admin.backup.AdminBackupAction.java

public static Consumer<Writer> getSearchLogNdjsonWriteCall() {
    return writer -> {
        final SearchLogBhv bhv = ComponentUtil.getComponent(SearchLogBhv.class);
        bhv.selectCursor(cb -> {
            cb.query().matchAll();
            cb.query().addOrderBy_RequestedAt_Asc();
        }, entity -> {
            final StringBuilder buf = new StringBuilder();
            buf.append('{');
            appendJson("id", entity.getId(), buf).append(',');
            appendJson("query-id", entity.getQueryId(), buf).append(',');
            appendJson("user-info-id", entity.getUserInfoId(), buf).append(',');
            appendJson("user-session-id", entity.getUserSessionId(), buf).append(',');
            appendJson("user", entity.getUser(), buf).append(',');
            appendJson("search-word", entity.getSearchWord(), buf).append(',');
            appendJson("hit-count", entity.getHitCount(), buf).append(',');
            appendJson("query-page-size", entity.getQueryPageSize(), buf).append(',');
            appendJson("query-offset", entity.getQueryOffset(), buf).append(',');
            appendJson("referer", entity.getReferer(), buf).append(',');
            appendJson("languages", entity.getLanguages(), buf).append(',');
            appendJson("roles", entity.getRoles(), buf).append(',');
            appendJson("user-agent", entity.getUserAgent(), buf).append(',');
            appendJson("client-ip", entity.getClientIp(), buf).append(',');
            appendJson("access-type", entity.getAccessType(), buf).append(',');
            appendJson("query-time", entity.getQueryTime(), buf).append(',');
            appendJson("response-time", entity.getResponseTime(), buf).append(',');
            appendJson("requested-at", entity.getRequestedAt(), buf).append(',');
            final Map<String, List<String>> searchFieldMap = entity.getSearchFieldLogList().stream()
                    .collect(Collectors.groupingBy(Pair::getFirst,
                            Collectors.mapping(Pair::getSecond, Collectors.toList())));
            appendJson("search-field", searchFieldMap, buf);
            buf.append('}');
            buf.append('\n');
            try {
                writer.write(buf.toString());
            } catch (final IOException e) {
                throw new IORuntimeException(e);
            }
        });
    };
}

From source file:org.codice.alliance.nsili.common.ResultDAGConverter.java

private static Map<String, List<String>> getAttrMap(List<String> attributes) {
    return attributes.stream().map(ATTRIBUTE_PATTERN::matcher).filter(Matcher::matches).collect(
            Collectors.groupingBy(m -> m.group(2), Collectors.mapping(m -> m.group(3), Collectors.toList())));
}
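
The same idiom in a single statement: run a precompiled pattern over each string, keep only the full matches, and group one capture group by another. A standalone sketch (the pattern and inputs are invented, not the actual NSILI ATTRIBUTE_PATTERN):

import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class RegexGroupSketch {
    private static final Pattern ATTR = Pattern.compile("(\\w+):(\\w+)");

    public static void main(String[] args) {
        Map<String, List<String>> namesByScope = Stream.of("core:title", "core:date", "geo:point")
                .map(ATTR::matcher)
                .filter(Matcher::matches)
                // Group capture 2 (the name) under capture 1 (the scope).
                .collect(Collectors.groupingBy(m -> m.group(1),
                        Collectors.mapping(m -> m.group(2), Collectors.toList())));

        System.out.println(namesByScope); // e.g. {core=[title, date], geo=[point]}
    }
}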

From source file:org.codice.ddf.admin.application.service.migratable.FeatureProcessor.java

/**
 * Updates the specified features' requirements to mark them as required or not.
 *
 * @param report the report in which to record errors if the features cannot be updated
 * @param region the region in which to update the features
 * @param jfeatures the features to update
 * @return <code>true</code> if the features were updated successfully; <code>false</code>
 *     otherwise
 */
public boolean updateFeaturesRequirements(ProfileMigrationReport report, String region,
        Set<JsonFeature> jfeatures) {
    return run(report, region, jfeatures.stream().map(JsonFeature::getId), Operation.UPDATE, jfeatures.stream()
            .collect(Collectors.groupingBy(JsonFeature::isRequired,
                    Collectors.mapping(JsonFeature::toRequirement, Collectors.toSet())))
            .entrySet().stream()
            .map(requirementsToUpdate -> updateFeaturesRequirements(region, requirementsToUpdate))
            .toArray(ThrowingRunnable[]::new));
}
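
Because the grouping key here is a boolean (JsonFeature::isRequired), Collectors.partitioningBy would express the same split and additionally guarantees entries for both true and false even when one side is empty. A sketch of the equivalent collector; Requirement stands in for whatever type JsonFeature::toRequirement actually returns:

    // Hypothetical equivalent of the groupingBy call above;
    // 'Requirement' is a placeholder for the real return type.
    Map<Boolean, Set<Requirement>> byRequired = jfeatures.stream()
            .collect(Collectors.partitioningBy(JsonFeature::isRequired,
                    Collectors.mapping(JsonFeature::toRequirement, Collectors.toSet())));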

From source file:org.gradoop.flink.model.impl.operators.matching.common.query.QueryHandler.java

/**
 * Initializes a cache for the given elements where every key maps to multiple elements.
 * The key selector will be called on every element to extract the cache's key.
 * The value selector will be called on every element to extract the value.
 * Returns a cache of the form
 * KT -> Set<VT>
 *
 * @param elements elements the cache will be built from
 * @param keySelector key selector function extracting cache keys from elements
 * @param valueSelector value selector function extracting cache values from elements
 * @param <EL> the element type
 * @param <KT> the cache key type
 * @param <VT> the cache value type
 * @return cache KT -> Set<VT>
 */
private <EL, KT, VT> Map<KT, Set<VT>> initSetCache(Collection<EL> elements, Function<EL, KT> keySelector,
        Function<EL, VT> valueSelector) {

    return elements.stream()
            .collect(Collectors.groupingBy(keySelector, Collectors.mapping(valueSelector, Collectors.toSet())));
}
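
To close, the same collector pair exercised directly, showing why toSet() is the right downstream for a multimap-style cache: duplicate values collapse within each group (data invented):

import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

public class SetCacheSketch {
    public static void main(String[] args) {
        // Each pair is { sourceVertex, targetVertex }; a->b appears twice.
        List<String[]> edges = Arrays.asList(
                new String[] { "a", "b" }, new String[] { "a", "b" }, new String[] { "a", "c" });

        // toSet() deduplicates per group, so the repeated a->b edge collapses.
        Map<String, Set<String>> targetsBySource = edges.stream()
                .collect(Collectors.groupingBy(e -> e[0],
                        Collectors.mapping(e -> e[1], Collectors.toSet())));

        System.out.println(targetsBySource); // e.g. {a=[b, c]}
    }
}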