Example usage for java.util.stream Collectors mapping

Introduction

This page collects example usage of java.util.stream.Collectors.mapping from open source projects.

Prototype

public static <T, U, A, R> Collector<T, ?, R> mapping(Function<? super T, ? extends U> mapper,
        Collector<? super U, A, R> downstream) 

Document

Adapts a Collector accepting elements of type U to one accepting elements of type T by applying a mapping function to each input element before accumulation.
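
For quick orientation before the real-world usages, here is a minimal, self-contained sketch of the collector in isolation (the word list is invented):

import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

public class MappingBasics {
    public static void main(String[] args) {
        List<String> words = Arrays.asList("alpha", "beta", "gamma");

        // mapping() applies String::length to each element before the
        // downstream toList() accumulates the results.
        List<Integer> lengths = words.stream()
                .collect(Collectors.mapping(String::length, Collectors.toList()));

        System.out.println(lengths); // [5, 4, 5]
    }
}

Used on its own like this, mapping(f, downstream) behaves like map(f) followed by collect(downstream); the collector form earns its keep as the downstream of groupingBy or partitioningBy, which is how nearly every example below uses it.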

Usage

From source file:com.uber.hoodie.index.bloom.TestHoodieBloomIndex.java

@Test
public void testRangePruning() {

    HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).build();
    HoodieBloomIndex index = new HoodieBloomIndex(config);

    final Map<String, List<BloomIndexFileInfo>> partitionToFileIndexInfo = new HashMap<>();
    partitionToFileIndexInfo.put("2017/10/22",
            Arrays.asList(new BloomIndexFileInfo("f1"), new BloomIndexFileInfo("f2", "000", "000"),
                    new BloomIndexFileInfo("f3", "001", "003"), new BloomIndexFileInfo("f4", "002", "007"),
                    new BloomIndexFileInfo("f5", "009", "010")));

    JavaPairRDD<String, String> partitionRecordKeyPairRDD = jsc
            .parallelize(Arrays.asList(new Tuple2<>("2017/10/22", "003"), new Tuple2<>("2017/10/22", "002"),
                    new Tuple2<>("2017/10/22", "005"), new Tuple2<>("2017/10/22", "004")))
            .mapToPair(t -> t);

    List<Tuple2<String, Tuple2<String, HoodieKey>>> comparisonKeyList = index
            .explodeRecordRDDWithFileComparisons(partitionToFileIndexInfo, partitionRecordKeyPairRDD).collect();

    assertEquals(10, comparisonKeyList.size());
    Map<String, List<String>> recordKeyToFileComps = comparisonKeyList.stream()
            .collect(Collectors.groupingBy(t -> t._2()._2().getRecordKey(),
                    Collectors.mapping(t -> t._2()._1().split("#")[0], Collectors.toList())));

    assertEquals(4, recordKeyToFileComps.size());
    assertEquals(Arrays.asList("f1", "f3", "f4"), recordKeyToFileComps.get("002"));
    assertEquals(Arrays.asList("f1", "f3", "f4"), recordKeyToFileComps.get("003"));
    assertEquals(Arrays.asList("f1", "f4"), recordKeyToFileComps.get("004"));
    assertEquals(Arrays.asList("f1", "f4"), recordKeyToFileComps.get("005"));
}
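
Stripped of the Hudi specifics, the collect call above shows the pattern that recurs throughout this page: groupingBy chooses the map key, and mapping transforms each grouped element before the downstream toList() gathers it. A minimal sketch with invented record-key/file pairs:

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class GroupAndMapSketch {
    public static void main(String[] args) {
        // Each pair is { recordKey, fileName }.
        Map<String, List<String>> filesByRecordKey = Stream.of(
                new String[] { "002", "f1" }, new String[] { "002", "f3" }, new String[] { "004", "f1" })
                .collect(Collectors.groupingBy(pair -> pair[0],
                        Collectors.mapping(pair -> pair[1], Collectors.toList())));

        System.out.println(filesByRecordKey); // e.g. {002=[f1, f3], 004=[f1]}
    }
}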

From source file:com.ikanow.aleph2.graph.titan.utils.TitanGraphBuildingUtils.java

/** Utility to get the vertices in the DB matching the specified keys. TODO: move to intermediate utils
 * @param keys the keys to match vertices against
 * @param tx the current Titan transaction
 * @param key_fields the fields that together form a vertex key
 * @param vertex_filter predicate used to drop unauthorized vertices
 * @return the matching vertices, grouped by key
 */
@SuppressWarnings("unchecked")
public static final Map<JsonNode, List<Vertex>> getGroupedVertices(final Collection<ObjectNode> keys,
        final TitanTransaction tx, final List<String> key_fields, final Predicate<Vertex> vertex_filter) {
    final Stream<TitanVertex> dups = Lambdas.get(() -> {
        final Map<String, Set<Object>> dedup_query_builder = keys.stream()
                .flatMap(j -> Optionals.streamOf(j.fields(), false))
                .collect(Collectors.groupingBy(kv -> kv.getKey(),
                        Collectors.mapping(kv -> jsonNodeToObject(kv.getValue()), Collectors.toSet())));

        //TODO (ALEPH-15): would be nice to support custom "fuzzier" queries, since we're doing a dedup stage to pick the actual winning vertices anyway
        // that way you could say query on tokenized-version of name and get anyone with the same first or last name (say) and then pick the most likely
        // one based on the graph ... of course you'd probably want the full graph for that, so it might end up being better served as a "self-analytic" to do is part
        // of post processing?
        // (NOTE: same remarks apply for edges)
        // (NOTE: currently I've been going in the opposite direction, ie enforcing only one vertex per keyset per bucket ... otherwise it's going to get really 
        //  confusing when you try to merge all the different versions that Titan creates because of the lack of an upsert function....)

        final TitanGraphQuery<?> matching_nodes_query = dedup_query_builder.entrySet().stream().reduce(
                tx.query(), (query, kv) -> query.has(kv.getKey(), Contain.IN, kv.getValue()),
                (query1, query2) -> query1 // (can't occur since reduce not parallel)
        );

        return Optionals.streamOf(matching_nodes_query.vertices(), false);
    });

    // Remove false positives, un-authorized nodes, and group by key

    final Map<JsonNode, List<Vertex>> grouped_vertices = dups
            .map(vertex -> Tuples._2T((Vertex) vertex, getElementProperties(vertex, key_fields)))
            .filter(vertex_key -> keys.contains(vertex_key._2())) // (remove false positives)
            .filter(vertex_key -> vertex_filter.test(vertex_key._1())) // (remove un-authorized nodes)
            .collect(Collectors.groupingBy(t2 -> (JsonNode) t2._2(), // (group by key)
                    Collectors.mapping(t2 -> t2._1(), Collectors.toList())));

    return grouped_vertices;
}

From source file:org.dataconservancy.packaging.tool.integration.PackageGenerationTest.java

/**
 * Ensures the models from ModelResources are included in the final package.
 * Currently every model exposed by {@code ModelResources#RESOURCE_MAP}
 * should have a serialization in the final package under the ONT directory
 * per our spec.
 *
 * @throws Exception
 */
@Test
public void testOntologiesIncluded() throws Exception {
    PackageState state = initializer.initialize(DCS_PROFILE);
    OpenedPackage openedPackage = packager.createPackage(state, folder.getRoot());
    List<File> models = new ArrayList<>();
    OntDirectoryWalker walker = new OntDirectoryWalker();

    walker.doWalk(openedPackage.getBaseDirectory(), models);

    assertTrue(ModelResources.RESOURCE_MAP.size() > 0);
    assertEquals(ModelResources.RESOURCE_MAP.size(), models.size());
    List<String> packageModelNames = models.stream()
            .collect(Collectors.mapping(File::getName, Collectors.toList()));
    ModelResources.RESOURCE_MAP.values().stream().forEach(resource -> {
        if (resource.startsWith("/")) {
            resource = resource.substring(1, resource.length());
        }
        assertTrue(packageModelNames.contains(resource));
    });

}
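
One note on the collect call above: on a plain stream, Collectors.mapping(File::getName, Collectors.toList()) is interchangeable with mapping first and collecting second, so the same list could also be built as:

    List<String> packageModelNames = models.stream()
            .map(File::getName)
            .collect(Collectors.toList());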

From source file:org.apache.hadoop.hbase.client.RawAsyncHBaseAdmin.java

@Override
public CompletableFuture<CacheEvictionStats> clearBlockCache(TableName tableName) {
    CompletableFuture<CacheEvictionStats> future = new CompletableFuture<>();
    addListener(getTableHRegionLocations(tableName), (locations, err) -> {
        if (err != null) {
            future.completeExceptionally(err);
            return;
        }
        Map<ServerName, List<RegionInfo>> regionInfoByServerName = locations.stream()
                .filter(l -> l.getRegion() != null).filter(l -> !l.getRegion().isOffline())
                .filter(l -> l.getServerName() != null).collect(Collectors.groupingBy(l -> l.getServerName(),
                        Collectors.mapping(l -> l.getRegion(), Collectors.toList())));
        List<CompletableFuture<CacheEvictionStats>> futures = new ArrayList<>();
        CacheEvictionStatsAggregator aggregator = new CacheEvictionStatsAggregator();
        for (Map.Entry<ServerName, List<RegionInfo>> entry : regionInfoByServerName.entrySet()) {
            futures.add(clearBlockCache(entry.getKey(), entry.getValue()).whenComplete((stats, err2) -> {
                if (err2 != null) {
                    future.completeExceptionally(unwrapCompletionException(err2));
                } else {
                    aggregator.append(stats);
                }
            }));
        }
        addListener(CompletableFuture.allOf(futures.toArray(new CompletableFuture[futures.size()])),
                (ret, err3) -> {
                    if (err3 != null) {
                        future.completeExceptionally(unwrapCompletionException(err3));
                    } else {
                        future.complete(aggregator.sum());
                    }
                });
    });
    return future;
}

From source file:org.apache.hadoop.hbase.quotas.SnapshotQuotaObserverChore.java

/**
 * Sums the snapshot sizes for each namespace.
 */
Map<String, Long> groupSnapshotSizesByNamespace(Multimap<TableName, SnapshotWithSize> snapshotsWithSize) {
    return snapshotsWithSize.entries().stream().collect(Collectors.groupingBy(
            // Convert TableName into the namespace string
            (e) -> e.getKey().getNamespaceAsString(),
            // Sum the values for namespace
            Collectors.mapping(Map.Entry::getValue, Collectors.summingLong((sws) -> sws.getSize()))));
}
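
Here the downstream is an aggregation rather than a container: mapping extracts the SnapshotWithSize from each entry, and summingLong reduces the sizes within each namespace. The same shape, reduced to a self-contained sketch (names and numbers invented):

import java.util.AbstractMap.SimpleEntry;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class SumByGroupSketch {
    public static void main(String[] args) {
        Map<String, Long> bytesByNamespace = Stream.of(
                new SimpleEntry<>("ns1", 10L), new SimpleEntry<>("ns1", 5L), new SimpleEntry<>("ns2", 7L))
                // Map each entry to its size, then sum the sizes per namespace.
                .collect(Collectors.groupingBy(SimpleEntry::getKey,
                        Collectors.mapping(SimpleEntry::getValue, Collectors.summingLong(Long::longValue))));

        System.out.println(bytesByNamespace); // e.g. {ns1=15, ns2=7}
    }
}

Since summingLong already takes an extraction function, the mapping step could be folded into it; the split form reads well when extraction and reduction are logically separate steps, as in the chore above.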

From source file:org.apache.nifi.remote.PeerDescriptionModifier.java

public PeerDescriptionModifier(final NiFiProperties properties) {
    final Map<Tuple<String, String>, List<Tuple<String, String>>> routeDefinitions = properties
            .getPropertyKeys().stream().filter(propertyKey -> propertyKey.startsWith(PROPERTY_PREFIX))
            .map(propertyKey -> {
                final Matcher matcher = PROPERTY_REGEX.matcher(propertyKey);
                if (!matcher.matches()) {
                    throw new IllegalArgumentException(format(
                            "Found an invalid Site-to-Site route definition property '%s'."
                                    + " Routing property keys should be formatted as 'nifi.remote.route.{protocol}.{name}.{routingConfigName}'."
                                    + " Where {protocol} is 'raw' or 'http', and {routingConfigName} is 'when', 'hostname', 'port' or 'secure'.",
                            propertyKey));
                }
                return matcher;
            })
            .collect(Collectors.groupingBy(matcher -> new Tuple<>(matcher.group(1), matcher.group(2)),
                    Collectors.mapping(matcher -> new Tuple<>(matcher.group(3), matcher.group(0)),
                            Collectors.toList())));

    routes = routeDefinitions.entrySet().stream().map(routeDefinition -> {
        final Route route = new Route();
        // E.g. [raw, example1], [http, example2]
        final Tuple<String, String> protocolAndRoutingName = routeDefinition.getKey();
        route.protocol = SiteToSiteTransportProtocol.valueOf(protocolAndRoutingName.getKey().toUpperCase());
        route.name = protocolAndRoutingName.getValue();
        routeDefinition.getValue().forEach(routingConfigNameAndPropertyKey -> {
            final String routingConfigName = routingConfigNameAndPropertyKey.getKey();
            final String propertyKey = routingConfigNameAndPropertyKey.getValue();
            final String routingConfigValue = properties.getProperty(propertyKey);
            try {
                switch (routingConfigName) {
                case "when":
                    route.predicate = Query.prepare(routingConfigValue);
                    break;
                case "hostname":
                    route.hostname = Query.prepare(routingConfigValue);
                    break;
                case "port":
                    route.port = Query.prepare(routingConfigValue);
                    break;
                case "secure":
                    route.secure = Query.prepare(routingConfigValue);
                    break;
                }
            } catch (AttributeExpressionLanguageParsingException e) {
                throw new IllegalArgumentException(format(
                        "Failed to parse NiFi expression language configured"
                                + " for Site-to-Site routing property at '%s' due to '%s'",
                        propertyKey, e.getMessage()), e);
            }
        });
        return route;
    }).map(Route::validate).collect(Collectors.groupingBy(r -> r.protocol));

}

From source file:org.codelibs.fess.app.web.admin.backup.AdminBackupAction.java

public static Consumer<Writer> getSearchLogNdjsonWriteCall() {
    return writer -> {
        final SearchLogBhv bhv = ComponentUtil.getComponent(SearchLogBhv.class);
        bhv.selectCursor(cb -> {
            cb.query().matchAll();
            cb.query().addOrderBy_RequestedAt_Asc();
        }, entity -> {
            final StringBuilder buf = new StringBuilder();
            buf.append('{');
            appendJson("id", entity.getId(), buf).append(',');
            appendJson("query-id", entity.getQueryId(), buf).append(',');
            appendJson("user-info-id", entity.getUserInfoId(), buf).append(',');
            appendJson("user-session-id", entity.getUserSessionId(), buf).append(',');
            appendJson("user", entity.getUser(), buf).append(',');
            appendJson("search-word", entity.getSearchWord(), buf).append(',');
            appendJson("hit-count", entity.getHitCount(), buf).append(',');
            appendJson("query-page-size", entity.getQueryPageSize(), buf).append(',');
            appendJson("query-offset", entity.getQueryOffset(), buf).append(',');
            appendJson("referer", entity.getReferer(), buf).append(',');
            appendJson("languages", entity.getLanguages(), buf).append(',');
            appendJson("roles", entity.getRoles(), buf).append(',');
            appendJson("user-agent", entity.getUserAgent(), buf).append(',');
            appendJson("client-ip", entity.getClientIp(), buf).append(',');
            appendJson("access-type", entity.getAccessType(), buf).append(',');
            appendJson("query-time", entity.getQueryTime(), buf).append(',');
            appendJson("response-time", entity.getResponseTime(), buf).append(',');
            appendJson("requested-at", entity.getRequestedAt(), buf).append(',');
            final Map<String, List<String>> searchFieldMap = entity.getSearchFieldLogList().stream()
                    .collect(Collectors.groupingBy(Pair::getFirst,
                            Collectors.mapping(Pair::getSecond, Collectors.toList())));
            appendJson("search-field", searchFieldMap, buf);
            buf.append('}');
            buf.append('\n');
            try {
                writer.write(buf.toString());
            } catch (final IOException e) {
                throw new IORuntimeException(e);
            }
        });
    };
}

From source file:org.codice.alliance.nsili.common.ResultDAGConverter.java

private static Map<String, List<String>> getAttrMap(List<String> attributes) {
    return attributes.stream().map(ATTRIBUTE_PATTERN::matcher).filter(Matcher::matches).collect(
            Collectors.groupingBy(m -> m.group(2), Collectors.mapping(m -> m.group(3), Collectors.toList())));
}
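
The same idiom in a single statement: run a precompiled pattern over each string, keep only the full matches, and group one capture group by another. A standalone sketch (the pattern and inputs are invented, not the actual NSILI ATTRIBUTE_PATTERN):

import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class RegexGroupSketch {
    private static final Pattern ATTR = Pattern.compile("(\\w+):(\\w+)");

    public static void main(String[] args) {
        Map<String, List<String>> namesByScope = Stream.of("core:title", "core:date", "geo:point")
                .map(ATTR::matcher)
                .filter(Matcher::matches)
                // Group capture 2 (the name) under capture 1 (the scope).
                .collect(Collectors.groupingBy(m -> m.group(1),
                        Collectors.mapping(m -> m.group(2), Collectors.toList())));

        System.out.println(namesByScope); // e.g. {core=[title, date], geo=[point]}
    }
}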

From source file:org.codice.ddf.admin.application.service.migratable.FeatureProcessor.java

/**
 * Updates the specified features' requirements to mark them as required or not.
 *
 * @param report the report in which to record errors if the features cannot be updated
 * @param region the region in which to update the features
 * @param jfeatures the features to update
 * @return <code>true</code> if the features were updated successfully; <code>false</code>
 *     otherwise
 */
public boolean updateFeaturesRequirements(ProfileMigrationReport report, String region,
        Set<JsonFeature> jfeatures) {
    return run(report, region, jfeatures.stream().map(JsonFeature::getId), Operation.UPDATE, jfeatures.stream()
            .collect(Collectors.groupingBy(JsonFeature::isRequired,
                    Collectors.mapping(JsonFeature::toRequirement, Collectors.toSet())))
            .entrySet().stream()
            .map(requirementsToUpdate -> updateFeaturesRequirements(region, requirementsToUpdate))
            .toArray(ThrowingRunnable[]::new));
}
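
Because the grouping key here is a boolean (JsonFeature::isRequired), Collectors.partitioningBy would express the same split and additionally guarantees entries for both true and false even when one side is empty. A sketch of the equivalent collector; Requirement stands in for whatever type JsonFeature::toRequirement actually returns:

    // Hypothetical equivalent of the groupingBy call above;
    // 'Requirement' is a placeholder for the real return type.
    Map<Boolean, Set<Requirement>> byRequired = jfeatures.stream()
            .collect(Collectors.partitioningBy(JsonFeature::isRequired,
                    Collectors.mapping(JsonFeature::toRequirement, Collectors.toSet())));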

From source file:org.gradoop.flink.model.impl.operators.matching.common.query.QueryHandler.java

/**
 * Initializes a cache for the given elements where every key maps to multiple elements.
 * The key selector will be called on every element to extract the cache's key.
 * The value selector will be called on every element to extract the value.
 * Returns a cache of the form
 * KT -> Set<VT>
 *
 * @param elements elements the cache will be built from
 * @param keySelector key selector function extracting cache keys from elements
 * @param valueSelector value selector function extracting cache values from elements
 * @param <EL> the element type
 * @param <KT> the cache key type
 * @param <VT> the cache value type
 * @return cache KT -> Set<VT>
 */
private <EL, KT, VT> Map<KT, Set<VT>> initSetCache(Collection<EL> elements, Function<EL, KT> keySelector,
        Function<EL, VT> valueSelector) {

    return elements.stream()
            .collect(Collectors.groupingBy(keySelector, Collectors.mapping(valueSelector, Collectors.toSet())));
}
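
To close, the same collector pair exercised directly, showing why toSet() is the right downstream for a multimap-style cache: duplicate values collapse within each group (data invented):

import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

public class SetCacheSketch {
    public static void main(String[] args) {
        // Each pair is { sourceVertex, targetVertex }; a->b appears twice.
        List<String[]> edges = Arrays.asList(
                new String[] { "a", "b" }, new String[] { "a", "b" }, new String[] { "a", "c" });

        // toSet() deduplicates per group, so the repeated a->b edge collapses.
        Map<String, Set<String>> targetsBySource = edges.stream()
                .collect(Collectors.groupingBy(e -> e[0],
                        Collectors.mapping(e -> e[1], Collectors.toSet())));

        System.out.println(targetsBySource); // e.g. {a=[b, c]}
    }
}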