org.apache.beam.sdk.values.PCollectionList.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.beam.sdk.values.PCollectionList.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.beam.sdk.values;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.transforms.Flatten;
import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.transforms.Partition;

/**
 * A {@link PCollectionList PCollectionList&lt;T&gt;} is an immutable list of homogeneously
 * typed {@link PCollection PCollection&lt;T&gt;s}. A {@link PCollectionList} is used, for
 * instance, as the input to
 * {@link Flatten} or the output of {@link Partition}.
 *
 * <p>PCollectionLists can be created and accessed as follows:
 * <pre> {@code
 * PCollection<String> pc1 = ...;
 * PCollection<String> pc2 = ...;
 * PCollection<String> pc3 = ...;
 *
 * // Create a PCollectionList with three PCollections:
 * PCollectionList<String> pcs = PCollectionList.of(pc1).and(pc2).and(pc3);
 *
 * // Create an empty PCollectionList:
 * Pipeline p = ...;
 * PCollectionList<String> pcs2 = PCollectionList.<String>empty(p);
 *
 * // Get PCollections out of a PCollectionList, by index (origin 0):
 * PCollection<String> pcX = pcs.get(1);
 * PCollection<String> pcY = pcs.get(0);
 * PCollection<String> pcZ = pcs.get(2);
 *
 * // Get a list of all PCollections in a PCollectionList:
 * List<PCollection<String>> allPcs = pcs.getAll();
 * } </pre>
 *
 * @param <T> the type of the elements of all the {@link PCollection PCollections} in this list
 */
public class PCollectionList<T> implements PInput, POutput {
    /**
     * Returns an empty {@link PCollectionList} that is part of the given {@link Pipeline}.
     *
     * <p>Longer {@link PCollectionList PCollectionLists} can be created by calling
     * {@link #and} on the result.
     *
     * @param pipeline the {@link Pipeline} the returned list belongs to
     * @return a list containing no {@link PCollection PCollections}
     */
    public static <T> PCollectionList<T> empty(Pipeline pipeline) {
        return new PCollectionList<>(pipeline);
    }

    /**
     * Returns a singleton {@link PCollectionList} containing the given {@link PCollection}.
     *
     * <p>The resulting list belongs to the {@link Pipeline} reported by {@code pc.getPipeline()}.
     *
     * <p>Longer {@link PCollectionList PCollectionLists} can be created by calling
     * {@link #and} on the result.
     *
     * @param pc the single {@link PCollection} to wrap
     * @return a list containing exactly {@code pc}
     */
    public static <T> PCollectionList<T> of(PCollection<T> pc) {
        return new PCollectionList<T>(pc.getPipeline()).and(pc);
    }

    /**
     * Returns a {@link PCollectionList} containing the given {@link PCollection PCollections},
     * in order.
     *
     * <p>The argument list cannot be empty.
     *
     * <p>All the {@link PCollection PCollections} in the resulting {@link PCollectionList} must be
     * part of the same {@link Pipeline}; the pipeline of the first element is used as the
     * pipeline of the list.
     *
     * <p>Longer PCollectionLists can be created by calling
     * {@link #and} on the result.
     *
     * @param pcs the {@link PCollection PCollections} to place in the list; traversed exactly once
     * @return a list containing every element of {@code pcs}, in iteration order
     * @throws IllegalArgumentException if {@code pcs} is empty, or if its elements do not all
     *     belong to the same {@link Pipeline}
     */
    public static <T> PCollectionList<T> of(Iterable<PCollection<T>> pcs) {
        Iterator<PCollection<T>> pcsIter = pcs.iterator();
        if (!pcsIter.hasNext()) {
            throw new IllegalArgumentException(
                    "must either have a non-empty list of PCollections, " + "or must first call empty(Pipeline)");
        }
        // Keep consuming the same iterator rather than asking pcs for a second one: that way the
        // element that determined the pipeline is guaranteed to be the first element of the list,
        // and the Iterable only has to support a single traversal.
        PCollection<T> first = pcsIter.next();
        return new PCollectionList<T>(first.getPipeline()).and(first).appendRemaining(pcsIter);
    }

    /**
     * Returns a new {@link PCollectionList} that has all the {@link PCollection PCollections} of
     * this {@link PCollectionList} plus the given {@link PCollection} appended to the end.
     *
     * <p>All the {@link PCollection PCollections} in the resulting {@link PCollectionList} must be
     * part of the same {@link Pipeline}.
     *
     * @param pc the {@link PCollection} to append
     * @return a new list; this list is left unchanged
     * @throws IllegalArgumentException if {@code pc} belongs to a different {@link Pipeline}
     */
    public PCollectionList<T> and(PCollection<T> pc) {
        TaggedPValue appended = tagForThisPipeline(pc);
        return new PCollectionList<>(pipeline,
                ImmutableList.<TaggedPValue>builder().addAll(pcollections).add(appended).build());
    }

    /**
     * Returns a new {@link PCollectionList} that has all the {@link PCollection PCollections} of
     * this {@link PCollectionList} plus the given {@link PCollection PCollections} appended to the
     * end, in order.
     *
     * <p>All the {@link PCollection PCollections} in the resulting {@link PCollectionList} must be
     * part of the same {@link Pipeline}.
     *
     * @param pcs the {@link PCollection PCollections} to append, in iteration order
     * @return a new list; this list is left unchanged
     * @throws IllegalArgumentException if any element of {@code pcs} belongs to a different
     *     {@link Pipeline}
     */
    public PCollectionList<T> and(Iterable<PCollection<T>> pcs) {
        return appendRemaining(pcs.iterator());
    }

    /**
     * Returns the number of {@link PCollection PCollections} in this {@link PCollectionList}.
     */
    public int size() {
        return pcollections.size();
    }

    /**
     * Returns the {@link PCollection} at the given index (origin zero).
     *
     * @throws IndexOutOfBoundsException if the index is out of the range
     * {@code [0..size()-1]}.
     */
    public PCollection<T> get(int index) {
        @SuppressWarnings("unchecked") // Type-safe by construction
        PCollection<T> value = (PCollection<T>) pcollections.get(index).getValue();
        return value;
    }

    /**
     * Returns an immutable List of all the {@link PCollection PCollections} in this
     * {@link PCollectionList}.
     *
     * <p>A fresh list is built on every call; the tags associated with the
     * {@link PCollection PCollections} are not included.
     */
    public List<PCollection<T>> getAll() {
        ImmutableList.Builder<PCollection<T>> res = ImmutableList.builder();
        for (TaggedPValue value : pcollections) {
            @SuppressWarnings("unchecked") // Type-safe by construction
            PCollection<T> typedValue = (PCollection<T>) value.getValue();
            res.add(typedValue);
        }
        return res.build();
    }

    /**
     * Like {@link #apply(String, PTransform)} but defaulting to the name
     * of the {@code PTransform}.
     *
     * @return the output of the applied {@link PTransform}
     */
    public <OutputT extends POutput> OutputT apply(PTransform<PCollectionList<T>, OutputT> t) {
        return Pipeline.applyTransform(this, t);
    }

    /**
     * Applies the given {@link PTransform} to this input {@link PCollectionList},
     * using {@code name} to identify this specific application of the transform.
     * This name is used in various places, including the monitoring UI, logging,
     * and to stably identify this application node in the job graph.
     *
     * @return the output of the applied {@link PTransform}
     */
    public <OutputT extends POutput> OutputT apply(String name, PTransform<PCollectionList<T>, OutputT> t) {
        return Pipeline.applyTransform(name, this, t);
    }

    /////////////////////////////////////////////////////////////////////////////
    // Internal details below here.

    /** The {@link Pipeline} that every {@link PCollection} in this list must belong to. */
    final Pipeline pipeline;
    /**
     * The {@link PCollection PCollections} contained by this {@link PCollectionList}, each paired
     * with an arbitrary tag.
     */
    final List<TaggedPValue> pcollections;

    PCollectionList(Pipeline pipeline) {
        this(pipeline, ImmutableList.<TaggedPValue>of());
    }

    PCollectionList(Pipeline pipeline, List<TaggedPValue> values) {
        this.pipeline = pipeline;
        this.pcollections = ImmutableList.copyOf(values);
    }

    /**
     * Checks that {@code pc} belongs to this list's {@link Pipeline} and pairs it with a newly
     * created {@link TupleTag}.
     *
     * <p>The pipeline comparison is by reference identity, not {@code equals}.
     *
     * @throws IllegalArgumentException if {@code pc} belongs to a different {@link Pipeline}
     */
    private TaggedPValue tagForThisPipeline(PCollection<T> pc) {
        if (pc.getPipeline() != pipeline) {
            throw new IllegalArgumentException("PCollections come from different Pipelines");
        }
        return TaggedPValue.of(new TupleTag<T>(), pc);
    }

    /**
     * Returns a new list holding this list's contents followed by every element still available
     * from {@code remaining}, in order. The iterator is drained by this call.
     *
     * @throws IllegalArgumentException if any element belongs to a different {@link Pipeline}
     */
    private PCollectionList<T> appendRemaining(Iterator<PCollection<T>> remaining) {
        ImmutableList.Builder<TaggedPValue> builder = ImmutableList.builder();
        builder.addAll(pcollections);
        while (remaining.hasNext()) {
            builder.add(tagForThisPipeline(remaining.next()));
        }
        return new PCollectionList<>(pipeline, builder.build());
    }

    @Override
    public Pipeline getPipeline() {
        return pipeline;
    }

    /**
     * Returns a map from each element's tag to the element itself, in list order.
     */
    @Override
    public Map<TupleTag<?>, PValue> expand() {
        ImmutableMap.Builder<TupleTag<?>, PValue> expanded = ImmutableMap.builder();
        for (TaggedPValue tagged : pcollections) {
            expanded.put(tagged.getTag(), tagged.getValue());
        }
        return expanded.build();
    }

    /**
     * Renames every contained {@link PCollection} whose current name equals the default name
     * derived from {@code transformName} to {@code <transformName>.out<index>}, where
     * {@code index} is the element's position in this list.
     *
     * <p>{@link PCollection PCollections} with any other name are left untouched. The
     * {@code input} and {@code transform} arguments are not used here.
     */
    @Override
    public void finishSpecifyingOutput(String transformName, PInput input, PTransform<?, ?> transform) {
        // All component PCollections will have already been finished.
        int i = 0;
        for (TaggedPValue tpv : pcollections) {
            @SuppressWarnings("unchecked")
            PCollection<T> pc = (PCollection<T>) tpv.getValue();
            if (pc.getName().equals(PValueBase.defaultName(transformName))) {
                // The position suffix keeps the default names of sibling outputs distinct.
                pc.setName(String.format("%s.%s%s", transformName, "out", i));
            }
            i++;
        }
    }

    /**
     * Two lists are equal when they have equal pipelines and contain equal
     * {@link PCollection PCollections} in the same order. The per-element tags are not compared.
     */
    @Override
    public boolean equals(Object other) {
        if (this == other) {
            return true;
        }
        if (!(other instanceof PCollectionList)) {
            return false;
        }
        PCollectionList<?> that = (PCollectionList<?>) other;
        // Null-safe comparison, matching hashCode(), which also tolerates a null pipeline.
        return Objects.equals(this.pipeline, that.pipeline) && this.getAll().equals(that.getAll());
    }

    @Override
    public int hashCode() {
        return Objects.hash(this.pipeline, this.getAll());
    }
}