Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.tdunning.plume.local.lazy; import java.io.IOException; import com.google.common.base.Charsets; import com.google.common.collect.Lists; import com.google.common.io.Resources; import com.tdunning.plume.PCollection; import com.tdunning.plume.PTable; import com.tdunning.plume.Pair; import com.tdunning.plume.Plume; import com.tdunning.plume.avro.AvroFile; import com.tdunning.plume.local.lazy.op.Flatten; import com.tdunning.plume.types.PCollectionType; import com.tdunning.plume.types.PTableType; import com.tdunning.plume.types.PType; import com.tdunning.plume.types.StringType; /** * Runtime for Plume implementing deferred execution and optimization. */ public class LazyPlume extends Plume { /** * Just points to a file, doesn't read it * * @param <T> * @param name * @return * @throws IOException */ public <T> PCollection<T> readFile(String name, PCollectionType type) throws IOException { LazyCollection<T> coll = new LazyCollection<T>(); coll.materialized = true; coll.type = type; coll.setFile(name); return coll; } public <T> PCollection<T> fromJava(Iterable<T> source, PCollectionType type) { return new LazyCollection<T>(source, type); } public <K, V> PTable<K, V> fromJava(Iterable<Pair<K, V>> source, PTableType type) { return new LazyTable<K, V>(source, type); } public <T> PCollection<T> flatten(PCollectionType type, PCollection<T>... args) { LazyCollection<T> dest = new LazyCollection<T>(); Flatten<T> flatten = new Flatten<T>(Lists.newArrayList(args), dest); dest.deferredOp = flatten; dest.type = type; for (PCollection<T> col : args) { ((LazyCollection<T>) col).addDownOp(flatten); } return dest; } public <K, V> PTable<K, V> flatten(PTableType type, PCollection<Pair<K, V>>... args) { LazyTable<K, V> dest = new LazyTable<K, V>(); Flatten<Pair<K, V>> flatten = new Flatten<Pair<K, V>>(Lists.newArrayList(args), dest); dest.deferredOp = flatten; dest.type = type; for (PCollection<Pair<K, V>> col : args) { ((LazyCollection<Pair<K, V>>) col).addDownOp(flatten); } return dest; } @Override public <T> PCollection<T> flatten(PCollection<T>... args) { return flatten(((LazyCollection<T>) args[0]).type, args); } @Override public <T> void writeAvroFile(String name, PCollection<T> data, PType<T> type) { throw new RuntimeException("Not done"); } @Override public <T> PCollection<T> fromJava(Iterable<T> source) { return fromJava(source, new PCollectionType(new StringType())); } @Override public PCollection<String> readTextFile(String name) throws IOException { return readFile(name, new PCollectionType(new StringType())); } /** * I guess the convention is that resource files are small enough to be read in memory */ @Override public PCollection<String> readResourceFile(String name) throws IOException { return fromJava(Resources.readLines(Resources.getResource(name), Charsets.UTF_8)); } @Override public <T> PCollection<T> readAvroFile(String name, PType<T> type) { return new AvroFile<T>(name, type); } }