Java tutorial
/*
 * Licensed to the University of California, Berkeley under one or more contributor license
 * agreements. See the NOTICE file distributed with this work for additional information regarding
 * copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License. You may obtain a
 * copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package tachyon.client.keyvalue.hadoop;

import java.io.IOException;
import java.util.List;

import javax.annotation.concurrent.ThreadSafe;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileOutputCommitter;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobContext;
import org.apache.hadoop.mapred.TaskAttemptContext;

import com.google.common.collect.Lists;

import tachyon.TachyonURI;
import tachyon.annotation.PublicApi;
import tachyon.client.keyvalue.KeyValueStores;
import tachyon.exception.TachyonException;

/**
 * Extension of {@link FileOutputCommitter} where creating, completing, or deleting a
 * {@link KeyValueStores} in different phases of a job's or task's lifecycle is considered.
 * <p>
 * This committer must be used along with {@link KeyValueOutputFormat} to merge the key-value stores
 * created by each Reducer into one key-value store under the MapReduce output directory.
 */
@PublicApi
@ThreadSafe
public final class KeyValueOutputCommitter extends FileOutputCommitter {
  /** Shared client used to merge and delete key-value stores. */
  private static final KeyValueStores KEY_VALUE_STORES = KeyValueStores.Factory.create();

  /**
   * Lists the entries directly under the task's output directory and returns one URI per entry,
   * formed by joining the entry's name onto the task output URI.
   *
   * @param conf the job configuration, used to resolve the task output URI and the file system
   * @return the URIs of the entries under the task output directory; an empty list when the
   *         listing yields {@code null}
   * @throws IOException if the file system cannot be obtained or the listing fails
   */
  private List<TachyonURI> getTaskTemporaryStores(JobConf conf) throws IOException {
    TachyonURI taskOutputURI = KeyValueOutputFormat.getTaskOutputURI(conf);
    Path taskOutputPath = new Path(taskOutputURI.toString());
    FileSystem fs = taskOutputPath.getFileSystem(conf);
    FileStatus[] subDirs = fs.listStatus(taskOutputPath);
    if (subDirs == null) {
      // Older Hadoop releases document a null return (instead of an exception) when the path does
      // not exist; treat that as "no temporary stores" rather than failing with an NPE.
      return Lists.newArrayList();
    }
    List<TachyonURI> temporaryStores = Lists.newArrayListWithExpectedSize(subDirs.length);
    for (FileStatus subDir : subDirs) {
      temporaryStores.add(taskOutputURI.join(subDir.getPath().getName()));
    }
    return temporaryStores;
  }

  /**
   * {@inheritDoc}
   * <p>
   * Merges every key-value store found under the task's temporary output directory into the store
   * at {@code KeyValueOutputFormat.getJobOutputURI(conf)}, then calls
   * {@link FileOutputCommitter#commitTask(TaskAttemptContext)}. If any merge fails, the base commit
   * is not performed.
   * <p>
   * NOTE(review): earlier documentation stated that the target store is created in
   * {@link #setupJob(JobContext)}; this class does not override that method, so confirm where the
   * job-level store is actually created.
   *
   * @throws IOException if listing the temporary stores fails, if a merge fails (the
   *         {@link TachyonException} is preserved as the cause), or if the base commit fails
   */
  @Override
  public void commitTask(TaskAttemptContext context) throws IOException {
    JobConf conf = context.getJobConf();
    TachyonURI jobOutputURI = KeyValueOutputFormat.getJobOutputURI(conf);
    for (TachyonURI tempStoreUri : getTaskTemporaryStores(conf)) {
      try {
        KEY_VALUE_STORES.merge(tempStoreUri, jobOutputURI);
      } catch (TachyonException e) {
        throw new IOException(e);
      }
    }
    super.commitTask(context);
  }

  /**
   * {@inheritDoc}
   * <p>
   * Deletes the key-value stores under the task's temporary output directory, and then calls
   * {@link FileOutputCommitter#abortTask(TaskAttemptContext)}. The base abort is invoked even when
   * listing or deleting the temporary stores fails, so that the base committer's own cleanup is
   * never skipped; if the base abort itself throws, that exception is the one propagated.
   *
   * @throws IOException if listing the temporary stores fails, if a delete fails (the
   *         {@link TachyonException} is preserved as the cause), or if the base abort fails
   */
  @Override
  public void abortTask(TaskAttemptContext context) throws IOException {
    try {
      for (TachyonURI tempStoreUri : getTaskTemporaryStores(context.getJobConf())) {
        try {
          KEY_VALUE_STORES.delete(tempStoreUri);
        } catch (TachyonException e) {
          throw new IOException(e);
        }
      }
    } finally {
      // Always let the base committer clean up the task attempt, even if store cleanup failed.
      super.abortTask(context);
    }
  }
}