Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one or more contributor license * agreements. See the NOTICE file distributed with this work for additional information regarding * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance with the License. You may obtain a * copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the License * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express * or implied. See the License for the specific language governing permissions and limitations under * the License. */ package org.apache.fluo.mapreduce; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.util.List; import java.util.Map.Entry; import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat; import org.apache.accumulo.core.client.mapreduce.RangeInputSplit; import org.apache.accumulo.core.client.security.tokens.PasswordToken; import org.apache.fluo.api.config.FluoConfiguration; import org.apache.fluo.api.config.ScannerConfiguration; import org.apache.fluo.api.config.SimpleConfiguration; import org.apache.fluo.api.data.Bytes; import org.apache.fluo.api.data.Span; import org.apache.fluo.api.iterator.ColumnIterator; import org.apache.fluo.api.iterator.RowIterator; import org.apache.fluo.core.impl.Environment; import org.apache.fluo.core.impl.TransactionImpl; import org.apache.fluo.core.util.SpanUtil; import org.apache.hadoop.mapreduce.InputFormat; import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.JobContext; import org.apache.hadoop.mapreduce.RecordReader; import org.apache.hadoop.mapreduce.TaskAttemptContext; /** * This input format reads a consistent snapshot of Fluo rows from a Fluo table. */ public class FluoRowInputFormat extends InputFormat<Bytes, ColumnIterator> { private static String TIMESTAMP_CONF_KEY = FluoRowInputFormat.class.getName() + ".timestamp"; private static String PROPS_CONF_KEY = FluoRowInputFormat.class.getName() + ".props"; private static String FAMS_CONF_KEY = FluoRowInputFormat.class.getName() + ".families"; @Override public RecordReader<Bytes, ColumnIterator> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException { return new RecordReader<Bytes, ColumnIterator>() { private Entry<Bytes, ColumnIterator> entry; private RowIterator rowIter; private Environment env = null; private TransactionImpl ti = null; @Override public void close() throws IOException { if (ti != null) { ti.close(); } if (env != null) { env.close(); } } @Override public Bytes getCurrentKey() throws IOException, InterruptedException { return entry.getKey(); } @Override public ColumnIterator getCurrentValue() throws IOException, InterruptedException { return entry.getValue(); } @Override public float getProgress() throws IOException, InterruptedException { // TODO Auto-generated method stub return 0; } @Override public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException { try { ByteArrayInputStream bais = new ByteArrayInputStream( context.getConfiguration().get(PROPS_CONF_KEY).getBytes(StandardCharsets.UTF_8)); env = new Environment(new FluoConfiguration(bais)); ti = new TransactionImpl(env, context.getConfiguration().getLong(TIMESTAMP_CONF_KEY, -1)); // TODO this uses non public Accumulo API! RangeInputSplit ris = (RangeInputSplit) split; Span span = SpanUtil.toSpan(ris.getRange()); ScannerConfiguration sc = new ScannerConfiguration().setSpan(span); for (String fam : context.getConfiguration().getStrings(FAMS_CONF_KEY, new String[0])) { sc.fetchColumnFamily(Bytes.of(fam)); } rowIter = ti.get(sc); } catch (Exception e) { throw new IOException(e); } } @Override public boolean nextKeyValue() throws IOException, InterruptedException { if (rowIter.hasNext()) { entry = rowIter.next(); return true; } return false; } }; } @Override public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException { return new AccumuloInputFormat().getSplits(context); } /** * Configure properties needed to connect to a Fluo application * * @param conf Job configuration * @param config use {@link org.apache.fluo.api.config.FluoConfiguration} to configure * programmatically */ @SuppressWarnings("deprecation") public static void configure(Job conf, SimpleConfiguration config) { try { FluoConfiguration fconfig = new FluoConfiguration(config); try (Environment env = new Environment(fconfig)) { long ts = env.getSharedResources().getTimestampTracker().allocateTimestamp().getTxTimestamp(); conf.getConfiguration().setLong(TIMESTAMP_CONF_KEY, ts); ByteArrayOutputStream baos = new ByteArrayOutputStream(); config.save(baos); conf.getConfiguration().set(PROPS_CONF_KEY, new String(baos.toByteArray(), StandardCharsets.UTF_8)); AccumuloInputFormat.setZooKeeperInstance(conf, fconfig.getAccumuloInstance(), fconfig.getAccumuloZookeepers()); AccumuloInputFormat.setConnectorInfo(conf, fconfig.getAccumuloUser(), new PasswordToken(fconfig.getAccumuloPassword())); AccumuloInputFormat.setInputTableName(conf, env.getTable()); AccumuloInputFormat.setScanAuthorizations(conf, env.getAuthorizations()); } } catch (Exception e) { throw new RuntimeException(e); } } // TODO support text public static void fetchFamilies(Job job, String... fams) { job.getConfiguration().setStrings(FAMS_CONF_KEY, fams); } public static void fetchFamilies(Job job, Bytes... fams) { // TODO support binary data String[] sfams = new String[fams.length]; for (int i = 0; i < sfams.length; i++) { sfams[i] = fams[i].toString(); } fetchFamilies(job, sfams); } // TODO let user set auths // TODO let user set ranges }