Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

Introduction

On this page you can find example usage of org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration(), drawn from open-source projects.

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
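
The method is typically called at the start of RecordReader.initialize() or OutputFormat.getRecordWriter() to read job-level settings, as all of the examples below do. As a quick orientation, here is a minimal sketch of that pattern. It is not taken from any of the projects listed below, it assumes the usual Hadoop imports (org.apache.hadoop.conf.Configuration, org.apache.hadoop.fs.*, org.apache.hadoop.mapreduce.lib.input.FileSplit), and the property name "my.custom.max.length" is a hypothetical placeholder:

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    // Every task attempt carries the job Configuration; getConfiguration() exposes it
    // so that record readers and writers can look up job-level settings.
    Configuration conf = context.getConfiguration();

    // "my.custom.max.length" is a hypothetical property used only for illustration.
    int maxLineLength = conf.getInt("my.custom.max.length", Integer.MAX_VALUE);

    // The same Configuration is what the file system client needs to resolve the split's path.
    FileSplit split = (FileSplit) genericSplit;
    Path file = split.getPath();
    FileSystem fs = file.getFileSystem(conf);
    FSDataInputStream fileIn = fs.open(file);
    // ... seek to split.getStart() and read up to maxLineLength bytes per record,
    // as the real examples below do ...
}

The real-world examples that follow use the same pattern: obtain the Configuration from the TaskAttemptContext first, then hand it to CompressionCodecFactory, Path.getFileSystem(), or a custom line reader.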

Usage

From source file: gov.jgi.meta.hadoop.input.FastqBlockRecordReader.java

License: Open Source License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    if (codec != null) {
        in = new FastqBlockLineReader(codec.createInputStream(fileIn), job);
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = false; // don't do this!
            //--start;                      or this
            fileIn.seek(start);
        }
        in = new FastqBlockLineReader(fileIn, job);
    }
    this.pos = start;
}

From source file: gov.jgi.meta.hadoop.input.FastqRecordReader.java

License: Open Source License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    if (codec != null) {
        in = new FastqLineReader(codec.createInputStream(fileIn), job);
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = false; // don't do this!
            //--start;                      or this
            fileIn.seek(start);
        }
        in = new FastqLineReader(fileIn, job);
    }
    if (skipFirstLine) { // skip first line and re-establish "start".
        start += in.readLine(new Text(), new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}

From source file: gov.jgi.meta.hadoop.output.FastaOutputFormat.java

License: Open Source License

public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    boolean isCompressed = getCompressOutput(job);
    CompressionCodec codec = null;
    String extension = "";

    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);
    FileSystem fs = file.getFileSystem(conf);
    if (!isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        return (new FastaRecordWriter<K, V>(fileOut));
    } else {
        FSDataOutputStream fileOut = fs.create(file, false);
        return (new FastaRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut))));
    }
}

From source file: gov.llnl.ontology.text.hbase.XMLRecordReader.java

License: Open Source License

/**
 * Extract the {@link Path} for the file to be processed by this {@link
 * XMLRecordReader}.
 */
public void initialize(InputSplit isplit, TaskAttemptContext context) throws IOException, InterruptedException {
    Configuration config = context.getConfiguration();

    // Get the file stream for the xml file.
    FileSplit split = (FileSplit) isplit;
    Path file = split.getPath();
    FileSystem fs = file.getFileSystem(config);
    fsin = (useGzip) ? new GZIPInputStream(fs.open(split.getPath())) : fs.open(split.getPath());
    fsin = new BufferedInputStream(fsin);

    // Setup the limits of the xml file.
    start = split.getStart();
    end = start + split.getLength();
    pos = 0;

    // Get the xml document delimiters for this xml file.
    if (!config.get(DELIMITER_TAG).equals("")) {
        startTag = ("<" + config.get(DELIMITER_TAG)).getBytes();
        endTag = ("</" + config.get(DELIMITER_TAG) + ">").getBytes();
    } else {
        String fileNameBase = file.getName().replace(".xml", "");
        startTag = ("<" + fileNameBase).getBytes();
        endTag = ("</" + fileNameBase).getBytes();
    }
    context.setStatus(file.getName() + " " + pos + " " + end);
}

From source file: gr.ntua.h2rdf.inputFormat.HFileRecordReaderBufferedScan.java

License: Open Source License

/**
 * Initializes the reader.
 * 
 * @param inputsplit  The split to work with.
 * @param context  The current task context.
 * @throws IOException When setting up the reader fails.
 * @throws InterruptedException When the job is aborted.
 * @see org.apache.hadoop.mapreduce.RecordReader#initialize(
 *   org.apache.hadoop.mapreduce.InputSplit, 
 *   org.apache.hadoop.mapreduce.TaskAttemptContext)
 */
@Override
public void initialize(InputSplit inputsplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    tsplit = (TableColumnSplit) inputsplit;
    scan = new Scan();
    /*byte[] rowid =tsplit.getStartRow();
    byte[] startr = new byte[19];
    byte[] stopr = new byte[19];
    for (int i = 0; i < rowid.length; i++) {
       startr[i] =rowid[i];
       stopr[i] =rowid[i];
    }
    if (rowid.length==18) {
       startr[18] =(byte)0;
       stopr[18] =(byte)MyNewTotalOrderPartitioner.MAX_HBASE_BUCKETS;
    }
    if (rowid.length==10) {
       for (int i = 10; i < startr.length-1; i++) {
          startr[i] =(byte)0;
          stopr[i] =(byte)255;
       }
       startr[startr.length-1] =(byte)0;
       stopr[startr.length-1] =(byte)MyNewTotalOrderPartitioner.MAX_HBASE_BUCKETS;
    }*/

    scan.setStartRow(tsplit.getStartRow());
    scan.setStopRow(tsplit.getStopRow());
    scan.setCaching(30000); //good for mapreduce scan
    scan.setCacheBlocks(false); //good for mapreduce scan
    scan.setBatch(30000); //good for mapreduce scan
    byte[] a, bid = null;
    a = Bytes.toBytes("A");
    bid = new byte[a.length];
    for (int i = 0; i < a.length; i++) {
        bid[i] = a[i];
    }

    //System.out.println(Bytes.toStringBinary(bid));
    scan.addFamily(bid);

    HTable table = new HTable(HBconf, tsplit.getTable());
    resultScanner = table.getScanner(scan);

    //System.out.println(Bytes.toStringBinary(scan.getStartRow()));
    //System.out.println(Bytes.toStringBinary(scan.getStopRow()));
    /*
    System.out.println(Bytes.toString(Bytes.toBytes(scan.getInputColumns())));
    Get get = new Get(scan.getStartRow());
    Result re;
    System.out.println("iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii");
    while((re = resultScanner.next())!=null){
       System.out.println("o");
       System.out.println(re.size());
         //System.out.println(String.format("%s$$%s ", var1, Bytes.toString(list.next().getQualifier())));
    }
    System.exit(1);*/

    result = resultScanner.next();
    more = false;
    if (result == null) {
        empty = true;
    } else {
        more = true;
        list = result.list().iterator();
        kv = list.next();
    }

    Configuration conf = context.getConfiguration();
    String newjoinVars = conf.get("input.patId");
    String joinVars = newjoinVars.split(tsplit.getFname())[1];
    joinVars = joinVars.substring(0, joinVars.indexOf("$$") - 1);
    String vars = tsplit.getVars();
    StringTokenizer vtok = new StringTokenizer(vars);
    varsno = 0;
    while (vtok.hasMoreTokens()) {
        vtok.nextToken();
        varsno++;
    }
    if (varsno == 1) {
        StringTokenizer vtok2 = new StringTokenizer(vars);
        v1 = vtok2.nextToken();
    } else if (varsno == 2) {
        StringTokenizer vtok2 = new StringTokenizer(vars);
        v1 = vtok2.nextToken();
        v2 = vtok2.nextToken();
    }
}

From source file: gr.ntua.h2rdf.inputFormat.MultiHFileOutputFormat.java

License: Open Source License

public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(final TaskAttemptContext context)
        throws IOException, InterruptedException {
    return new RecordWriter<ImmutableBytesWritable, KeyValue>() {

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            for (RecordWriter<ImmutableBytesWritable, KeyValue> writer : writers.values()) {
                writer.close(context);
            }
        }

        @Override
        public void write(ImmutableBytesWritable key, KeyValue value) throws IOException, InterruptedException {
            RecordWriter<ImmutableBytesWritable, KeyValue> writer = writers.get(key);
            if (writer == null) {
                final Path outputPath = new Path(
                        FileOutputFormat.getOutputPath(context).toString() + "/" + Bytes.toString(key.get()));
                writer = new RecordWriter<ImmutableBytesWritable, KeyValue>() {
                    final FileOutputCommitter committer = new FileOutputCommitter(outputPath, context);
                    final Path outputdir = committer.getWorkPath();
                    final Configuration conf = context.getConfiguration();
                    final FileSystem fs = outputdir.getFileSystem(conf);
                    final long maxsize = conf.getLong("hbase.hregion.max.filesize",
                            HConstants.DEFAULT_MAX_FILE_SIZE);
                    final int blocksize = conf.getInt("hfile.min.blocksize.size", HFile.DEFAULT_BLOCKSIZE);
                    // Invented config.  Add to hbase-*.xml if other than default compression.
                    final String compression = conf.get("hfile.compression",
                            Compression.Algorithm.NONE.getName());

                    // Map of families to writers and how much has been output on the writer.
                    final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
                            Bytes.BYTES_COMPARATOR);
                    byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
                    final byte[] now = Bytes.toBytes(System.currentTimeMillis());
                    boolean rollRequested = false;

                    public void write(ImmutableBytesWritable row, KeyValue kv) throws IOException {
                        // null input == user explicitly wants to flush
                        if (row == null && kv == null) {
                            rollWriters();
                            return;
                        }

                        byte[] rowKey = kv.getRow();
                        long length = kv.getLength();
                        byte[] family = kv.getFamily();
                        WriterLength wl = this.writers.get(family);

                        // If this is a new column family, verify that the directory exists
                        if (wl == null) {
                            fs.mkdirs(new Path(outputdir, Bytes.toString(family)));
                        }

                        // If any of the HFiles for the column families has reached
                        // maxsize, we need to roll all the writers
                        if (wl != null && wl.written + length >= maxsize) {
                            this.rollRequested = true;
                        }

                        // This can only happen once a row is finished though
                        if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
                            rollWriters();
                        }

                        // create a new HFile writer, if necessary
                        if (wl == null || wl.writer == null) {
                            wl = getNewWriter(family);
                        }

                        // we now have the proper HFile writer. full steam ahead
                        kv.updateLatestStamp(this.now);
                        wl.writer.append(kv);
                        wl.written += length;

                        // Copy the row so we know when a row transitions.
                        this.previousRow = rowKey;
                    }

                    private void rollWriters() throws IOException {
                        for (WriterLength wl : this.writers.values()) {
                            if (wl.writer != null) {
                                close(wl.writer);
                            }
                            wl.writer = null;
                            wl.written = 0;
                        }
                        this.rollRequested = false;
                    }

                    private HFile.Writer getNewWriter(final HFile.Writer writer, final Path familydir,
                            Configuration conf) throws IOException {
                        if (writer != null) {
                            close(writer);
                        }

                        return HFile.getWriterFactoryNoCache(conf).create();
                        //return HFile.getWriterFactory(conf).createWriter(fs,  StoreFile.getUniqueFile(fs, familydir),
                        //        blocksize, compression, KeyValue.KEY_COMPARATOR);

                        // return new HFile.Writer(fs,  StoreFile.getUniqueFile(fs, familydir),
                        //         blocksize, compression, KeyValue.KEY_COMPARATOR);
                    }

                    private WriterLength getNewWriter(byte[] family) throws IOException {
                        WriterLength wl = new WriterLength();
                        Path familydir = new Path(outputdir, Bytes.toString(family));
                        wl.writer = getNewWriter(wl.writer, familydir, conf);
                        this.writers.put(family, wl);
                        return wl;
                    }

                    private void close(final HFile.Writer w) throws IOException {
                        if (w != null) {
                            w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY,
                                    Bytes.toBytes(System.currentTimeMillis()));
                            w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
                                    Bytes.toBytes(context.getTaskAttemptID().toString()));
                            w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
                            w.close();
                        }
                    }

                    public void close(TaskAttemptContext c) throws IOException, InterruptedException {
                        for (WriterLength wl : this.writers.values()) {
                            close(wl.writer);
                        }
                        committer.commitTask(c);
                    }
                };

                writers.put(key, writer);
            }

            writer.write(new ImmutableBytesWritable(value.getRow()), value);
        }
    };
}

From source file: gr.ntua.h2rdf.inputFormat.MyLineRecordReader.java

License: Open Source License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    MyFileSplit split = (MyFileSplit) (MyInputSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    if (codec != null) {
        in = new LineReader(codec.createInputStream(fileIn), job);
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        in = new LineReader(fileIn, job);
    }
    if (skipFirstLine) { // skip first line and re-establish "start".
        start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}

From source file: gr.ntua.h2rdf.inputFormat2.MultiTableInputFormatBase.java

License: Open Source License

/**
 * Builds a TableRecordReader. If no TableRecordReader was provided, uses the
 * default.
 *
 * @param split The split to work with.
 * @param context The current context.
 * @return The newly created record reader.
 * @throws IOException When creating the reader fails.
 * @throws InterruptedException when record reader initialization fails
 * @see org.apache.hadoop.mapreduce.InputFormat#createRecordReader(
 *      org.apache.hadoop.mapreduce.InputSplit,
 *      org.apache.hadoop.mapreduce.TaskAttemptContext)
 */
@Override
public RecordReader<Bindings, BytesWritable> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    TableSplit tSplit = (TableSplit) split;

    if (tSplit.getTableName() == null) {
        throw new IOException("Cannot create a record reader because of a"
                + " previous error. Please look at the previous logs lines from"
                + " the task's full log for more details.");
    }
    HTable table = new HTable(context.getConfiguration(), tSplit.getTableName());

    TableRecordReader trr = this.tableRecordReader;
    // if no table record reader was provided use default
    if (trr == null) {
        trr = new TableRecordReader();
    }
    Scan sc = tSplit.getScan();
    sc.setStartRow(tSplit.getStartRow());
    sc.setStopRow(tSplit.getEndRow());
    trr.setScan(sc);
    trr.setHTable(table);
    trr.initialize(split, context);
    return trr;
}

From source file: hadoop.inputsplit.FastaLineRecordReader.java

License: Apache License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {

    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();

    done = false;

    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();

    file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.getCodec(file);

    currentValue = new ValueWritable();
    value = new Text();
    tmpValue = new Text();
    tmp = new Text();

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());

    String homeHdfs = context.getConfiguration().get("HDFS_HOME_DIR");
    //maxK = HadoopUtil.getMaxkFromPatterns(fs, new Path(homeHdfs+Constant.HDFS_PATTERNS_FILE_HDFS));

    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = new LineReader(cIn, job, recordDelimiterBytes);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            in = new LineReader(codec.createInputStream(fileIn, decompressor), job, recordDelimiterBytes);
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        in = new LineReader(fileIn, job, recordDelimiterBytes);
        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away first record
    // because we always (except the last split) read one extra line in
    // next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;

    setKeySeq(fs, job); //Set currentKey

    nextMyKeyValue(); // Read the first record if it exists.

}

From source file: hadoop.TweetRecordReader.java

License: Apache License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();

    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());

    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = new LineReader(cIn, job);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            in = new LineReader(codec.createInputStream(fileIn, decompressor), job);
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        in = new LineReader(fileIn, job);
        filePosition = fileIn;
    }

    this.pos = start;
}