List of usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration()
public Configuration getConfiguration();
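getConfiguration() returns the job's Configuration and is the usual way for a Mapper, Reducer, RecordReader, or OutputFormat to read job-level settings at task time. Before the examples from real projects below, here is a minimal sketch of the common pattern; the class name ThresholdMapper and the property my.example.threshold are made up for illustration:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class ThresholdMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    private int threshold;

    @Override
    protected void setup(Context context) {
        // Mapper.Context extends TaskAttemptContext, so getConfiguration() is available here.
        Configuration conf = context.getConfiguration();
        // "my.example.threshold" is a hypothetical property name, set on the driver side
        // via job.getConfiguration().setInt("my.example.threshold", ...).
        threshold = conf.getInt("my.example.threshold", 10);
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Emit only lines at least `threshold` bytes long.
        if (value.getLength() >= threshold) {
            context.write(value, new LongWritable(1));
        }
    }
}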
From source file:gov.jgi.meta.hadoop.input.FastqBlockRecordReader.java
License:Open Source License
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    if (codec != null) {
        in = new FastqBlockLineReader(codec.createInputStream(fileIn), job);
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = false; // don't do this!
            //--start;             or this
            fileIn.seek(start);
        }
        in = new FastqBlockLineReader(fileIn, job);
    }
    this.pos = start;
}
From source file:gov.jgi.meta.hadoop.input.FastqRecordReader.java
License:Open Source License
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    if (codec != null) {
        in = new FastqLineReader(codec.createInputStream(fileIn), job);
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = false; // don't do this!
            //--start;             or this
            fileIn.seek(start);
        }
        in = new FastqLineReader(fileIn, job);
    }
    if (skipFirstLine) {
        // skip first line and re-establish "start".
        start += in.readLine(new Text(), new Text(), 0,
                (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}
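Note the contrast with the stock LineRecordReader pattern (shown in MyLineRecordReader below), which sets skipFirstLine to true, backs start up by one byte, and discards the first, partial line of every non-initial split. These two FASTQ readers deliberately leave skipFirstLine false — the "don't do this!" comments are in the original source — presumably because a FASTQ record spans multiple lines and the custom FastqLineReader/FastqBlockLineReader classes resolve record boundaries themselves.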
From source file:gov.jgi.meta.hadoop.output.FastaOutputFormat.java
License:Open Source License
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    boolean isCompressed = getCompressOutput(job);
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);
    FileSystem fs = file.getFileSystem(conf);
    if (!isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        return (new FastaRecordWriter<K, V>(fileOut));
    } else {
        FSDataOutputStream fileOut = fs.create(file, false);
        return (new FastaRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut))));
    }
}
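Here getCompressOutput and getOutputCompressorClass read their answers from the same Configuration that getRecordWriter obtains through the context. A minimal driver-side sketch of how those settings usually get there (the class name and output path are hypothetical):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class CompressedOutputDriver {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance();
        // These helpers only set properties on job.getConfiguration(); the
        // OutputFormat later reads them back via TaskAttemptContext.getConfiguration().
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
        FileOutputFormat.setOutputPath(job, new Path("/tmp/fasta-out")); // hypothetical path
        // job.setOutputFormatClass(FastaOutputFormat.class); // to use the format above
    }
}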
From source file:gov.llnl.ontology.text.hbase.XMLRecordReader.java
License:Open Source License
/**
 * Extract the {@link Path} for the file to be processed by this
 * {@link XMLRecordReader}.
 */
public void initialize(InputSplit isplit, TaskAttemptContext context) throws IOException, InterruptedException {
    Configuration config = context.getConfiguration();

    // Get the file stream for the xml file.
    FileSplit split = (FileSplit) isplit;
    Path file = split.getPath();
    FileSystem fs = file.getFileSystem(config);
    fsin = (useGzip) ? new GZIPInputStream(fs.open(split.getPath())) : fs.open(split.getPath());
    fsin = new BufferedInputStream(fsin);

    // Setup the limits of the xml file.
    start = split.getStart();
    end = start + split.getLength();
    pos = 0;

    // Get the xml document delimiters for this xml file.
    if (!config.get(DELIMITER_TAG).equals("")) {
        startTag = ("<" + config.get(DELIMITER_TAG)).getBytes();
        endTag = ("</" + config.get(DELIMITER_TAG) + ">").getBytes();
    } else {
        String fileNameBase = file.getName().replace(".xml", "");
        startTag = ("<" + fileNameBase).getBytes();
        endTag = ("</" + fileNameBase).getBytes();
    }
    context.setStatus(file.getName() + " " + pos + " " + end);
}
From source file:gr.ntua.h2rdf.inputFormat.HFileRecordReaderBufferedScan.java
License:Open Source License
/**
 * Initializes the reader.
 *
 * @param inputsplit The split to work with.
 * @param context The current task context.
 * @throws IOException When setting up the reader fails.
 * @throws InterruptedException When the job is aborted.
 * @see org.apache.hadoop.mapreduce.RecordReader#initialize(
 *      org.apache.hadoop.mapreduce.InputSplit,
 *      org.apache.hadoop.mapreduce.TaskAttemptContext)
 */
@Override
public void initialize(InputSplit inputsplit, TaskAttemptContext context) throws IOException, InterruptedException {
    tsplit = (TableColumnSplit) inputsplit;
    scan = new Scan();
    /*byte[] rowid = tsplit.getStartRow();
    byte[] startr = new byte[19];
    byte[] stopr = new byte[19];
    for (int i = 0; i < rowid.length; i++) {
        startr[i] = rowid[i];
        stopr[i] = rowid[i];
    }
    if (rowid.length == 18) {
        startr[18] = (byte) 0;
        stopr[18] = (byte) MyNewTotalOrderPartitioner.MAX_HBASE_BUCKETS;
    }
    if (rowid.length == 10) {
        for (int i = 10; i < startr.length - 1; i++) {
            startr[i] = (byte) 0;
            stopr[i] = (byte) 255;
        }
        startr[startr.length - 1] = (byte) 0;
        stopr[startr.length - 1] = (byte) MyNewTotalOrderPartitioner.MAX_HBASE_BUCKETS;
    }*/
    scan.setStartRow(tsplit.getStartRow());
    scan.setStopRow(tsplit.getStopRow());
    scan.setCaching(30000);     // good for mapreduce scan
    scan.setCacheBlocks(false); // good for mapreduce scan
    scan.setBatch(30000);       // good for mapreduce scan

    byte[] a, bid = null;
    a = Bytes.toBytes("A");
    bid = new byte[a.length];
    for (int i = 0; i < a.length; i++) {
        bid[i] = a[i];
    }
    //System.out.println(Bytes.toStringBinary(bid));
    scan.addFamily(bid);
    HTable table = new HTable(HBconf, tsplit.getTable());
    resultScanner = table.getScanner(scan);
    //System.out.println(Bytes.toStringBinary(scan.getStartRow()));
    //System.out.println(Bytes.toStringBinary(scan.getStopRow()));
    /* System.out.println(Bytes.toString(Bytes.toBytes(scan.getInputColumns())));
    Get get = new Get(scan.getStartRow());
    Result re;
    System.out.println("iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii");
    while ((re = resultScanner.next()) != null) {
        System.out.println("o");
        System.out.println(re.size());
        //System.out.println(String.format("%s$$%s ", var1, Bytes.toString(list.next().getQualifier())));
    }
    System.exit(1);*/

    result = resultScanner.next();
    more = false;
    if (result == null) {
        empty = true;
    } else {
        more = true;
        list = result.list().iterator();
        kv = list.next();
    }

    Configuration conf = context.getConfiguration();
    String newjoinVars = conf.get("input.patId");
    String joinVars = newjoinVars.split(tsplit.getFname())[1];
    joinVars = joinVars.substring(0, joinVars.indexOf("$$") - 1);
    String vars = tsplit.getVars();
    StringTokenizer vtok = new StringTokenizer(vars);
    varsno = 0;
    while (vtok.hasMoreTokens()) {
        vtok.nextToken();
        varsno++;
    }
    if (varsno == 1) {
        StringTokenizer vtok2 = new StringTokenizer(vars);
        v1 = vtok2.nextToken();
    } else if (varsno == 2) {
        StringTokenizer vtok2 = new StringTokenizer(vars);
        v1 = vtok2.nextToken();
        v2 = vtok2.nextToken();
    }
}
From source file:gr.ntua.h2rdf.inputFormat.MultiHFileOutputFormat.java
License:Open Source License
public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(final TaskAttemptContext context)
        throws IOException, InterruptedException {
    return new RecordWriter<ImmutableBytesWritable, KeyValue>() {
        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            for (RecordWriter<ImmutableBytesWritable, KeyValue> writer : writers.values()) {
                writer.close(context);
            }
        }

        @Override
        public void write(ImmutableBytesWritable key, KeyValue value) throws IOException, InterruptedException {
            RecordWriter<ImmutableBytesWritable, KeyValue> writer = writers.get(key);
            if (writer == null) {
                final Path outputPath = new Path(
                        FileOutputFormat.getOutputPath(context).toString() + "/" + Bytes.toString(key.get()));
                writer = new RecordWriter<ImmutableBytesWritable, KeyValue>() {
                    final FileOutputCommitter committer = new FileOutputCommitter(outputPath, context);
                    final Path outputdir = committer.getWorkPath();
                    final Configuration conf = context.getConfiguration();
                    final FileSystem fs = outputdir.getFileSystem(conf);
                    final long maxsize = conf.getLong("hbase.hregion.max.filesize",
                            HConstants.DEFAULT_MAX_FILE_SIZE);
                    final int blocksize = conf.getInt("hfile.min.blocksize.size", HFile.DEFAULT_BLOCKSIZE);
                    // Invented config. Add to hbase-*.xml if other than default compression.
                    final String compression = conf.get("hfile.compression",
                            Compression.Algorithm.NONE.getName());
                    // Map of families to writers and how much has been output on the writer.
                    final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
                            Bytes.BYTES_COMPARATOR);
                    byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
                    final byte[] now = Bytes.toBytes(System.currentTimeMillis());
                    boolean rollRequested = false;

                    public void write(ImmutableBytesWritable row, KeyValue kv) throws IOException {
                        // null input == user explicitly wants to flush
                        if (row == null && kv == null) {
                            rollWriters();
                            return;
                        }
                        byte[] rowKey = kv.getRow();
                        long length = kv.getLength();
                        byte[] family = kv.getFamily();
                        WriterLength wl = this.writers.get(family);
                        // If this is a new column family, verify that the directory exists
                        if (wl == null) {
                            fs.mkdirs(new Path(outputdir, Bytes.toString(family)));
                        }
                        // If any of the HFiles for the column families has reached
                        // maxsize, we need to roll all the writers
                        if (wl != null && wl.written + length >= maxsize) {
                            this.rollRequested = true;
                        }
                        // This can only happen once a row is finished though
                        if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
                            rollWriters();
                        }
                        // create a new HLog writer, if necessary
                        if (wl == null || wl.writer == null) {
                            wl = getNewWriter(family);
                        }
                        // we now have the proper HLog writer. full steam ahead
                        kv.updateLatestStamp(this.now);
                        wl.writer.append(kv);
                        wl.written += length;
                        // Copy the row so we know when a row transition happens.
                        this.previousRow = rowKey;
                    }

                    private void rollWriters() throws IOException {
                        for (WriterLength wl : this.writers.values()) {
                            if (wl.writer != null) {
                                close(wl.writer);
                            }
                            wl.writer = null;
                            wl.written = 0;
                        }
                        this.rollRequested = false;
                    }

                    private HFile.Writer getNewWriter(final HFile.Writer writer, final Path familydir,
                            Configuration conf) throws IOException {
                        if (writer != null) {
                            close(writer);
                        }
                        return HFile.getWriterFactoryNoCache(conf).create();
                        //return HFile.getWriterFactory(conf).createWriter(fs, StoreFile.getUniqueFile(fs, familydir),
                        //        blocksize, compression, KeyValue.KEY_COMPARATOR);
                        //return new HFile.Writer(fs, StoreFile.getUniqueFile(fs, familydir),
                        //        blocksize, compression, KeyValue.KEY_COMPARATOR);
                    }

                    private WriterLength getNewWriter(byte[] family) throws IOException {
                        WriterLength wl = new WriterLength();
                        Path familydir = new Path(outputdir, Bytes.toString(family));
                        wl.writer = getNewWriter(wl.writer, familydir, conf);
                        this.writers.put(family, wl);
                        return wl;
                    }

                    private void close(final HFile.Writer w) throws IOException {
                        if (w != null) {
                            w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY,
                                    Bytes.toBytes(System.currentTimeMillis()));
                            w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
                                    Bytes.toBytes(context.getTaskAttemptID().toString()));
                            w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
                            w.close();
                        }
                    }

                    public void close(TaskAttemptContext c) throws IOException, InterruptedException {
                        for (WriterLength wl : this.writers.values()) {
                            close(wl.writer);
                        }
                        committer.commitTask(c);
                    }
                };
                writers.put(key, writer);
            }
            writer.write(new ImmutableBytesWritable(value.getRow()), value);
        }
    };
}
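Usage note: because the outer writers map is keyed on the ImmutableBytesWritable passed to write(), this OutputFormat fans records out into one HFile directory per distinct key under the job's output path, with a per-column-family writer nested inside each — effectively a per-key variant of HBase's HFileOutputFormat for bulk loading.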
From source file:gr.ntua.h2rdf.inputFormat.MyLineRecordReader.java
License:Open Source License
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    MyFileSplit split = (MyFileSplit) (MyInputSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    if (codec != null) {
        in = new LineReader(codec.createInputStream(fileIn), job);
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        in = new LineReader(fileIn, job);
    }
    if (skipFirstLine) {
        // skip first line and re-establish "start".
        start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}
From source file:gr.ntua.h2rdf.inputFormat2.MultiTableInputFormatBase.java
License:Open Source License
/**
 * Builds a TableRecordReader. If no TableRecordReader was provided, uses the
 * default.
 *
 * @param split The split to work with.
 * @param context The current context.
 * @return The newly created record reader.
 * @throws IOException When creating the reader fails.
 * @throws InterruptedException when record reader initialization fails
 * @see org.apache.hadoop.mapreduce.InputFormat#createRecordReader(
 *      org.apache.hadoop.mapreduce.InputSplit,
 *      org.apache.hadoop.mapreduce.TaskAttemptContext)
 */
@Override
public RecordReader<Bindings, BytesWritable> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    TableSplit tSplit = (TableSplit) split;
    if (tSplit.getTableName() == null) {
        throw new IOException("Cannot create a record reader because of a"
                + " previous error. Please look at the previous logs lines from"
                + " the task's full log for more details.");
    }
    HTable table = new HTable(context.getConfiguration(), tSplit.getTableName());
    TableRecordReader trr = this.tableRecordReader;
    // if no table record reader was provided use default
    if (trr == null) {
        trr = new TableRecordReader();
    }
    Scan sc = tSplit.getScan();
    sc.setStartRow(tSplit.getStartRow());
    sc.setStopRow(tSplit.getEndRow());
    trr.setScan(sc);
    trr.setHTable(table);
    trr.initialize(split, context);
    return trr;
}
From source file:hadoop.inputsplit.FastaLineRecordReader.java
License:Apache License
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    done = false;
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.getCodec(file);
    currentValue = new ValueWritable();
    value = new Text();
    tmpValue = new Text();
    tmp = new Text();

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    String homeHdfs = context.getConfiguration().get("HDFS_HOME_DIR");
    //maxK = HadoopUtil.getMaxkFromPatterns(fs, new Path(homeHdfs + Constant.HDFS_PATTERNS_FILE_HDFS));

    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = new LineReader(cIn, job, recordDelimiterBytes);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            in = new LineReader(codec.createInputStream(fileIn, decompressor), job, recordDelimiterBytes);
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        in = new LineReader(fileIn, job, recordDelimiterBytes);
        filePosition = fileIn;
    }

    // If this is not the first split, we always throw away first record
    // because we always (except the last split) read one extra line in
    // next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;

    setKeySeq(fs, job); // set currentKey
    nextMyKeyValue();   // read the first record, if one exists
}
From source file:hadoop.TweetRecordReader.java
License:Apache License
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());

    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = new LineReader(cIn, job);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            in = new LineReader(codec.createInputStream(fileIn, decompressor), job);
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        in = new LineReader(fileIn, job);
        filePosition = fileIn;
    }
    this.pos = start;
}