Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

List of usage examples for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration().

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
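
Before the collected listings, here is a minimal self-contained sketch (not drawn from any of the examples below) of the common pattern: a RecordReader pulls job-level settings through the task attempt's Configuration inside initialize(). The class name ConfigurationAwareReader and the property key example.start.tag are hypothetical, used only for illustration.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class ConfigurationAwareReader {

    private String startTag;
    private FileSystem fs;

    // Minimal sketch: read job-level settings from the TaskAttemptContext.
    // "example.start.tag" is a hypothetical property name used only for illustration.
    public void initialize(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        Configuration conf = context.getConfiguration();

        // Properties set on the Job by the driver are visible here.
        startTag = conf.get("example.start.tag", "<record>");

        // The same Configuration also provides access to the file system.
        Path path = ((FileSplit) split).getPath();
        fs = path.getFileSystem(conf);
    }
}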

Usage

From source file: hdfs.hdfsadapter.XmlInputFormat.java

License: Apache License

@Override
public RecordReader<LongWritable, Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
    try {
        return new XmlRecordReader((FileSplit) split, context.getConfiguration());
    } catch (IOException ioe) {
        return null;
    }
}

From source file: hdfs.hdfsadapter.XmlInputFormatOneBufferSolution.java

License: Apache License

@Override
public RecordReader<LongWritable, Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
    try {
        STARTING_TAG = context.getConfiguration().get("start_tag");
        ENDING_TAG = context.getConfiguration().get("end_tag");
        return new XmlRecordReader((FileSplit) split, context.getConfiguration());
    } catch (IOException ioe) {
        return null;
    }
}

From source file: hdfs.hdfsadapter.XmlInputFormatTwoBufferSolution.java

License: Apache License

@Override
public RecordReader<LongWritable, Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
    try {
        END_TAG = context.getConfiguration().get("end_tag");
        START_TAG = context.getConfiguration().get("start_tag");
        return new XmlRecordReader((FileSplit) split, context.getConfiguration());
    } catch (IOException ioe) {
        return null;
    }
}

From source file: ilps.hadoop.ThriftFileInputFormat.java

License: Apache License

@Override
public RecordReader<Text, StreamItemWritable> createRecordReader(InputSplit split, TaskAttemptContext tac)
        throws IOException, InterruptedException {
    return new ThriftRecordReader((FileSplit) split, tac.getConfiguration());
}

From source file: ilps.hadoop.ThriftRecordReader.java

License: Apache License

/**
 * Boilerplate initialization code for file input streams.
 *
 * Tuan - Add the .xz decompressor here
 */
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {

    conf = context.getConfiguration();
    fileSplit = (FileSplit) split;
    start = fileSplit.getStart();
    length = fileSplit.getLength();
    position = 0;

    Path path = fileSplit.getPath();
    FileSystem fs = path.getFileSystem(conf);

    // Some files are corrupted, report them and move on
    try {
        fis = fs.open(path);
        bis = new BufferedInputStream(fis);
        xzis = new XZCompressorInputStream(bis);
        transport = new TIOStreamTransport(xzis);
    } catch (IOException e) {
        LOG.error("Bad file: ", path.toString());
        e.printStackTrace();
    }

    try {
        if (transport != null) {
            transport.open();
        } else {
            // Skip this file
            fis = null;
            return;
        }
    } catch (TTransportException e) {
        e.printStackTrace();
        throw new IOException(e);
    }

    factory = new TBinaryProtocol.Factory();
    tp = factory.getProtocol(transport);
    value = new StreamItemWritable(factory);

}

From source file: input_format.HFileRecordReaderBufferedScan.java

License: Open Source License

/**
 * Initializes the reader.
 * 
 * @param inputsplit  The split to work with.
 * @param context  The current task context.
 * @throws IOException When setting up the reader fails.
 * @throws InterruptedException When the job is aborted.
 * @see org.apache.hadoop.mapreduce.RecordReader#initialize(
 *   org.apache.hadoop.mapreduce.InputSplit, 
 *   org.apache.hadoop.mapreduce.TaskAttemptContext)
 */
@Override
public void initialize(InputSplit inputsplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    tsplit = (TableColumnSplit) inputsplit;
    scan = new Scan();
    /*byte[] rowid =tsplit.getStartRow();
    byte[] startr = new byte[19];
    byte[] stopr = new byte[19];
    for (int i = 0; i < rowid.length; i++) {
       startr[i] =rowid[i];
       stopr[i] =rowid[i];
    }
    if (rowid.length==18) {
       startr[18] =(byte)0;
       stopr[18] =(byte)MyNewTotalOrderPartitioner.MAX_HBASE_BUCKETS;
    }
    if (rowid.length==10) {
       for (int i = 10; i < startr.length-1; i++) {
          startr[i] =(byte)0;
          stopr[i] =(byte)255;
       }
       startr[startr.length-1] =(byte)0;
       stopr[startr.length-1] =(byte)MyNewTotalOrderPartitioner.MAX_HBASE_BUCKETS;
    }*/

    scan.setStartRow(tsplit.getStartRow());
    scan.setStopRow(tsplit.getStopRow());
    //scan.setCaching(1);//50
    scan.setBatch(11000);
    byte[] a, bid = null;
    a = Bytes.toBytes("A");
    bid = new byte[a.length];
    for (int i = 0; i < a.length; i++) {
        bid[i] = a[i];
    }

    //System.out.println(Bytes.toStringBinary(bid));
    scan.addFamily(bid);

    HTable table = new HTable(HBconf, tsplit.getTable());
    resultScanner = table.getScanner(scan);

    //System.out.println(Bytes.toStringBinary(scan.getStartRow()));
    //System.out.println(Bytes.toStringBinary(scan.getStopRow()));
    /*
    System.out.println(Bytes.toString(Bytes.toBytes(scan.getInputColumns())));
    Get get = new Get(scan.getStartRow());
    Result re;
    System.out.println("iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii");
    while((re = resultScanner.next())!=null){
       System.out.println("o");
       System.out.println(re.size());
         //System.out.println(String.format("%s$$%s ", var1, Bytes.toString(list.next().getQualifier())));
    }
    System.exit(1);*/

    result = resultScanner.next();
    more = false;
    if (result == null) {
        empty = true;
    } else {
        more = true;
        list = result.list().iterator();
        kv = list.next();
    }

    Configuration conf = context.getConfiguration();
    String newjoinVars = conf.get("input.patId");
    String joinVars = newjoinVars.split(tsplit.getFname())[1];
    joinVars = joinVars.substring(0, joinVars.indexOf("$$") - 1);
    String vars = tsplit.getVars();
    StringTokenizer vtok = new StringTokenizer(vars);
    varsno = 0;
    while (vtok.hasMoreTokens()) {
        vtok.nextToken();
        varsno++;
    }
    if (varsno == 1) {
        StringTokenizer vtok2 = new StringTokenizer(vars);
        v1 = vtok2.nextToken();
    } else if (varsno == 2) {
        StringTokenizer vtok2 = new StringTokenizer(vars);
        v1 = vtok2.nextToken();
        v2 = vtok2.nextToken();
    }
}

From source file: input_format.MultiHFileOutputFormat.java

License: Open Source License

public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(final TaskAttemptContext context)
        throws IOException, InterruptedException {
    return new RecordWriter<ImmutableBytesWritable, KeyValue>() {

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            for (RecordWriter<ImmutableBytesWritable, KeyValue> writer : writers.values()) {
                writer.close(context);
            }
        }

        @Override
        public void write(ImmutableBytesWritable key, KeyValue value) throws IOException, InterruptedException {
            RecordWriter<ImmutableBytesWritable, KeyValue> writer = writers.get(key);
            if (writer == null) {
                final Path outputPath = new Path(
                        FileOutputFormat.getOutputPath(context).toString() + "/" + Bytes.toString(key.get()));
                writer = new RecordWriter<ImmutableBytesWritable, KeyValue>() {
                    final FileOutputCommitter committer = new FileOutputCommitter(outputPath, context);
                    final Path outputdir = committer.getWorkPath();
                    final Configuration conf = context.getConfiguration();
                    final FileSystem fs = outputdir.getFileSystem(conf);
                    final long maxsize = conf.getLong("hbase.hregion.max.filesize",
                            HConstants.DEFAULT_MAX_FILE_SIZE);
                    final int blocksize = conf.getInt("hfile.min.blocksize.size", HFile.DEFAULT_BLOCKSIZE);
                    // Invented config.  Add to hbase-*.xml if other than default compression.
                    final String compression = conf.get("hfile.compression",
                            Compression.Algorithm.NONE.getName());

                    // Map of families to writers and how much has been output on the writer.
                    final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
                            Bytes.BYTES_COMPARATOR);
                    byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
                    final byte[] now = Bytes.toBytes(System.currentTimeMillis());
                    boolean rollRequested = false;

                    public void write(ImmutableBytesWritable row, KeyValue kv) throws IOException {
                        // null input == user explicitly wants to flush
                        if (row == null && kv == null) {
                            rollWriters();
                            return;
                        }

                        byte[] rowKey = kv.getRow();
                        long length = kv.getLength();
                        byte[] family = kv.getFamily();
                        WriterLength wl = this.writers.get(family);

                        // If this is a new column family, verify that the directory exists
                        if (wl == null) {
                            fs.mkdirs(new Path(outputdir, Bytes.toString(family)));
                        }

                        // If any of the HFiles for the column families has reached
                        // maxsize, we need to roll all the writers
                        if (wl != null && wl.written + length >= maxsize) {
                            this.rollRequested = true;
                        }

                        // This can only happen once a row is finished though
                        if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
                            rollWriters();
                        }

                        // create a new HFile writer, if necessary
                        if (wl == null || wl.writer == null) {
                            wl = getNewWriter(family);
                        }

                        // we now have the proper HFile writer; full steam ahead
                        kv.updateLatestStamp(this.now);
                        wl.writer.append(kv);
                        wl.written += length;

                        // Copy the row so we know when a row transition happens.
                        this.previousRow = rowKey;
                    }

                    private void rollWriters() throws IOException {
                        for (WriterLength wl : this.writers.values()) {
                            if (wl.writer != null) {
                                close(wl.writer);
                            }
                            wl.writer = null;
                            wl.written = 0;
                        }
                        this.rollRequested = false;
                    }

                    private HFile.Writer getNewWriter(final HFile.Writer writer, final Path familydir,
                            Configuration conf) throws IOException {
                        if (writer != null) {
                            close(writer);
                        }

                        return HFile.getWriterFactory(conf).createWriter(fs,
                                StoreFile.getUniqueFile(fs, familydir), blocksize, compression,
                                KeyValue.KEY_COMPARATOR);

                        // return new HFile.Writer(fs,  StoreFile.getUniqueFile(fs, familydir),
                        //         blocksize, compression, KeyValue.KEY_COMPARATOR);
                    }

                    private WriterLength getNewWriter(byte[] family) throws IOException {
                        WriterLength wl = new WriterLength();
                        Path familydir = new Path(outputdir, Bytes.toString(family));
                        wl.writer = getNewWriter(wl.writer, familydir, conf);
                        this.writers.put(family, wl);
                        return wl;
                    }

                    private void close(final HFile.Writer w) throws IOException {
                        if (w != null) {
                            w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY,
                                    Bytes.toBytes(System.currentTimeMillis()));
                            w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
                                    Bytes.toBytes(context.getTaskAttemptID().toString()));
                            w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
                            w.close();
                        }
                    }

                    public void close(TaskAttemptContext c) throws IOException, InterruptedException {
                        for (WriterLength wl : this.writers.values()) {
                            close(wl.writer);
                        }
                        committer.commitTask(c);
                    }
                };

                writers.put(key, writer);
            }

            writer.write(new ImmutableBytesWritable(value.getRow()), value);
        }
    };
}

From source file: InvertedIndex.NLineRecordReader.java

License: Apache License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.job = job;
    this.context = context;
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    this.path = file;
    this.length = split.getLength();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    if (codec != null) {
        if (0 == split.getLength() && job.getBoolean("mapred.ignore.badcompress", false)) {
            if (null != context && context instanceof TaskInputOutputContext) {
                ((TaskInputOutputContext) context).getCounter("Input Counter", "Gzip File length is zero")
                        .increment(1);
            }
            if (null != this.path) {
                LOG.warn("Skip 0-length Zip file: " + this.path.toString());
            }
            in = new NLineReader(fileIn, job);
        } else {
            try {
                in = new NLineReader(codec.createInputStream(fileIn), job);
                end = Long.MAX_VALUE;
            } catch (IOException e) {
                if (isIgnoreBadCompress(job, e)) {
                    in = new NLineReader(fileIn, job);
                    end = start;
                    LOG.warn("Skip Bad Compress File: " + this.path.toString());
                    LOG.warn("initialize line read error", e);
                    ((TaskInputOutputContext) context).getCounter("Input Counter", "Skip Bad Zip File")
                            .increment(1);
                    ((TaskInputOutputContext) context).getCounter("Input Counter", "Total Skip Bad Zip Length")
                            .increment(this.length);
                } else {
                    throw e;
                }
            }
        }
    } else {
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        in = new NLineReader(fileIn, job);
    }
    if (skipFirstLine) { // skip first line and re-establish "start".
        start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}

From source file: io.amient.kafka.hadoop.io.MultiOutputFormat.java

License: Apache License

public RecordWriter<MsgMetadataWritable, BytesWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException {

    final TaskAttemptContext taskContext = context;
    final Configuration conf = context.getConfiguration();
    final boolean isCompressed = getCompressOutput(context);
    String ext = "";
    CompressionCodec gzipCodec = null;
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(context, GzipCodec.class);
        gzipCodec = ReflectionUtils.newInstance(codecClass, conf);
        ext = ".gz";
    }
    final CompressionCodec codec = gzipCodec;
    final String extension = ext;

    final String pathFormat = conf.get(CONFIG_PATH_FORMAT, "'{T}/{P}'");
    log.info("Using path format: " + pathFormat);
    final SimpleDateFormat timeFormat = new SimpleDateFormat(pathFormat);
    timeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
    final DecimalFormat offsetFormat = new DecimalFormat("0000000000000000000");
    final boolean hasTS = HadoopJobMapper.isTimestampExtractorConfigured(conf);

    return new RecordWriter<MsgMetadataWritable, BytesWritable>() {
        TreeMap<String, RecordWriter<Void, BytesWritable>> recordWriters = new TreeMap<>();

        Path prefixPath = ((FileOutputCommitter) getOutputCommitter(taskContext)).getWorkPath();

        public void write(MsgMetadataWritable key, BytesWritable value) throws IOException {
            if (hasTS && key.getTimestamp() == null) {
                //extractor didn't wish to throw exception so skipping this record
                return;
            }
            String P = String.valueOf(key.getSplit().getPartition());
            String T = key.getSplit().getTopic();
            String suffixPath = hasTS ? timeFormat.format(key.getTimestamp()) : pathFormat.replaceAll("'", "");
            suffixPath = suffixPath.replace("{T}", T);
            suffixPath = suffixPath.replace("{P}", P);
            suffixPath += "/" + T + "-" + P + "-" + offsetFormat.format(key.getSplit().getStartOffset());
            suffixPath += extension;
            RecordWriter<Void, BytesWritable> rw = this.recordWriters.get(suffixPath);
            try {
                if (rw == null) {
                    Path file = new Path(prefixPath, suffixPath);
                    FileSystem fs = file.getFileSystem(conf);
                    FSDataOutputStream fileOut = fs.create(file, false);
                    if (isCompressed) {
                        rw = new LineRecordWriter(new DataOutputStream(codec.createOutputStream(fileOut)));
                    } else {
                        rw = new LineRecordWriter(fileOut);
                    }
                    this.recordWriters.put(suffixPath, rw);
                }
                rw.write(null, value);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            }
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            Iterator<String> keys = this.recordWriters.keySet().iterator();
            while (keys.hasNext()) {
                RecordWriter<Void, BytesWritable> rw = this.recordWriters.get(keys.next());
                rw.close(context);
            }
            this.recordWriters.clear();
        }

    };
}

From source file: io.covert.dns.collection.DnsRequestInputFormat.java

License: Apache License

@Override
public RecordReader<Text, DnsRequest> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {

    Configuration conf = context.getConfiguration();

    int dclass = DClass.value(conf.get("dns.request.dclass", "IN"));
    List<String> subdomains = Arrays.asList(conf.get("dns.requests.subdomains", "").split(","));

    List<Integer> types = new LinkedList<Integer>();
    for (String type : conf.get("dns.request.types", "A").split(","))
        types.add(Type.value(type));

    return new DnsRequestRecordReader(subdomains, types, dclass);
}