List of usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration()
public Configuration getConfiguration();
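In the MapReduce v2 ("mapreduce") API, getConfiguration() is how task-side code reaches the job's Configuration, most commonly while a RecordReader or RecordWriter is being created or initialized, as every example below illustrates. For orientation, a minimal sketch; the key "my.custom.key" is hypothetical and used only for illustration:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Minimal sketch of a RecordReader that pulls a job-level setting from the
// task context. "my.custom.key" is a hypothetical key, for illustration only.
public abstract class ConfiguredRecordReader extends RecordReader<LongWritable, Text> {
    protected String customValue;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        Configuration conf = context.getConfiguration();
        // Read the setting the driver placed in the job configuration.
        customValue = conf.get("my.custom.key", "default");
    }
}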
From source file:hdfs.hdfsadapter.XmlInputFormat.java
License:Apache License
@Override
public RecordReader<LongWritable, Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
    try {
        return new XmlRecordReader((FileSplit) split, context.getConfiguration());
    } catch (IOException ioe) {
        // Swallow the failure; a null reader tells the framework this split could not be opened.
        return null;
    }
}
From source file:hdfs.hdfsadapter.XmlInputFormatOneBufferSolution.java
License:Apache License
@Override
public RecordReader<LongWritable, Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
    try {
        STARTING_TAG = context.getConfiguration().get("start_tag");
        ENDING_TAG = context.getConfiguration().get("end_tag");
        return new XmlRecordReader((FileSplit) split, context.getConfiguration());
    } catch (IOException ioe) {
        return null;
    }
}
From source file:hdfs.hdfsadapter.XmlInputFormatTwoBufferSolution.java
License:Apache License
@Override
public RecordReader<LongWritable, Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
    try {
        END_TAG = context.getConfiguration().get("end_tag");
        START_TAG = context.getConfiguration().get("start_tag");
        return new XmlRecordReader((FileSplit) split, context.getConfiguration());
    } catch (IOException ioe) {
        return null;
    }
}
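The two buffer-solution variants above read their element delimiters from the job configuration, so a driver must set "start_tag" and "end_tag" before submitting the job. A minimal driver-side sketch; the key names match the ones read in createRecordReader() above, while the class name XmlIngestDriver and the tag values are illustrative:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import hdfs.hdfsadapter.XmlInputFormatOneBufferSolution;

public class XmlIngestDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Keys match those read in createRecordReader(); values are illustrative.
        conf.set("start_tag", "<page>");
        conf.set("end_tag", "</page>");
        Job job = Job.getInstance(conf, "xml-ingest");
        job.setInputFormatClass(XmlInputFormatOneBufferSolution.class);
        // Mapper, output types, and input/output paths would be configured here.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}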
From source file:ilps.hadoop.ThriftFileInputFormat.java
License:Apache License
@Override
public RecordReader<Text, StreamItemWritable> createRecordReader(InputSplit split, TaskAttemptContext tac)
        throws IOException, InterruptedException {
    return new ThriftRecordReader((FileSplit) split, tac.getConfiguration());
}
From source file:ilps.hadoop.ThriftRecordReader.java
License:Apache License
/**
 * Boilerplate initialization code for file input streams.
 *
 * Tuan - Add the .xz decompressor here.
 */
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    conf = context.getConfiguration();
    fileSplit = (FileSplit) split;
    start = fileSplit.getStart();
    length = fileSplit.getLength();
    position = 0;
    Path path = fileSplit.getPath();
    FileSystem fs = path.getFileSystem(conf);

    // Some files are corrupted; report them and move on
    try {
        fis = fs.open(path);
        bis = new BufferedInputStream(fis);
        xzis = new XZCompressorInputStream(bis);
        transport = new TIOStreamTransport(xzis);
    } catch (IOException e) {
        LOG.error("Bad file: " + path.toString());
        e.printStackTrace();
    }

    try {
        if (transport != null) {
            transport.open();
        } else {
            // Skip this file
            fis = null;
            return;
        }
    } catch (TTransportException e) {
        e.printStackTrace();
        throw new IOException(e);
    }

    factory = new TBinaryProtocol.Factory();
    tp = factory.getProtocol(transport);
    value = new StreamItemWritable(factory);
}
From source file:input_format.HFileRecordReaderBufferedScan.java
License:Open Source License
/**
 * Initializes the reader.
 *
 * @param inputsplit The split to work with.
 * @param context The current task context.
 * @throws IOException When setting up the reader fails.
 * @throws InterruptedException When the job is aborted.
 * @see org.apache.hadoop.mapreduce.RecordReader#initialize(
 *      org.apache.hadoop.mapreduce.InputSplit,
 *      org.apache.hadoop.mapreduce.TaskAttemptContext)
 */
@Override
public void initialize(InputSplit inputsplit, TaskAttemptContext context) throws IOException, InterruptedException {
    tsplit = (TableColumnSplit) inputsplit;
    scan = new Scan();
    scan.setStartRow(tsplit.getStartRow());
    scan.setStopRow(tsplit.getStopRow());
    scan.setBatch(11000);

    // Restrict the scan to column family "A"
    byte[] a = Bytes.toBytes("A");
    byte[] bid = new byte[a.length];
    for (int i = 0; i < a.length; i++) {
        bid[i] = a[i];
    }
    scan.addFamily(bid);

    HTable table = new HTable(HBconf, tsplit.getTable());
    resultScanner = table.getScanner(scan);

    result = resultScanner.next();
    more = false;
    if (result == null) {
        empty = true;
    } else {
        more = true;
        list = result.list().iterator();
        kv = list.next();
    }

    // Extract the join variables for this pattern from the job configuration
    Configuration conf = context.getConfiguration();
    String newjoinVars = conf.get("input.patId");
    String joinVars = newjoinVars.split(tsplit.getFname())[1];
    joinVars = joinVars.substring(0, joinVars.indexOf("$$") - 1);

    String vars = tsplit.getVars();
    StringTokenizer vtok = new StringTokenizer(vars);
    varsno = 0;
    while (vtok.hasMoreTokens()) {
        vtok.nextToken();
        varsno++;
    }
    if (varsno == 1) {
        StringTokenizer vtok2 = new StringTokenizer(vars);
        v1 = vtok2.nextToken();
    } else if (varsno == 2) {
        StringTokenizer vtok2 = new StringTokenizer(vars);
        v1 = vtok2.nextToken();
        v2 = vtok2.nextToken();
    }
}
From source file:input_format.MultiHFileOutputFormat.java
License:Open Source License
public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(final TaskAttemptContext context)
        throws IOException, InterruptedException {
    return new RecordWriter<ImmutableBytesWritable, KeyValue>() {

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            for (RecordWriter<ImmutableBytesWritable, KeyValue> writer : writers.values()) {
                writer.close(context);
            }
        }

        @Override
        public void write(ImmutableBytesWritable key, KeyValue value) throws IOException, InterruptedException {
            RecordWriter<ImmutableBytesWritable, KeyValue> writer = writers.get(key);
            if (writer == null) {
                final Path outputPath = new Path(
                        FileOutputFormat.getOutputPath(context).toString() + "/" + Bytes.toString(key.get()));
                writer = new RecordWriter<ImmutableBytesWritable, KeyValue>() {
                    final FileOutputCommitter committer = new FileOutputCommitter(outputPath, context);
                    final Path outputdir = committer.getWorkPath();
                    final Configuration conf = context.getConfiguration();
                    final FileSystem fs = outputdir.getFileSystem(conf);
                    final long maxsize = conf.getLong("hbase.hregion.max.filesize",
                            HConstants.DEFAULT_MAX_FILE_SIZE);
                    final int blocksize = conf.getInt("hfile.min.blocksize.size", HFile.DEFAULT_BLOCKSIZE);
                    // Invented config. Add to hbase-*.xml if other than default compression.
                    final String compression = conf.get("hfile.compression",
                            Compression.Algorithm.NONE.getName());

                    // Map of families to writers and how much has been output on the writer.
                    final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
                            Bytes.BYTES_COMPARATOR);
                    byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
                    final byte[] now = Bytes.toBytes(System.currentTimeMillis());
                    boolean rollRequested = false;

                    public void write(ImmutableBytesWritable row, KeyValue kv) throws IOException {
                        // null input == user explicitly wants to flush
                        if (row == null && kv == null) {
                            rollWriters();
                            return;
                        }
                        byte[] rowKey = kv.getRow();
                        long length = kv.getLength();
                        byte[] family = kv.getFamily();
                        WriterLength wl = this.writers.get(family);

                        // If this is a new column family, verify that the directory exists
                        if (wl == null) {
                            fs.mkdirs(new Path(outputdir, Bytes.toString(family)));
                        }

                        // If any of the HFiles for the column families has reached
                        // maxsize, we need to roll all the writers
                        if (wl != null && wl.written + length >= maxsize) {
                            this.rollRequested = true;
                        }

                        // This can only happen once a row is finished though
                        if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
                            rollWriters();
                        }

                        // create a new HLog writer, if necessary
                        if (wl == null || wl.writer == null) {
                            wl = getNewWriter(family);
                        }

                        // we now have the proper HLog writer. full steam ahead
                        kv.updateLatestStamp(this.now);
                        wl.writer.append(kv);
                        wl.written += length;

                        // Copy the row so we know when a row transition happens.
                        this.previousRow = rowKey;
                    }

                    private void rollWriters() throws IOException {
                        for (WriterLength wl : this.writers.values()) {
                            if (wl.writer != null) {
                                close(wl.writer);
                            }
                            wl.writer = null;
                            wl.written = 0;
                        }
                        this.rollRequested = false;
                    }

                    private HFile.Writer getNewWriter(final HFile.Writer writer, final Path familydir,
                            Configuration conf) throws IOException {
                        if (writer != null) {
                            close(writer);
                        }
                        return HFile.getWriterFactory(conf).createWriter(fs,
                                StoreFile.getUniqueFile(fs, familydir), blocksize, compression,
                                KeyValue.KEY_COMPARATOR);
                    }

                    private WriterLength getNewWriter(byte[] family) throws IOException {
                        WriterLength wl = new WriterLength();
                        Path familydir = new Path(outputdir, Bytes.toString(family));
                        wl.writer = getNewWriter(wl.writer, familydir, conf);
                        this.writers.put(family, wl);
                        return wl;
                    }

                    private void close(final HFile.Writer w) throws IOException {
                        if (w != null) {
                            w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY,
                                    Bytes.toBytes(System.currentTimeMillis()));
                            w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
                                    Bytes.toBytes(context.getTaskAttemptID().toString()));
                            w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
                            w.close();
                        }
                    }

                    public void close(TaskAttemptContext c) throws IOException, InterruptedException {
                        for (WriterLength wl : this.writers.values()) {
                            close(wl.writer);
                        }
                        committer.commitTask(c);
                    }
                };
                writers.put(key, writer);
            }
            writer.write(new ImmutableBytesWritable(value.getRow()), value);
        }
    };
}
From source file:InvertedIndex.NLineRecordReader.java
License:Apache License
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.job = job;
    this.context = context;
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    this.path = file;
    this.length = split.getLength();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    if (codec != null) {
        if (0 == split.getLength() && job.getBoolean("mapred.ignore.badcompress", false)) {
            if (null != context && context instanceof TaskInputOutputContext) {
                ((TaskInputOutputContext) context).getCounter("Input Counter", "Gzip File length is zero")
                        .increment(1);
            }
            if (null != this.path) {
                LOG.warn("Skip 0-length Zip file: " + this.path.toString());
            }
            in = new NLineReader(fileIn, job);
        } else {
            try {
                in = new NLineReader(codec.createInputStream(fileIn), job);
                end = Long.MAX_VALUE;
            } catch (IOException e) {
                if (isIgnoreBadCompress(job, e)) {
                    in = new NLineReader(fileIn, job);
                    end = start;
                    LOG.warn("Skip Bad Compress File: " + this.path.toString());
                    LOG.warn("initialize line read error", e);
                    ((TaskInputOutputContext) context).getCounter("Input Counter", "Skip Bad Zip File")
                            .increment(1);
                    ((TaskInputOutputContext) context).getCounter("Input Counter", "Total Skip Bad Zip Length")
                            .increment(this.length);
                } else {
                    throw e;
                }
            }
        }
    } else {
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        in = new NLineReader(fileIn, job);
    }
    if (skipFirstLine) {
        // skip first line and re-establish "start".
        start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}
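This reader consults two job-level switches, "mapred.linerecordreader.maxlength" and "mapred.ignore.badcompress". A brief driver-side sketch; the key names come from the initialize() code above, while the class name NLineDriverSketch and the values are illustrative:

import org.apache.hadoop.conf.Configuration;

public class NLineDriverSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Cap the length of a single line; the default above is Integer.MAX_VALUE.
        conf.setInt("mapred.linerecordreader.maxlength", 1024 * 1024);
        // Tolerate corrupt or zero-length gzip inputs instead of failing the task.
        conf.setBoolean("mapred.ignore.badcompress", true);
    }
}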
From source file:io.amient.kafka.hadoop.io.MultiOutputFormat.java
License:Apache License
public RecordWriter<MsgMetadataWritable, BytesWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException {
    final TaskAttemptContext taskContext = context;
    final Configuration conf = context.getConfiguration();
    final boolean isCompressed = getCompressOutput(context);
    String ext = "";
    CompressionCodec gzipCodec = null;
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(context, GzipCodec.class);
        gzipCodec = ReflectionUtils.newInstance(codecClass, conf);
        ext = ".gz";
    }
    final CompressionCodec codec = gzipCodec;
    final String extension = ext;
    final String pathFormat = conf.get(CONFIG_PATH_FORMAT, "'{T}/{P}'");
    log.info("Using path format: " + pathFormat);
    final SimpleDateFormat timeFormat = new SimpleDateFormat(pathFormat);
    timeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
    final DecimalFormat offsetFormat = new DecimalFormat("0000000000000000000");
    final boolean hasTS = HadoopJobMapper.isTimestampExtractorConfigured(conf);

    return new RecordWriter<MsgMetadataWritable, BytesWritable>() {
        TreeMap<String, RecordWriter<Void, BytesWritable>> recordWriters = new TreeMap<>();
        Path prefixPath = ((FileOutputCommitter) getOutputCommitter(taskContext)).getWorkPath();

        public void write(MsgMetadataWritable key, BytesWritable value) throws IOException {
            if (hasTS && key.getTimestamp() == null) {
                // extractor didn't wish to throw exception so skipping this record
                return;
            }
            String P = String.valueOf(key.getSplit().getPartition());
            String T = key.getSplit().getTopic();
            String suffixPath = hasTS ? timeFormat.format(key.getTimestamp()) : pathFormat.replaceAll("'", "");
            suffixPath = suffixPath.replace("{T}", T);
            suffixPath = suffixPath.replace("{P}", P);
            suffixPath += "/" + T + "-" + P + "-" + offsetFormat.format(key.getSplit().getStartOffset());
            suffixPath += extension;
            RecordWriter<Void, BytesWritable> rw = this.recordWriters.get(suffixPath);
            try {
                if (rw == null) {
                    Path file = new Path(prefixPath, suffixPath);
                    FileSystem fs = file.getFileSystem(conf);
                    FSDataOutputStream fileOut = fs.create(file, false);
                    if (isCompressed) {
                        rw = new LineRecordWriter(new DataOutputStream(codec.createOutputStream(fileOut)));
                    } else {
                        rw = new LineRecordWriter(fileOut);
                    }
                    this.recordWriters.put(suffixPath, rw);
                }
                rw.write(null, value);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            }
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            Iterator<String> keys = this.recordWriters.keySet().iterator();
            while (keys.hasNext()) {
                RecordWriter<Void, BytesWritable> rw = this.recordWriters.get(keys.next());
                rw.close(context);
            }
            this.recordWriters.clear();
        }
    };
}
From source file:io.covert.dns.collection.DnsRequestInputFormat.java
License:Apache License
@Override
public RecordReader<Text, DnsRequest> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    int dclass = DClass.value(conf.get("dns.request.dclass", "IN"));
    List<String> subdomains = Arrays.asList(conf.get("dns.requests.subdomains", "").split(","));
    List<Integer> types = new LinkedList<Integer>();
    for (String type : conf.get("dns.request.types", "A").split(",")) {
        types.add(Type.value(type));
    }
    return new DnsRequestRecordReader(subdomains, types, dclass);
}
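All three settings this input format reads have defaults, but a driver can override them in the job configuration. A brief sketch; the key names are taken from createRecordReader() above, while the class name DnsRequestDriverSketch and the values are illustrative:

import org.apache.hadoop.conf.Configuration;

public class DnsRequestDriverSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("dns.request.dclass", "IN");             // DNS class, parsed via DClass.value()
        conf.set("dns.requests.subdomains", "www,mail");  // comma-separated subdomain list
        conf.set("dns.request.types", "A,AAAA,MX");       // record types, each parsed via Type.value()
    }
}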