Example usage for the org.apache.hadoop.mapred.lib.NLineInputFormat constructor NLineInputFormat()

List of usage examples for the org.apache.hadoop.mapred.lib.NLineInputFormat constructor NLineInputFormat()

Introduction

On this page you can find example usages of the org.apache.hadoop.mapred.lib.NLineInputFormat constructor NLineInputFormat().

Prototype

NLineInputFormat

Source Link

Usage

From source file:com.ibm.bi.dml.runtime.controlprogram.parfor.RemoteParForColocatedFileSplit.java

License:Open Source License

/**
 * Get the list of hostnames where the input split is located.
 * <p>
 * The first record of this split is read through an {@link NLineInputFormat}
 * record reader and parsed as a compact task string. For each iteration value
 * considered, the file {@code _fname + "/" + ((value - 1) / _blen + 1)} is looked
 * up, the hosts of all its block locations are tallied via {@code countHosts},
 * and the tally is finally reduced by {@code getTopHosts}.
 *
 * @return the hosts chosen by {@code getTopHosts} from the per-host counts
 * @throws IOException if reading the task record or querying the file system fails
 */
@Override
public String[] getLocations() throws IOException {
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = FileSystem.get(job);

    //read task string
    LongWritable key = new LongWritable();
    Text value = new Text();
    RecordReader<LongWritable, Text> reader = null;
    try {
        reader = new NLineInputFormat().getRecordReader(this, job, Reporter.NULL);
        reader.next(key, value);
    } finally {
        //release the reader even if opening it or reading the record failed
        if (reader != null)
            reader.close();
    }

    //parse task
    Task t = Task.parseCompactString(value.toString());

    //get all locations
    HashMap<String, Integer> hosts = new HashMap<String, Integer>();

    if (t.getType() == TaskType.SET) {
        //every iteration value of the set contributes its file's block hosts
        for (IntObject val : t.getIterations()) {
            String fname = _fname + "/" + String.valueOf(((val.getLongValue() - 1) / _blen + 1));
            FileStatus status = fs.getFileStatus(new Path(fname));
            BlockLocation[] tmp1 = fs.getFileBlockLocations(status, 0, status.getLen());
            for (BlockLocation bl : tmp1)
                countHosts(hosts, bl.getHosts());
        }
    } else //TaskType.RANGE
    {
        //since this is a serial process, only the first two iteration entries
        //(used here as the from/to bounds) serve as a heuristic for location
        //information instead of walking the whole range
        long lFrom = t.getIterations().get(0).getLongValue();
        long lTo = t.getIterations().get(1).getLongValue();
        for (long li : new long[] { lFrom, lTo }) {
            String fname = _fname + "/" + String.valueOf(((li - 1) / _blen + 1));
            FileStatus status = fs.getFileStatus(new Path(fname));
            BlockLocation[] tmp1 = fs.getFileBlockLocations(status, 0, status.getLen());
            for (BlockLocation bl : tmp1)
                countHosts(hosts, bl.getHosts());
        }
    }

    //majority consensus on top host
    return getTopHosts(hosts);
}

From source file:crunch.MaxTemperature.java

License:Apache License

/**
 * Exercises {@link NLineInputFormat} with
 * {@code mapred.line.input.format.linespermap} set to 2 on a four-line input:
 * the first split is expected to yield lines one and two, the second split
 * lines three and four. The numeric argument passed to {@code checkNextLine}
 * is presumably the expected record key (the values match the byte offset of
 * each line in the input) - confirm against that helper.
 */
@Test
public void nLine() throws Exception {
    String input = "On the top of the Crumpetty Tree\n" + "The Quangle Wangle sat,\n"
            + "But his face you could not see,\n" + "On account of his Beaver Hat.";

    writeInput(input);

    conf.setInt("mapred.line.input.format.linespermap", 2);
    NLineInputFormat format = new NLineInputFormat();
    format.configure(conf);
    InputSplit[] splits = format.getSplits(conf, 2);

    // Each reader is closed before the next one is opened so that no
    // underlying stream is leaked, even when a check fails.
    RecordReader<LongWritable, Text> recordReader = format.getRecordReader(splits[0], conf, Reporter.NULL);
    try {
        checkNextLine(recordReader, 0, "On the top of the Crumpetty Tree");
        checkNextLine(recordReader, 33, "The Quangle Wangle sat,");
    } finally {
        recordReader.close();
    }

    recordReader = format.getRecordReader(splits[1], conf, Reporter.NULL);
    try {
        checkNextLine(recordReader, 57, "But his face you could not see,");
        checkNextLine(recordReader, 89, "On account of his Beaver Hat.");
    } finally {
        recordReader.close();
    }
}

From source file:org.apache.sysml.runtime.controlprogram.parfor.RemoteParForColocatedFileSplit.java

License:Apache License

/**
 * Get the list of hostnames where the input split is located.
 * <p>
 * Reads the first record of this split with an {@link NLineInputFormat}
 * record reader, parses it via {@code Task.parseCompactString}, and tallies
 * (through {@code countHosts}) the hosts of every block location of the files
 * {@code _fname + "/" + ((value - 1) / _blen + 1)} derived from the task's
 * iteration values. The tally is handed to {@code getTopHosts}.
 *
 * @return the hosts chosen by {@code getTopHosts} from the per-host counts
 * @throws IOException if reading the task record or querying the file system fails
 */
@Override
public String[] getLocations() throws IOException {
    final JobConf conf = new JobConf(ConfigurationManager.getCachedJobConf());
    final FileSystem fileSys = IOUtilFunctions.getFileSystem(getPath(), conf);

    // fetch the first record of this split; its value is the task string
    final LongWritable recKey = new LongWritable();
    final Text recValue = new Text();
    RecordReader<LongWritable, Text> rr = null;
    try {
        rr = new NLineInputFormat().getRecordReader(this, conf, Reporter.NULL);
        rr.next(recKey, recValue);
    } finally {
        IOUtilFunctions.closeSilently(rr);
    }

    final Task task = Task.parseCompactString(recValue.toString());

    // host name -> count, filled by countHosts
    final HashMap<String, Integer> hostCounts = new HashMap<>();

    if (task.getType() != TaskType.SET) {
        // TaskType.RANGE: as this runs serially, only the first two iteration
        // entries (used as from/to bounds) are probed as a location heuristic
        final long[] bounds = {
            task.getIterations().get(0).getLongValue(),
            task.getIterations().get(1).getLongValue()
        };
        for (int i = 0; i < bounds.length; i++) {
            final Path file = new Path(_fname + "/" + ((bounds[i] - 1) / _blen + 1));
            final FileStatus stat = fileSys.getFileStatus(file);
            final BlockLocation[] blocks = fileSys.getFileBlockLocations(stat, 0, stat.getLen());
            for (int b = 0; b < blocks.length; b++) {
                countHosts(hostCounts, blocks[b].getHosts());
            }
        }
    } else {
        // TaskType.SET: every iteration value contributes its file's hosts
        for (IntObject iter : task.getIterations()) {
            final Path file = new Path(_fname + "/" + ((iter.getLongValue() - 1) / _blen + 1));
            final FileStatus stat = fileSys.getFileStatus(file);
            final BlockLocation[] blocks = fileSys.getFileBlockLocations(stat, 0, stat.getLen());
            for (int b = 0; b < blocks.length; b++) {
                countHosts(hostCounts, blocks[b].getHosts());
            }
        }
    }

    // majority consensus on top host
    return getTopHosts(hostCounts);
}