Example usage for org.apache.hadoop.mapreduce JobContext getConfiguration

Introduction

On this page you can find usage examples for the org.apache.hadoop.mapreduce JobContext method getConfiguration().

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
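
Before the examples, a minimal self-contained sketch of the call itself. Any JobContext (a Job, a TaskAttemptContext, and so on) exposes the job's Configuration; the property name my.app.some.flag below is a hypothetical placeholder, not part of the Hadoop API.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;

public class GetConfigurationSketch {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "example");
        JobContext context = job; // Job itself implements JobContext
        Configuration conf = context.getConfiguration();
        // Read a hypothetical job-level setting, with a default value.
        boolean flag = conf.getBoolean("my.app.some.flag", false);
        System.out.println(flag);
    }
}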

Usage

From source file: edu.arizona.cs.hadoop.fs.irods.output.HirodsMultipleOutputs.java

License: Apache License

private static Class<? extends Writable> getNamedOutputValueClass(JobContext job, String namedOutput) {
    return job.getConfiguration().getClass(MO_PREFIX + namedOutput + VALUE, null, Writable.class);
}
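
The key read here is written when a named output is registered. Assuming HirodsMultipleOutputs mirrors Hadoop's MultipleOutputs.addNamedOutput (an assumption; only the getter is shown above), the registration side would look like this sketch:

    // Hypothetical registration sketch: stores the MO_PREFIX + "text" + VALUE
    // entry that getNamedOutputValueClass reads back from the configuration.
    HirodsMultipleOutputs.addNamedOutput(job, "text",
            TextOutputFormat.class, LongWritable.class, Text.class);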

From source file: edu.arizona.cs.hadoop.fs.irods.output.HirodsSequenceFileAsBinaryOutputFormat.java

License: Apache License

/**
 * Get the key class for the {@link SequenceFile}
 *
 * @return the key class of the {@link SequenceFile}
 */
public static Class<? extends WritableComparable> getSequenceFileOutputKeyClass(JobContext job) {
    return job.getConfiguration().getClass(KEY_CLASS,
            job.getOutputKeyClass().asSubclass(WritableComparable.class), WritableComparable.class);
}

From source file: edu.arizona.cs.hadoop.fs.irods.output.HirodsSequenceFileAsBinaryOutputFormat.java

License: Apache License

/**
 * Get the value class for the {@link SequenceFile}
 *
 * @return the value class of the {@link SequenceFile}
 */
public static Class<? extends Writable> getSequenceFileOutputValueClass(JobContext job) {
    return job.getConfiguration().getClass(VALUE_CLASS, job.getOutputValueClass().asSubclass(Writable.class),
            Writable.class);
}
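
When no explicit override is stored in the configuration, both getters above fall back to the job's declared output key and value classes. A minimal driver sketch of that default path (BytesWritable is just one example of a WritableComparable):

    // Driver sketch: with no KEY_CLASS / VALUE_CLASS override set, the two
    // getters above return the job-level output classes declared here.
    Job job = Job.getInstance(new Configuration(), "binary-seqfile");
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(BytesWritable.class);
    // getSequenceFileOutputKeyClass(job)   -> BytesWritable.class
    // getSequenceFileOutputValueClass(job) -> BytesWritable.class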

From source file: edu.arizona.cs.hadoop.fs.irods.output.HirodsSequenceFileOutputFormat.java

License: Apache License

/**
 * Get the {@link CompressionType} for the output {@link SequenceFile}.
 *
 * @param job the {@link Job}
 * @return the {@link CompressionType} for the output {@link SequenceFile},
 * defaulting to {@link CompressionType#RECORD}
 */
public static CompressionType getOutputCompressionType(JobContext job) {
    String val = job.getConfiguration().get("edu.arizona.cs.hadoop.fs.irods.mapred.output.compression.type",
            CompressionType.RECORD.toString());
    return CompressionType.valueOf(val);
}
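
The matching setter is just a plain configuration write of the enum name under the same key, e.g.:

    // Sketch: store the enum name under the key read above, so that
    // getOutputCompressionType(job) returns CompressionType.BLOCK.
    job.getConfiguration().set("edu.arizona.cs.hadoop.fs.irods.mapred.output.compression.type",
            CompressionType.BLOCK.toString());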

From source file: edu.berkeley.cs.amplab.adam.io.InterleavedFastqInputFormat.java

License: Apache License

@Override
public boolean isSplitable(JobContext context, Path path) {
    CompressionCodec codec = new CompressionCodecFactory(context.getConfiguration()).getCodec(path);
    return codec == null;
}
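
CompressionCodecFactory resolves codecs by file extension, so a gzipped input yields a codec (and the file is treated as non-splittable), while an uncompressed file yields null. A quick sketch with hypothetical file names:

    Configuration conf = new Configuration();
    CompressionCodecFactory factory = new CompressionCodecFactory(conf);
    // .gz maps to GzipCodec in the default codec list -> not splittable
    System.out.println(factory.getCodec(new Path("reads.fq.gz")) == null); // false
    // no registered codec for a plain file -> splittable
    System.out.println(factory.getCodec(new Path("reads.fq")) == null);    // true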

From source file: edu.indiana.d2i.htrc.io.dataapi.IDInputFormat.java

License: Apache License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    int numIdsInSplit = job.getConfiguration().getInt(HTRCConstants.MAX_IDNUM_SPLIT, (int) 1e6);
    String hostStr = job.getConfiguration().get(HTRCConstants.HOSTS_SEPARATEDBY_COMMA,
            HTRCConstants.DATA_API_DEFAULT_URL);
    if (hostStr == null)
        throw new RuntimeException("Cannot find hosts of HTRC Data Storage.");
    String[] hosts = hostStr.split(",");

    IDInputSplit split = new IDInputSplit(hosts);
    List<InputSplit> splits = new ArrayList<InputSplit>();
    Path[] dirs = getInputPaths(job);
    try {
        for (int i = 0; i < dirs.length; i++) {
            FileSystem fs = dirs[i].getFileSystem(job.getConfiguration());
            DataInputStream fsinput = new DataInputStream(fs.open(dirs[i]));
            Iterator<Text> idlist = new IDList(fsinput).iterator();
            while (idlist.hasNext()) {
                Text id = idlist.next();
                split.addID(id.toString());
                if (split.getLength() >= numIdsInSplit) {
                    splits.add(split);
                    split = new IDInputSplit(hosts);
                }
            }

        }
        if (split != null && split.getLength() != 0)
            splits.add(split);
    } catch (InterruptedException e) {
        logger.error(e);
    }

    logger.info("#Splits " + splits.size());
    return splits;
}
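
Both settings read at the top of this method are ordinary configuration entries, so a driver would populate them before job submission. A sketch with illustrative values (HTRCConstants only supplies the key names):

    Configuration conf = job.getConfiguration();
    conf.setInt(HTRCConstants.MAX_IDNUM_SPLIT, 500000);       // ids per split
    conf.set(HTRCConstants.HOSTS_SEPARATEDBY_COMMA,
            "host1.example.org,host2.example.org");           // Data API hosts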

From source file: edu.indiana.d2i.htrc.io.index.lucene.LuceneIDFormat.java

License: Apache License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    int numIdsInSplit = job.getConfiguration().getInt(HTRCConstants.MAX_IDNUM_SPLIT, (int) 1e6);

    String line = null;
    IDInputSplit split = new IDInputSplit();
    List<InputSplit> splits = new ArrayList<InputSplit>();
    Path[] dirs = getInputPaths(job);

    try {
        for (int i = 0; i < dirs.length; i++) {
            FileSystem fs = dirs[i].getFileSystem(job.getConfiguration());
            DataInputStream fsinput = new DataInputStream(fs.open(dirs[i]));
            BufferedReader reader = new BufferedReader(new InputStreamReader(fsinput));
            while ((line = reader.readLine()) != null) {
                split.addID(line);
                if (split.getLength() >= numIdsInSplit) {
                    splits.add(split);
                    split = new IDInputSplit();
                }
            }
            reader.close();
        }
        if (split != null && split.getLength() != 0)
            splits.add(split);
    } catch (InterruptedException e) {
        logger.error(e);
    }

    logger.info("#Splits " + splits.size());
    return splits;
}

From source file: edu.indiana.d2i.htrc.io.mem.MemIDInputFormat.java

License: Apache License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    int numIdsInSplit = job.getConfiguration().getInt(HTRCConstants.MAX_IDNUM_SPLIT, 8000);
    IDInputSplit split = new IDInputSplit();
    List<InputSplit> splits = new ArrayList<InputSplit>();
    Path[] dirs = getInputPaths(job);
    try {
        for (int i = 0; i < dirs.length; i++) {
            FileSystem fs = dirs[i].getFileSystem(job.getConfiguration());
            DataInputStream fsinput = new DataInputStream(fs.open(dirs[i]));
            Iterator<Text> idlist = new IDList(fsinput).iterator();
            while (idlist.hasNext()) {
                Text id = idlist.next();
                split.addID(id.toString());
                if (split.getLength() >= numIdsInSplit) {
                    splits.add(split);
                    split = new IDInputSplit();
                }
            }
        }
        if (split != null && split.getLength() != 0)
            splits.add(split);
    } catch (InterruptedException e) {
        logger.error(e);
    }

    logger.info("#Splits " + splits.size());
    return splits;
}

From source file: edu.iu.common.MultiFileInputFormat.java

License: Apache License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    // Generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> files = listStatus(job);
    org.apache.hadoop.mapred.JobConf jobConf = (JobConf) job.getConfiguration();
    int numMaps = jobConf.getNumMapTasks();
    LOG.info("NUMBER OF FILES: " + files.size());
    LOG.info("NUMBER OF MAPS: " + numMaps);
    int avg = files.size() / numMaps;
    int rest = files.size() % numMaps;
    int tmp = 0;
    long length = 0;
    List<Path> pathList = null;
    Set<String> hostSet = null;
    for (FileStatus file : files) {
        if (tmp == 0) {
            pathList = new ArrayList<Path>();
            hostSet = new HashSet<String>();
        }
        if (tmp < avg) {
            pathList.add(file.getPath());
            length = length + file.getLen();
            FileSystem fs = file.getPath().getFileSystem(job.getConfiguration());
            BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, file.getLen());
            for (BlockLocation blockLocation : blkLocations) {
                for (String host : blockLocation.getHosts()) {
                    hostSet.add(host);
                }
            }
            tmp++;
            if (tmp == avg && rest == 0) {
                LOG.info("Split on host: " + getHostsString(hostSet));
                splits.add(new MultiFileSplit(pathList, length, hostSet.toArray(new String[0])));
                tmp = 0;
                length = 0;
            }
        } else if (tmp == avg && rest > 0) {
            pathList.add(file.getPath());
            length = length + file.getLen();
            FileSystem fs = file.getPath().getFileSystem(job.getConfiguration());
            BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, file.getLen());
            for (BlockLocation blockLocation : blkLocations) {
                for (String host : blockLocation.getHosts()) {
                    hostSet.add(host);
                }
            }
            rest--;
            LOG.info("Split on host: " + getHostsString(hostSet));
            splits.add(new MultiFileSplit(pathList, length, hostSet.toArray(new String[0])));
            tmp = 0;
            length = 0;
        }
    }
    // Store the number of map tasks in the job conf (under the NUM_INPUT_FILES key)
    job.getConfiguration().setLong(NUM_INPUT_FILES, numMaps);
    LOG.info("Total # of splits: " + splits.size());
    return splits;
}
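
The cast to JobConf succeeds because Hadoop's JobContextImpl backs every job context with a JobConf, so the split count is driven by the configured number of map tasks. A hypothetical driver:

    // Hypothetical driver: numMaps controls how many multi-file splits are built.
    JobConf jobConf = new JobConf();
    jobConf.setNumMapTasks(4);  // getSplits above packs the input files into 4 splits
    Job job = Job.getInstance(jobConf, "multi-file-job");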

From source file: edu.iu.fileformat.MultiFileInputFormat.java

License: Apache License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    // Generate splits
    List<InputSplit> splits = new ArrayList<>();
    List<FileStatus> files = listStatus(job);
    org.apache.hadoop.mapred.JobConf jobConf = (JobConf) job.getConfiguration();
    int numMaps = jobConf.getNumMapTasks();
    LOG.info("NUMBER OF FILES: " + files.size());
    LOG.info("NUMBER OF MAPS: " + numMaps);
    int avg = files.size() / numMaps;
    int rest = files.size() % numMaps;
    int tmp = 0;
    long length = 0;
    List<Path> pathList = null;
    Set<String> hostSet = null;
    for (FileStatus file : files) {
        if (tmp == 0) {
            pathList = new ArrayList<>();
            hostSet = new HashSet<>();
        }
        if (tmp < avg) {
            pathList.add(file.getPath());
            length = length + file.getLen();
            FileSystem fs = file.getPath().getFileSystem(job.getConfiguration());
            BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, file.getLen());
            for (BlockLocation blockLocation : blkLocations) {
                for (String host : blockLocation.getHosts()) {
                    hostSet.add(host);
                }
            }
            tmp++;
            if (tmp == avg && rest == 0) {
                LOG.info("Split on host: " + getHostsString(hostSet));
                splits.add(new MultiFileSplit(pathList, length, hostSet.toArray(new String[0])));
                tmp = 0;
                length = 0;
            }
        } else if (tmp == avg && rest > 0) {
            pathList.add(file.getPath());
            length = length + file.getLen();
            FileSystem fs = file.getPath().getFileSystem(job.getConfiguration());
            BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, file.getLen());
            for (BlockLocation blockLocation : blkLocations) {
                for (String host : blockLocation.getHosts()) {
                    hostSet.add(host);
                }
            }
            rest--;
            LOG.info("Split on host: " + getHostsString(hostSet));
            splits.add(new MultiFileSplit(pathList, length, hostSet.toArray(new String[0])));
            tmp = 0;
            length = 0;
        }
    }
    // Store the number of map tasks in the job conf (under the NUM_INPUT_FILES key)
    job.getConfiguration().setLong(NUM_INPUT_FILES, numMaps);
    LOG.info("Total # of splits: " + splits.size());
    return splits;
}
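
To make the avg/rest packing concrete: with 10 input files and numMaps = 4, avg = 2 and rest = 2, so the loop first emits two splits of 3 files each (consuming the remainder) and then two splits of 2 files, for 4 splits in total.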