Example usage for org.apache.hadoop.mapreduce.JobContext.getConfiguration()

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.JobContext.getConfiguration().

Prototype

public Configuration getConfiguration();

Source Link

Document

Return the configuration for the job.

Usage

From source file:edu.arizona.cs.hadoop.fs.irods.output.HirodsMultipleOutputs.java

License:Apache License

/**
 * Looks up the value class registered for a named output.
 *
 * @param job         the job context whose configuration is queried
 * @param namedOutput the name of the output
 * @return the class stored under the key {@code MO_PREFIX + namedOutput + VALUE};
 *         {@code null} is passed to the lookup as the default
 */
private static Class<? extends Writable> getNamedOutputValueClass(JobContext job, String namedOutput) {
    final String valueClassKey = MO_PREFIX + namedOutput + VALUE;
    return job.getConfiguration().getClass(valueClassKey, null, Writable.class);
}

From source file:edu.arizona.cs.hadoop.fs.irods.output.HirodsSequenceFileAsBinaryOutputFormat.java

License:Apache License

/**
 * Get the key class for the {@link SequenceFile}.
 *
 * @param job the job context to read the setting from
 * @return the class configured under {@code KEY_CLASS}; the job's output key class
 *         is supplied to the lookup as the default
 */
static public Class<? extends WritableComparable> getSequenceFileOutputKeyClass(JobContext job) {
    final Class<? extends WritableComparable> jobKeyClass = job.getOutputKeyClass()
            .asSubclass(WritableComparable.class);
    return job.getConfiguration().getClass(KEY_CLASS, jobKeyClass, WritableComparable.class);
}

From source file:edu.arizona.cs.hadoop.fs.irods.output.HirodsSequenceFileAsBinaryOutputFormat.java

License:Apache License

/**
 * Get the value class for the {@link SequenceFile}.
 *
 * @param job the job context to read the setting from
 * @return the class configured under {@code VALUE_CLASS}; the job's output value class
 *         is supplied to the lookup as the default
 */
static public Class<? extends Writable> getSequenceFileOutputValueClass(JobContext job) {
    final Class<? extends Writable> jobValueClass = job.getOutputValueClass().asSubclass(Writable.class);
    return job.getConfiguration().getClass(VALUE_CLASS, jobValueClass, Writable.class);
}

From source file:edu.arizona.cs.hadoop.fs.irods.output.HirodsSequenceFileOutputFormat.java

License:Apache License

/**
 * Get the {@link CompressionType} for the output {@link SequenceFile}.
 *
 * @param job the {@link Job}/*ww w  .j  ava 2 s .com*/
 * @return the {@link CompressionType} for the output {@link SequenceFile},
 * defaulting to {@link CompressionType#RECORD}
 */
public static CompressionType getOutputCompressionType(JobContext job) {
    String val = job.getConfiguration().get("edu.arizona.cs.hadoop.fs.irods.mapred.output.compression.type",
            CompressionType.RECORD.toString());
    return CompressionType.valueOf(val);
}

From source file:edu.berkeley.cs.amplab.adam.io.InterleavedFastqInputFormat.java

License:Apache License

/**
 * Reports a file as splitable only when no compression codec is found for its path.
 *
 * @param context the job context supplying the configuration for the codec lookup
 * @param path    the file to check
 * @return {@code true} when the codec factory returns no codec for {@code path}
 */
@Override
public boolean isSplitable(JobContext context, Path path) {
    final CompressionCodecFactory codecFactory = new CompressionCodecFactory(context.getConfiguration());
    return codecFactory.getCodec(path) == null;
}

From source file:edu.indiana.d2i.htrc.io.dataapi.IDInputFormat.java

License:Apache License

/**
 * Builds input splits by reading IDs from every input path and grouping them into
 * {@link IDInputSplit}s of at most {@code HTRCConstants.MAX_IDNUM_SPLIT} IDs each
 * (1e6 is passed as the default). Every split is created with the configured host list.
 *
 * @param job the job context supplying the configuration and the input paths
 * @return the generated splits; a trailing, partially filled split is included
 * @throws IOException      if an input path cannot be opened or read
 * @throws RuntimeException if no host string is available from the configuration
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    int numIdsInSplit = job.getConfiguration().getInt(HTRCConstants.MAX_IDNUM_SPLIT, (int) 1e6);
    String hostStr = job.getConfiguration().get(HTRCConstants.HOSTS_SEPARATEDBY_COMMA,
            HTRCConstants.DATA_API_DEFAULT_URL);
    if (hostStr == null) {
        throw new RuntimeException("Cannot find hosts of HTRC Data Storage.");
    }
    String[] hosts = hostStr.split(",");

    IDInputSplit split = new IDInputSplit(hosts);
    List<InputSplit> splits = new ArrayList<InputSplit>();
    Path[] dirs = getInputPaths(job);
    try {
        for (int i = 0; i < dirs.length; i++) {
            FileSystem fs = dirs[i].getFileSystem(job.getConfiguration());
            DataInputStream fsinput = new DataInputStream(fs.open(dirs[i]));
            try {
                Iterator<Text> idlist = new IDList(fsinput).iterator();
                while (idlist.hasNext()) {
                    Text id = idlist.next();
                    split.addID(id.toString());
                    // Start a fresh split as soon as the current one is full.
                    if (split.getLength() >= numIdsInSplit) {
                        splits.add(split);
                        split = new IDInputSplit(hosts);
                    }
                }
            } finally {
                // Release the stream even when reading fails part-way through.
                fsinput.close();
            }
        }
        // Keep the last split when it holds any leftover IDs.
        if (split.getLength() != 0) {
            splits.add(split);
        }
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers can still observe the interruption;
        // the splits collected so far are returned, as before.
        Thread.currentThread().interrupt();
        logger.error(e);
    }

    logger.info("#Splits " + splits.size());
    return splits;
}

From source file:edu.indiana.d2i.htrc.io.index.lucene.LuceneIDFormat.java

License:Apache License

/**
 * Builds input splits by reading one ID per line from every input path and grouping
 * the IDs into {@link IDInputSplit}s of at most {@code HTRCConstants.MAX_IDNUM_SPLIT}
 * IDs each (1e6 is passed as the default).
 *
 * @param job the job context supplying the configuration and the input paths
 * @return the generated splits; a trailing, partially filled split is included
 * @throws IOException if an input path cannot be opened or read
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    int numIdsInSplit = job.getConfiguration().getInt(HTRCConstants.MAX_IDNUM_SPLIT, (int) 1e6);

    IDInputSplit split = new IDInputSplit();
    List<InputSplit> splits = new ArrayList<InputSplit>();
    Path[] dirs = getInputPaths(job);

    try {
        for (int i = 0; i < dirs.length; i++) {
            FileSystem fs = dirs[i].getFileSystem(job.getConfiguration());
            DataInputStream fsinput = new DataInputStream(fs.open(dirs[i]));
            // Decode explicitly as UTF-8 so the result does not depend on the
            // platform default charset of the submitting machine.
            BufferedReader reader = new BufferedReader(new InputStreamReader(fsinput, "UTF-8"));
            try {
                String line;
                while ((line = reader.readLine()) != null) {
                    split.addID(line);
                    // Start a fresh split as soon as the current one is full.
                    if (split.getLength() >= numIdsInSplit) {
                        splits.add(split);
                        split = new IDInputSplit();
                    }
                }
            } finally {
                // Closing the reader also closes the wrapped stream, even on failure.
                reader.close();
            }
        }
        // Keep the last split when it holds any leftover IDs.
        if (split.getLength() != 0) {
            splits.add(split);
        }
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers can still observe the interruption;
        // the splits collected so far are returned, as before.
        Thread.currentThread().interrupt();
        logger.error(e);
    }

    logger.info("#Splits " + splits.size());
    return splits;
}

From source file:edu.indiana.d2i.htrc.io.mem.MemIDInputFormat.java

License:Apache License

/**
 * Builds input splits by reading IDs from every input path and grouping them into
 * {@link IDInputSplit}s of at most {@code HTRCConstants.MAX_IDNUM_SPLIT} IDs each
 * (8000 is passed as the default).
 *
 * @param job the job context supplying the configuration and the input paths
 * @return the generated splits; a trailing, partially filled split is included
 * @throws IOException if an input path cannot be opened or read
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    int numIdsInSplit = job.getConfiguration().getInt(HTRCConstants.MAX_IDNUM_SPLIT, 8000);

    IDInputSplit split = new IDInputSplit();
    List<InputSplit> splits = new ArrayList<InputSplit>();
    Path[] dirs = getInputPaths(job);
    try {
        for (int i = 0; i < dirs.length; i++) {
            FileSystem fs = dirs[i].getFileSystem(job.getConfiguration());
            DataInputStream fsinput = new DataInputStream(fs.open(dirs[i]));
            try {
                Iterator<Text> idlist = new IDList(fsinput).iterator();
                while (idlist.hasNext()) {
                    Text id = idlist.next();
                    split.addID(id.toString());
                    // Start a fresh split as soon as the current one is full.
                    if (split.getLength() >= numIdsInSplit) {
                        splits.add(split);
                        split = new IDInputSplit();
                    }
                }
            } finally {
                // Release the stream even when reading fails part-way through.
                fsinput.close();
            }
        }
        // Keep the last split when it holds any leftover IDs.
        if (split.getLength() != 0) {
            splits.add(split);
        }
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers can still observe the interruption;
        // the splits collected so far are returned, as before.
        Thread.currentThread().interrupt();
        logger.error(e);
    }

    logger.info("#Splits " + splits.size());
    return splits;
}

From source file:edu.iu.common.MultiFileInputFormat.java

License:Apache License

/**
 * Groups the job's input files into {@link MultiFileSplit}s, one per configured map task.
 *
 * <p>With {@code n} files and {@code m} map tasks, the first {@code n % m} splits receive
 * {@code n / m + 1} files and the remaining splits receive {@code n / m} files. Each split
 * carries the total length of its files and the union of the hosts reported by their
 * block locations.
 *
 * @param job the job context; its configuration must be a {@code JobConf}
 * @return the generated splits (empty when there are no input files)
 * @throws IOException if listing the files or querying block locations fails
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    // Generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> files = listStatus(job);
    org.apache.hadoop.mapred.JobConf jobConf = (JobConf) job.getConfiguration();
    int numMaps = jobConf.getNumMapTasks();
    LOG.info("NUMBER OF FILES: " + files.size());
    LOG.info("NUMBER OF MAPS: " + numMaps);
    if (numMaps < 1) {
        // A zero map count would divide by zero below, and a negative one would
        // silently drop every file; fall back to a single split instead.
        LOG.warn("Invalid number of maps " + numMaps + ", using 1");
        numMaps = 1;
    }
    int avg = files.size() / numMaps;
    int rest = files.size() % numMaps;
    int filesInSplit = 0;
    long length = 0;
    List<Path> pathList = null;
    Set<String> hostSet = null;
    for (FileStatus file : files) {
        if (filesInSplit == 0) {
            // First file of a new split: start fresh accumulators.
            pathList = new ArrayList<Path>();
            hostSet = new HashSet<String>();
        }
        pathList.add(file.getPath());
        length = length + file.getLen();
        FileSystem fs = file.getPath().getFileSystem(job.getConfiguration());
        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, file.getLen());
        for (BlockLocation blockLocation : blkLocations) {
            for (String host : blockLocation.getHosts()) {
                hostSet.add(host);
            }
        }
        filesInSplit++;
        // While leftover files remain, a split takes one extra file.
        int targetSize = rest > 0 ? avg + 1 : avg;
        if (filesInSplit >= targetSize) {
            if (rest > 0) {
                rest--;
            }
            LOG.info("Split on host: " + getHostsString(hostSet));
            splits.add(new MultiFileSplit(pathList, length, hostSet.toArray(new String[0])));
            filesInSplit = 0;
            length = 0;
        }
    }
    // Save the number of input files in the job-conf
    job.getConfiguration().setLong(NUM_INPUT_FILES, files.size());
    LOG.info("Total # of splits: " + splits.size());
    return splits;
}

From source file:edu.iu.fileformat.MultiFileInputFormat.java

License:Apache License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    // Generate splits
    List<InputSplit> splits = new ArrayList<>();
    List<FileStatus> files = listStatus(job);
    org.apache.hadoop.mapred.JobConf jobConf = (JobConf) job.getConfiguration();
    int numMaps = jobConf.getNumMapTasks();
    LOG.info("NUMBER OF FILES: " + files.size());
    LOG.info("NUMBER OF MAPS: " + numMaps);
    // randomizeFileListOrder(files);
    int avg = files.size() / numMaps;
    int rest = files.size() % numMaps;
    int tmp = 0;/* w w  w . j  a v  a 2 s  .c om*/
    long length = 0;
    List<Path> pathList = null;
    Set<String> hostSet = null;
    for (FileStatus file : files) {
        if (tmp == 0) {
            pathList = new ArrayList<>();
            hostSet = new HashSet<>();
        }
        if (tmp < avg) {
            pathList.add(file.getPath());
            length = length + file.getLen();
            FileSystem fs = file.getPath().getFileSystem(job.getConfiguration());
            BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, file.getLen());
            for (BlockLocation blockLocation : blkLocations) {
                for (String host : blockLocation.getHosts()) {
                    hostSet.add(host);
                }
            }
            tmp++;
            if (tmp == avg && rest == 0) {
                LOG.info("Split on host: " + getHostsString(hostSet));
                splits.add(new MultiFileSplit(pathList, length, hostSet.toArray(new String[0])));
                tmp = 0;
                length = 0;
            }
        } else if (tmp == avg && rest > 0) {
            pathList.add(file.getPath());
            length = length + file.getLen();
            FileSystem fs = file.getPath().getFileSystem(job.getConfiguration());
            BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, file.getLen());
            for (BlockLocation blockLocation : blkLocations) {
                for (String host : blockLocation.getHosts()) {
                    hostSet.add(host);
                }
            }
            rest--;
            LOG.info("Split on host: " + getHostsString(hostSet));
            splits.add(new MultiFileSplit(pathList, length, hostSet.toArray(new String[0])));
            tmp = 0;
            length = 0;
        }
    }
    // Save the number of input files in the
    // job-conf
    job.getConfiguration().setLong(NUM_INPUT_FILES, numMaps);
    LOG.info("Total # of splits: " + splits.size());
    return splits;
}