List of usage examples for org.apache.hadoop.mapreduce.JobContext.getConfiguration()
public Configuration getConfiguration();
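In task code, getConfiguration() is most often reached through a Mapper or Reducer Context, both of which extend JobContext. A minimal, self-contained sketch (the "example.threshold" key is hypothetical):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class ConfigReadingMapper extends Mapper<LongWritable, Text, Text, Text> {
    private int threshold;

    @Override
    protected void setup(Context context) {
        // Mapper.Context extends JobContext, so getConfiguration() is available here;
        // "example.threshold" is a hypothetical key set by the driver before submission
        Configuration conf = context.getConfiguration();
        threshold = conf.getInt("example.threshold", 10);
    }
}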
From source file:com.linkedin.pinot.hadoop.io.PinotOutputFormat.java
License:Apache License
public static int getHllSize(JobContext context) {
    return context.getConfiguration().getInt(PinotOutputFormat.HLL_SIZE, 9);
}
From source file:com.linkedin.pinot.hadoop.io.PinotOutputFormat.java
License:Apache License
public static String getHllColumns(JobContext context) {
    return context.getConfiguration().get(PinotOutputFormat.HLL_COLUMNS);
}
From source file:com.linkedin.pinot.hadoop.io.PinotOutputFormat.java
License:Apache License
public static String getHllSuffix(JobContext context) {
    return context.getConfiguration().get(PinotOutputFormat.HLL_SUFFIX,
            HllConstants.DEFAULT_HLL_DERIVE_COLUMN_SUFFIX);
}
From source file:com.linkedin.pinot.hadoop.io.PinotOutputFormat.java
License:Apache License
public static Class<?> getDataWriteSupportClass(JobContext context) {
    String className = context.getConfiguration().get(PinotOutputFormat.PINOT_RECORD_SERIALIZATION_CLASS);
    if (className == null) {
        throw new RuntimeException("pinot data write support class not set");
    }
    try {
        return context.getConfiguration().getClassByName(className);
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    }
}
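The four getters above read values on the task side that the driver must have stored before job submission. A minimal sketch of that write side, assuming the PinotOutputFormat constants are plain String configuration keys and using a hypothetical serialization class name:

// hypothetical driver-side setup; `job` is an org.apache.hadoop.mapreduce.Job
Job job = Job.getInstance(new Configuration(), "pinot-segment-build");
job.getConfiguration().setInt(PinotOutputFormat.HLL_SIZE, 12);
job.getConfiguration().set(PinotOutputFormat.PINOT_RECORD_SERIALIZATION_CLASS,
        "com.example.MyRecordSerialization"); // hypothetical class name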
From source file:com.linkedin.whiteelephant.mapreduce.MyAvroMultipleOutputs.java
License:Apache License
@SuppressWarnings("unchecked")
private static Class<? extends OutputFormat<?, ?>> getNamedOutputFormatClass(JobContext job, String namedOutput) {
    return (Class<? extends OutputFormat<?, ?>>) job.getConfiguration()
            .getClass(MO_PREFIX + namedOutput + FORMAT, null, OutputFormat.class);
}
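Configuration.getClass(name, defaultValue, xface) resolves a class name stored under the key and verifies it against the given interface; here it returns null when no format was registered for the named output. A minimal sketch of the matching write side, assuming the same MO_PREFIX + namedOutput + FORMAT key:

// hypothetical registration step; the key must match what the getter reads back
job.getConfiguration().setClass(MO_PREFIX + namedOutput + FORMAT,
        TextOutputFormat.class, OutputFormat.class);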
From source file:com.marklogic.contentpump.CombineDocumentInputFormat.java
License:Apache License
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);
    // generate splits
    List<InputSplit> splits = super.getSplits(job);
    List<InputSplit> combinedSplits = new ArrayList<InputSplit>();
    CombineDocumentSplit split = null;
    for (InputSplit file : splits) {
        Path path = ((FileSplit) file).getPath();
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        FileStatus status = fs.getFileStatus(path);
        long length = status.getLen();
        long blockSize = status.getBlockSize();
        long splitSize = computeSplitSize(blockSize, minSize, maxSize);
        if (length != 0) {
            if (split == null) {
                split = new CombineDocumentSplit();
            }
            try {
                if (split.getLength() + length < splitSize || split.getLength() < minSize) {
                    split.addSplit((FileSplit) file);
                } else {
                    combinedSplits.add(split);
                    split = new CombineDocumentSplit();
                    split.addSplit((FileSplit) file);
                }
            } catch (InterruptedException e) {
                LOG.error(e);
                throw new RuntimeException(e);
            }
        }
    }
    if (split != null) {
        combinedSplits.add(split);
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Total # of splits: " + splits.size());
        LOG.debug("Total # of combined splits: " + combinedSplits.size());
    }
    return combinedSplits;
}
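The minSize/maxSize bounds consulted above come from the job configuration and can be tuned on the driver with the standard FileInputFormat helpers; a minimal sketch (the sizes are illustrative):

// writes mapreduce.input.fileinputformat.split.minsize / .maxsize into the job's configuration
FileInputFormat.setMinInputSplitSize(job, 64L * 1024 * 1024);  // 64 MB
FileInputFormat.setMaxInputSplitSize(job, 256L * 1024 * 1024); // 256 MB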
From source file:com.marklogic.contentpump.DelimitedTextInputFormat.java
License:Apache License
public List<InputSplit> getSplits(JobContext job) throws IOException {
    boolean delimSplit = isSplitInput(job.getConfiguration());
    // if delimSplit is true, the size of each split is determined by
    // Math.max(minSize, Math.min(maxSize, blockSize)) in FileInputFormat
    List<InputSplit> splits = super.getSplits(job);
    if (!delimSplit) {
        return splits;
    }
    if (splits.size() >= SPLIT_COUNT_LIMIT) {
        // if #splits exceeds SPLIT_COUNT_LIMIT (1 million), there is enough
        // parallelism already, so there is no point in splitting further
        LOG.warn("Exceeding SPLIT_COUNT_LIMIT, input_split is off: " + SPLIT_COUNT_LIMIT);
        DefaultStringifier.store(job.getConfiguration(), false, ConfigConstants.CONF_SPLIT_INPUT);
        return splits;
    }
    // add header info into splits
    List<InputSplit> populatedSplits = new ArrayList<InputSplit>();
    LOG.info(splits.size() + " DelimitedSplits generated");
    Configuration conf = job.getConfiguration();
    char delimiter = 0;
    ArrayList<Text> hlist = new ArrayList<Text>();
    for (InputSplit file : splits) {
        FileSplit fsplit = ((FileSplit) file);
        Path path = fsplit.getPath();
        FileSystem fs = path.getFileSystem(conf);
        if (fsplit.getStart() == 0) {
            // parse the split and extract the header
            FSDataInputStream fileIn = fs.open(path);
            String delimStr = conf.get(ConfigConstants.CONF_DELIMITER, ConfigConstants.DEFAULT_DELIMITER);
            if (delimStr.length() == 1) {
                delimiter = delimStr.charAt(0);
            } else {
                LOG.error("Incorrect delimiter: " + delimStr + ". Expects a single character.");
            }
            String encoding = conf.get(MarkLogicConstants.OUTPUT_CONTENT_ENCODING,
                    MarkLogicConstants.DEFAULT_OUTPUT_CONTENT_ENCODING);
            InputStreamReader instream = new InputStreamReader(fileIn, encoding);
            CSVParser parser = new CSVParser(instream,
                    CSVParserFormatter.getFormat(delimiter, DelimitedTextReader.encapsulator, true, true));
            Iterator<CSVRecord> it = parser.iterator();
            String[] header = null;
            if (it.hasNext()) {
                CSVRecord record = (CSVRecord) it.next();
                Iterator<String> recordIterator = record.iterator();
                int recordSize = record.size();
                header = new String[recordSize];
                for (int i = 0; i < recordSize; i++) {
                    if (recordIterator.hasNext()) {
                        header[i] = (String) recordIterator.next();
                    } else {
                        throw new IOException("Record size doesn't match the real size");
                    }
                }
                EncodingUtil.handleBOMUTF8(header, 0);
                hlist.clear();
                for (String s : header) {
                    hlist.add(new Text(s));
                }
            }
            instream.close();
        }
        DelimitedSplit ds = new DelimitedSplit(new TextArrayWritable(hlist.toArray(new Text[hlist.size()])),
                path, fsplit.getStart(), fsplit.getLength(), fsplit.getLocations());
        populatedSplits.add(ds);
    }
    return populatedSplits;
}
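The delimiter handling above rejects anything longer than one character. A minimal, hypothetical sketch of the driver-side setting it reads, assuming ConfigConstants.CONF_DELIMITER is the same key:

// must be a single character, or getSplits() logs an error and keeps the default
job.getConfiguration().set(ConfigConstants.CONF_DELIMITER, "|");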
From source file:com.marklogic.contentpump.DocumentInputFormat.java
License:Apache License
protected void getForestSplits(JobContext jobContext, ResultSequence result, List<ForestSplit> forestSplits,
        List<String> ruleUris) throws IOException {
    Configuration jobConf = jobContext.getConfiguration();
    super.getForestSplits(jobContext, result, forestSplits, ruleUris);
    // Third while loop: audit settings
    while (result.hasNext()) {
        ResultItem item = result.next();
        if (ItemType.XS_STRING != item.getItemType()) {
            throw new IOException("Unexpected item type " + item.getItemType().toString());
        }
        String itemStr = ((XSString) item.getItem()).asString();
        if ("AUDIT".equals(itemStr)) {
            continue;
        } else if ("mlcp-start".equals(itemStr)) {
            mlcpStartEventEnabled = true;
        } else if ("mlcp-finish".equalsIgnoreCase(itemStr)) {
            mlcpFinishEventEnabled = true;
        } else {
            throw new IOException("Unrecognized audit event " + itemStr);
        }
    }
    if (ruleUris != null && ruleUris.size() > 0) {
        AuditUtil.prepareAuditMlcpFinish(jobConf, ruleUris.size());
        if (LOG.isDebugEnabled()) {
            // TODO: use String.join(", ", ruleUris) once Java 8 is the minimum supported version
            LOG.debug("Redaction rules applied: " + StringUtils.join(ruleUris, ", "));
        }
    }
    if (mlcpStartEventEnabled) {
        AuditUtil.auditMlcpStart(jobConf, jobContext.getJobName());
    }
    jobConf.setBoolean(ConfigConstants.CONF_AUDIT_MLCPFINISH_ENABLED, mlcpFinishEventEnabled);
}
From source file:com.marklogic.contentpump.FileAndDirectoryInputFormat.java
License:Apache License
@Override
protected boolean isSplitable(JobContext context, Path filename) {
    Configuration conf = context.getConfiguration();
    return conf.getBoolean(ConfigConstants.CONF_SPLIT_INPUT, false)
            && !conf.getBoolean(ConfigConstants.INPUT_COMPRESSED, false);
}
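Both checks are purely configuration-driven, so splitting can be toggled on the driver. A minimal sketch, assuming CONF_SPLIT_INPUT and INPUT_COMPRESSED are the boolean keys shown above:

Configuration conf = job.getConfiguration();
conf.setBoolean(ConfigConstants.CONF_SPLIT_INPUT, true);   // allow splitting
conf.setBoolean(ConfigConstants.INPUT_COMPRESSED, false);  // splitting is disabled for compressed input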
From source file:com.marklogic.contentpump.FileAndDirectoryInputFormat.java
License:Apache License
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    List<InputSplit> splits = new ArrayList<InputSplit>();
    Configuration conf = job.getConfiguration();
    try {
        List<FileStatus> files = listStatus(job);
        long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
        long maxSize = getMaxSplitSize(job);
        for (FileStatus child : files) {
            Path path = child.getPath();
            FileSystem fs = path.getFileSystem(conf);
            // length is 0 for a dir according to FSDirectory.java in 0.20;
            // however, with Hadoop 2, a dir in the local fs has non-zero length
            long length = child.getLen();
            BlockLocation[] blkLocations = null;
            if (!child.isDirectory() || !(fs instanceof DistributedFileSystem)) {
                blkLocations = fs.getFileBlockLocations(child, 0, length);
            } else if (length != 0) {
                throw new IOException("non-zero length directory on HDFS: " + path.toUri().toString());
            }
            if ((length != 0) && isSplitable(job, path)) {
                long blockSize = child.getBlockSize();
                long splitSize = computeSplitSize(blockSize, minSize, maxSize);
                long bytesRemaining = length;
                while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                    int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                    splits.add(new FileSplit(path, length - bytesRemaining, splitSize,
                            blkLocations[blkIndex].getHosts()));
                    bytesRemaining -= splitSize;
                }
                if (bytesRemaining != 0) {
                    splits.add(new FileSplit(path, length - bytesRemaining, bytesRemaining,
                            blkLocations[blkLocations.length - 1].getHosts()));
                }
            } else if (length != 0) {
                splits.add(new FileSplit(path, 0, length, blkLocations[0].getHosts()));
            } else {
                // create an empty hosts array for zero-length files
                splits.add(new FileSplit(path, 0, length, new String[0]));
            }
        }
    } catch (InvalidInputException ex) {
        String inPath = conf.get(ConfigConstants.CONF_INPUT_DIRECTORY);
        String pattern = conf.get(ConfigConstants.CONF_INPUT_FILE_PATTERN, ".*");
        throw new IOException("No input files found with the specified input path " + inPath
                + " and input file pattern " + pattern, ex);
    }
    PathFilter jobFilter = getInputPathFilter(job);
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(hiddenFileFilter);
    if (jobFilter != null) {
        filters.add(jobFilter);
    }
    PathFilter inputFilter = new MultiPathFilter(filters);
    // take a second pass over the generated splits to extract files from directories
    int count = 0;
    // flatten directories until reaching SPLIT_COUNT_LIMIT
    while (count < splits.size() && splits.size() < SPLIT_COUNT_LIMIT) {
        FileSplit split = (FileSplit) splits.get(count);
        Path file = split.getPath();
        FileSystem fs = file.getFileSystem(conf);
        FileStatus status = fs.getFileStatus(file);
        if (status.isDirectory()) {
            FileStatus[] children = fs.listStatus(file, inputFilter);
            if (children.length + count < SPLIT_COUNT_LIMIT) {
                splits.remove(count);
                for (FileStatus stat : children) {
                    FileSplit child = new FileSplit(stat.getPath(), 0, stat.getLen(), null);
                    splits.add(child);
                }
            } else {
                count++;
            }
        } else {
            count++;
        }
    }
    return splits;
}
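The job-level PathFilter consulted via getInputPathFilter(job) is itself stored in the job configuration; a minimal sketch of installing one with the standard FileInputFormat helper (MyPathFilter is hypothetical):

// hypothetical filter; setInputPathFilter records the class in the job configuration,
// which is where getInputPathFilter(job) reads it back
public class MyPathFilter implements PathFilter {
    @Override
    public boolean accept(Path path) {
        return !path.getName().endsWith(".tmp"); // skip temporary files, for example
    }
}

// then, on the driver:
FileInputFormat.setInputPathFilter(job, MyPathFilter.class);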