Example usage for org.apache.hadoop.mapreduce JobContext getConfiguration

List of usage examples for org.apache.hadoop.mapreduce JobContext getConfiguration

Introduction

This page lists example usages of org.apache.hadoop.mapreduce JobContext.getConfiguration.

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
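
In the new MapReduce API, task-side contexts such as Mapper.Context also extend JobContext, so settings placed in the job configuration are readable inside tasks. As a minimal orientation sketch (not taken from the examples below; the property name my.app.threshold is hypothetical):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class ThresholdMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
    private int threshold;

    @Override
    protected void setup(Context context) {
        // Context extends JobContext, so getConfiguration() is available here.
        Configuration conf = context.getConfiguration();
        threshold = conf.getInt("my.app.threshold", 10); // hypothetical key, with default
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        if (value.getLength() > threshold) {
            context.write(value, new LongWritable(1));
        }
    }
}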

Usage

From source file:com.linkedin.pinot.hadoop.io.PinotOutputFormat.java

License:Apache License

public static int getHllSize(JobContext context) {
    return context.getConfiguration().getInt(PinotOutputFormat.HLL_SIZE, 9);
}

From source file:com.linkedin.pinot.hadoop.io.PinotOutputFormat.java

License:Apache License

public static String getHllColumns(JobContext context) {
    return context.getConfiguration().get(PinotOutputFormat.HLL_COLUMNS);
}

From source file:com.linkedin.pinot.hadoop.io.PinotOutputFormat.java

License:Apache License

public static String getHllSuffix(JobContext context) {
    return context.getConfiguration().get(PinotOutputFormat.HLL_SUFFIX,
            HllConstants.DEFAULT_HLL_DERIVE_COLUMN_SUFFIX);
}

From source file:com.linkedin.pinot.hadoop.io.PinotOutputFormat.java

License:Apache License

public static Class<?> getDataWriteSupportClass(JobContext context) {
    String className = context.getConfiguration().get(PinotOutputFormat.PINOT_RECORD_SERIALIZATION_CLASS);
    if (className == null) {
        throw new RuntimeException("pinot data write support class not set");
    }
    try {
        return context.getConfiguration().getClassByName(className);
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    }
}
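
The getters above read settings that the submitting code must have placed in the job configuration. A minimal sketch of that setter side, assuming the PinotOutputFormat constants are ordinary property-name strings as the snippets suggest (all values are illustrative):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public static Job configurePinotJob() throws IOException {
    Job job = Job.getInstance(new Configuration(), "pinot-segment-build");
    Configuration conf = job.getConfiguration();
    conf.setInt(PinotOutputFormat.HLL_SIZE, 12);                 // read back by getHllSize (default 9)
    conf.set(PinotOutputFormat.HLL_COLUMNS, "userId,deviceId");  // read back by getHllColumns
    conf.set(PinotOutputFormat.PINOT_RECORD_SERIALIZATION_CLASS, // read back by getDataWriteSupportClass
            "com.example.MyPinotRecordSerialization");           // hypothetical class name
    return job;
}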

From source file:com.linkedin.whiteelephant.mapreduce.MyAvroMultipleOutputs.java

License:Apache License

@SuppressWarnings("unchecked")
private static Class<? extends OutputFormat<?, ?>> getNamedOutputFormatClass(JobContext job,
        String namedOutput) {
    return (Class<? extends OutputFormat<?, ?>>) job.getConfiguration()
            .getClass(MO_PREFIX + namedOutput + FORMAT, null, OutputFormat.class);
}
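
getNamedOutputFormatClass only reads back what an addNamedOutput-style call stored earlier. A minimal sketch of that counterpart (assumed, not from the source; it presumes access to the same MO_PREFIX and FORMAT key fragments, and uses Avro's AvroKeyOutputFormat purely as an illustration):

import org.apache.avro.mapreduce.AvroKeyOutputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.OutputFormat;

public static void addNamedOutput(Job job, String namedOutput) {
    // Store the OutputFormat class under the per-named-output key;
    // getClass() in getNamedOutputFormatClass resolves it back later.
    job.getConfiguration().setClass(MO_PREFIX + namedOutput + FORMAT,
            AvroKeyOutputFormat.class, OutputFormat.class);
}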

From source file:com.marklogic.contentpump.CombineDocumentInputFormat.java

License:Apache License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);

    // generate splits
    List<InputSplit> splits = super.getSplits(job);
    List<InputSplit> combinedSplits = new ArrayList<InputSplit>();
    CombineDocumentSplit split = null;
    for (InputSplit file : splits) {
        Path path = ((FileSplit) file).getPath();
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        FileStatus status = fs.getFileStatus(path);
        long length = status.getLen();
        long blockSize = status.getBlockSize();
        long splitSize = computeSplitSize(blockSize, minSize, maxSize);
        if (length != 0) {
            if (split == null) {
                split = new CombineDocumentSplit();
            }

            try {
                if (split.getLength() + length < splitSize || split.getLength() < minSize) {
                    split.addSplit((FileSplit) file);
                } else {
                    combinedSplits.add(split);
                    split = new CombineDocumentSplit();
                    split.addSplit((FileSplit) file);
                }
            } catch (InterruptedException e) {
                LOG.error(e);
                throw new RuntimeException(e);
            }
        }
    }
    if (split != null) {
        combinedSplits.add(split);
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Total # of splits: " + splits.size());
        LOG.debug("Total # of combined splits: " + combinedSplits.size());
    }

    return combinedSplits;
}
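
The combining decision above hinges on computeSplitSize, which FileInputFormat defines as the block size clamped into [minSize, maxSize]. Shown in isolation:

// FileInputFormat's split-size rule: clamp the block size into [minSize, maxSize].
static long computeSplitSize(long blockSize, long minSize, long maxSize) {
    return Math.max(minSize, Math.min(maxSize, blockSize));
}
// With a 128 MB block size and default min/max, splits stay at 128 MB;
// raising mapreduce.input.fileinputformat.split.minsize makes the loop above
// pack more small files into each CombineDocumentSplit.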

From source file:com.marklogic.contentpump.DelimitedTextInputFormat.java

License:Apache License

public List<InputSplit> getSplits(JobContext job) throws IOException {
    boolean delimSplit = isSplitInput(job.getConfiguration());
    //if delimSplit is true, size of each split is determined by 
    //Math.max(minSize, Math.min(maxSize, blockSize)) in FileInputFormat
    List<InputSplit> splits = super.getSplits(job);
    if (!delimSplit) {
        return splits;
    }

    if (splits.size() >= SPLIT_COUNT_LIMIT) {
        //if #splits > 1 million, there is enough parallelism
        //therefore no point to split
        LOG.warn("Exceeding SPLIT_COUNT_LIMIT, input_split is off:" + SPLIT_COUNT_LIMIT);
        DefaultStringifier.store(job.getConfiguration(), false, ConfigConstants.CONF_SPLIT_INPUT);
        return splits;
    }
    // add header info into splits
    List<InputSplit> populatedSplits = new ArrayList<InputSplit>();
    LOG.info(splits.size() + " DelimitedSplits generated");
    Configuration conf = job.getConfiguration();
    char delimiter = 0;
    ArrayList<Text> hlist = new ArrayList<Text>();
    for (InputSplit file : splits) {
        FileSplit fsplit = ((FileSplit) file);
        Path path = fsplit.getPath();
        FileSystem fs = path.getFileSystem(conf);

        if (fsplit.getStart() == 0) {
            // parse the inSplit, get the header
            FSDataInputStream fileIn = fs.open(path);

            String delimStr = conf.get(ConfigConstants.CONF_DELIMITER, ConfigConstants.DEFAULT_DELIMITER);
            if (delimStr.length() == 1) {
                delimiter = delimStr.charAt(0);
            } else {
                LOG.error("Incorrect delimitor: " + delimiter + ". Expects single character.");
            }
            String encoding = conf.get(MarkLogicConstants.OUTPUT_CONTENT_ENCODING,
                    MarkLogicConstants.DEFAULT_OUTPUT_CONTENT_ENCODING);
            InputStreamReader instream = new InputStreamReader(fileIn, encoding);
            CSVParser parser = new CSVParser(instream,
                    CSVParserFormatter.getFormat(delimiter, DelimitedTextReader.encapsulator, true, true));
            Iterator<CSVRecord> it = parser.iterator();

            String[] header = null;
            if (it.hasNext()) {
                CSVRecord record = it.next();
                Iterator<String> recordIterator = record.iterator();
                int recordSize = record.size();
                header = new String[recordSize];
                for (int i = 0; i < recordSize; i++) {
                    if (recordIterator.hasNext()) {
                        header[i] = recordIterator.next();
                    } else {
                        throw new IOException("Record size doesn't match the real size");
                    }
                }

                EncodingUtil.handleBOMUTF8(header, 0);

                hlist.clear();
                for (String s : header) {
                    hlist.add(new Text(s));
                }
            }
            instream.close();
        }

        DelimitedSplit ds = new DelimitedSplit(new TextArrayWritable(hlist.toArray(new Text[hlist.size()])),
                path, fsplit.getStart(), fsplit.getLength(), fsplit.getLocations());
        populatedSplits.add(ds);
    }

    return populatedSplits;
}
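
On the submission side, the settings this method consumes can be sketched as follows (a minimal sketch; the constant names come from the snippet, the values are illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public static void configureDelimitedInput(Job job) {
    Configuration conf = job.getConfiguration();
    conf.setBoolean(ConfigConstants.CONF_SPLIT_INPUT, true);       // enable delimited splitting
    conf.set(ConfigConstants.CONF_DELIMITER, ",");                 // must be a single character
    conf.set(MarkLogicConstants.OUTPUT_CONTENT_ENCODING, "UTF-8"); // used to decode the header row
}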

From source file:com.marklogic.contentpump.DocumentInputFormat.java

License:Apache License

protected void getForestSplits(JobContext jobContext, ResultSequence result, List<ForestSplit> forestSplits,
        List<String> ruleUris) throws IOException {
    Configuration jobConf = jobContext.getConfiguration();
    super.getForestSplits(jobContext, result, forestSplits, ruleUris);
    // Third while loop: audit settings
    while (result.hasNext()) {
        ResultItem item = result.next();
        if (ItemType.XS_STRING != item.getItemType()) {
            throw new IOException("Unexpected item type " + item.getItemType().toString());
        }
        String itemStr = ((XSString) item.getItem()).asString();
        if ("AUDIT".equals(itemStr)) {
            continue;
        } else if ("mlcp-start".equals(itemStr)) {
            mlcpStartEventEnabled = true;
        } else if ("mlcp-finish".equalsIgnoreCase(itemStr)) {
            mlcpFinishEventEnabled = true;
        } else {
            throw new IOException("Unrecognized audit event " + itemStr);
        }
    }
    if (ruleUris != null && ruleUris.size() > 0) {
        AuditUtil.prepareAuditMlcpFinish(jobConf, ruleUris.size());
        if (LOG.isDebugEnabled()) {
            // TODO: Use this version if only JAVA 8 is supported
            // String logMessage = String.join(", ", ruleUris);
            LOG.debug("Redaction rules applied: " + StringUtils.join(ruleUris, ", "));
        }
    }
    if (mlcpStartEventEnabled) {
        AuditUtil.auditMlcpStart(jobConf, jobContext.getJobName());
    }
    jobConf.setBoolean(ConfigConstants.CONF_AUDIT_MLCPFINISH_ENABLED, mlcpFinishEventEnabled);
}
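
The flag stored on the last line is meant to be read back later in the job lifecycle; a minimal sketch of that read side (assumed, not from the source):

// Reading back the flag persisted at the end of getForestSplits, e.g. when
// deciding whether to fire the mlcp-finish audit event.
boolean finishEnabled = jobContext.getConfiguration()
        .getBoolean(ConfigConstants.CONF_AUDIT_MLCPFINISH_ENABLED, false);
if (finishEnabled) {
    // emit the audit event here (implementation elided)
}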

From source file:com.marklogic.contentpump.FileAndDirectoryInputFormat.java

License:Apache License

@Override
protected boolean isSplitable(JobContext context, Path filename) {
    Configuration conf = context.getConfiguration();
    return conf.getBoolean(ConfigConstants.CONF_SPLIT_INPUT, false)
            && !conf.getBoolean(ConfigConstants.INPUT_COMPRESSED, false);
}

From source file:com.marklogic.contentpump.FileAndDirectoryInputFormat.java

License:Apache License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    List<InputSplit> splits = new ArrayList<InputSplit>();
    Configuration conf = job.getConfiguration();
    try {
        List<FileStatus> files = listStatus(job);

        long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
        long maxSize = getMaxSplitSize(job);
        for (FileStatus child : files) {
            Path path = child.getPath();
            FileSystem fs = path.getFileSystem(conf);
            // length is 0 for dir according to FSDirectory.java in 0.20
            // however, w/ Hadoop2, dir in local fs has non-zero length
            long length = child.getLen();
            BlockLocation[] blkLocations = null;
            if (!child.isDirectory() || !(fs instanceof DistributedFileSystem)) {
                blkLocations = fs.getFileBlockLocations(child, 0, length);
            } else if (length != 0) {
                throw new IOException("non-zero length directory on HDFS:" + path.toUri().toString());
            }

            if ((length != 0) && isSplitable(job, path)) {
                long blockSize = child.getBlockSize();
                long splitSize = computeSplitSize(blockSize, minSize, maxSize);

                long bytesRemaining = length;
                while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                    int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                    splits.add(new FileSplit(path, length - bytesRemaining, splitSize,
                            blkLocations[blkIndex].getHosts()));
                    bytesRemaining -= splitSize;
                }

                if (bytesRemaining != 0) {
                    splits.add(new FileSplit(path, length - bytesRemaining, bytesRemaining,
                            blkLocations[blkLocations.length - 1].getHosts()));
                }
            } else if (length != 0) {
                splits.add(new FileSplit(path, 0, length, blkLocations[0].getHosts()));
            } else {
                // Create empty hosts array for zero length files
                splits.add(new FileSplit(path, 0, length, new String[0]));
            }
        }
    } catch (InvalidInputException ex) {
        String inPath = conf.get(ConfigConstants.CONF_INPUT_DIRECTORY);
        String pattern = conf.get(ConfigConstants.CONF_INPUT_FILE_PATTERN, ".*");
        throw new IOException("No input files found with the specified input path " + inPath
                + " and input file pattern " + pattern, ex);
    }

    PathFilter jobFilter = getInputPathFilter(job);
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(hiddenFileFilter);
    if (jobFilter != null) {
        filters.add(jobFilter);
    }
    PathFilter inputFilter = new MultiPathFilter(filters);
    // take a second pass of the splits generated to extract files from
    // directories
    int count = 0;
    // flatten directories until reaching SPLIT_COUNT_LIMIT
    while (count < splits.size() && splits.size() < SPLIT_COUNT_LIMIT) {
        FileSplit split = (FileSplit) splits.get(count);
        Path file = split.getPath();
        FileSystem fs = file.getFileSystem(conf);
        FileStatus status = fs.getFileStatus(file);
        if (status.isDirectory()) {
            FileStatus[] children = fs.listStatus(file, inputFilter);
            if (children.length + count < SPLIT_COUNT_LIMIT) {
                splits.remove(count);
                for (FileStatus stat : children) {
                    FileSplit child = new FileSplit(stat.getPath(), 0, stat.getLen(), null);
                    splits.add(child);
                }
            } else {
                count++;
            }
        } else {
            count++;
        }
    }
    return splits;
}
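
The inner while loop applies FileInputFormat's SPLIT_SLOP constant (1.1): a file keeps yielding full-size splits until the remainder is within 10% of a split size, at which point the remainder becomes the final split instead of spawning a tiny extra one. A standalone sketch of just that arithmetic:

import java.util.ArrayList;
import java.util.List;

// Carve `length` bytes into splitSize chunks, letting the final chunk grow
// up to 10% beyond splitSize, mirroring the loop in getSplits above.
static List<Long> chunkLengths(long length, long splitSize) {
    final double SPLIT_SLOP = 1.1; // same value FileInputFormat uses
    List<Long> chunks = new ArrayList<>();
    long bytesRemaining = length;
    while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
        chunks.add(splitSize);
        bytesRemaining -= splitSize;
    }
    if (bytesRemaining != 0) {
        chunks.add(bytesRemaining); // final split, at most 1.1 * splitSize
    }
    return chunks;
}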