Example usage for org.apache.hadoop.mapreduce JobContext getConfiguration

List of usage examples for org.apache.hadoop.mapreduce JobContext getConfiguration

Introduction

On this page you can find example usage of org.apache.hadoop.mapreduce JobContext getConfiguration.

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
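
A minimal, hedged sketch of the typical call pattern inside a custom InputFormat (the property name example.max.splits is hypothetical and used only for illustration):

@Override
public List<InputSplit> getSplits(JobContext context) throws IOException {
    // Read a job setting back out of the context; "example.max.splits" is hypothetical.
    Configuration conf = context.getConfiguration();
    int maxSplits = conf.getInt("example.max.splits", 1);
    List<InputSplit> splits = new ArrayList<>(maxSplits);
    // ... build up to maxSplits InputSplit instances here ...
    return splits;
}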

Usage

From source file:org.apache.accumulo.hadoopImpl.mapreduce.AccumuloRecordReader.java

License:Apache License

/**
 * Creates {@link AccumuloClient} from the configuration
 */
private static AccumuloClient createClient(JobContext context, Class<?> callingClass) {
    return InputConfigurator.createClient(callingClass, context.getConfiguration());
}

From source file:org.apache.accumulo.hadoopImpl.mapreduce.InputFormatBase.java

License:Apache License

/**
 * Determines whether a configuration has the {@link org.apache.accumulo.core.client.BatchScanner}
 * feature enabled.
 *
 * @param context
 *          the Hadoop context for the configured job
 * @since 1.7.0
 * @see #setBatchScan(Job, boolean)
 */
protected static boolean isBatchScan(JobContext context) {
    return InputConfigurator.isBatchScan(CLASS, context.getConfiguration());
}
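
The setBatchScan(Job, boolean) setter referenced in the javadoc above is the other half of this pattern. A hedged sketch of how such a setter/getter pair round-trips a feature flag through the job configuration (the property name is illustrative; Accumulo's actual bookkeeping lives in InputConfigurator):

// Illustrative only: not Accumulo's real internals, just the configuration round-trip.
public static void setBatchScan(Job job, boolean enable) {
    job.getConfiguration().setBoolean("example.batch.scan.enabled", enable);
}

protected static boolean isBatchScan(JobContext context) {
    return context.getConfiguration().getBoolean("example.batch.scan.enabled", false);
}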

From source file:org.apache.bigtop.bigpetstore.generator.GeneratePetStoreTransactionsInputFormat.java

License:Apache License

@Override
public List<InputSplit> getSplits(JobContext arg) throws IOException {
    int num_records_desired = arg.getConfiguration()
            .getInt(GeneratePetStoreTransactionsInputFormat.props.bigpetstore_records.name(), -1);
    if (num_records_desired == -1) {
        throw new RuntimeException(
                "# of total records not set in configuration object: " + arg.getConfiguration());
    }

    ArrayList<InputSplit> list = new ArrayList<InputSplit>();

    /**
     * Generator class will take a state as input and generate all the data
     * for that state.
     */
    for (TransactionIteratorFactory.STATE s : STATE.values()) {
        PetStoreTransactionInputSplit split = new PetStoreTransactionInputSplit(
                (int) (Math.ceil(num_records_desired * s.probability)), s);
        System.out.println(s + " _ " + split.records);
        list.add(split);
    }
    return list;
}
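
This getSplits fails fast unless the driver has set the record count first. A hedged sketch of the corresponding driver-side call (the job name and the value 10000 are illustrative):

// Sketch: setting the record count that getSplits() above reads out of the configuration.
Job job = Job.getInstance(new Configuration(), "bigpetstore-generator");
job.getConfiguration()
        .setInt(GeneratePetStoreTransactionsInputFormat.props.bigpetstore_records.name(), 10000);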

From source file:org.apache.bigtop.bigpetstore.generator.PetStoreTransactionsInputFormat.java

License:Apache License

@Override
public List<InputSplit> getSplits(JobContext arg) throws IOException {
    int numRecordsDesired = arg.getConfiguration()
            .getInt(PetStoreTransactionsInputFormat.props.bigpetstore_records.name(), -1);
    if (numRecordsDesired == -1) {
        throw new RuntimeException(
                "# of total records not set in configuration object: " + arg.getConfiguration());
    }

    List<InputSplit> list = new ArrayList<InputSplit>();
    long customerIdStart = 1;
    for (State s : State.values()) {
        int numRecords = numRecords(numRecordsDesired, s.probability);
        // each state is assigned a range of customer-ids from which it can choose.
        // The number of customers can be as many as the number of transactions.
        Range<Long> customerIdRange = Range.between(customerIdStart, customerIdStart + numRecords - 1);
        PetStoreTransactionInputSplit split = new PetStoreTransactionInputSplit(numRecords, customerIdRange, s);
        System.out.println(s + " _ " + split.records);
        list.add(split);
        customerIdStart += numRecords;
    }
    return list;
}
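
The numRecords helper called above is not shown in this excerpt; a hedged sketch of a proportional allocation consistent with its call site:

// Hedged sketch only: the real numRecords(int, double) helper is not part of this
// excerpt. Something along these lines would allocate records per state in
// proportion to its probability.
private static int numRecords(int totalRecords, double probability) {
    return (int) Math.round(totalRecords * probability);
}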

From source file:org.apache.blur.mapreduce.lib.BlurInputFormat.java

License:Apache License

@Override
public List<InputSplit> getSplits(JobContext context) throws IOException {
    Path[] dirs = getInputPaths(context);
    List<BlurInputSplit> splits;
    Configuration configuration = context.getConfiguration();
    if (isSplitCommandSupported(configuration)) {
        splits = getSplitsFromCommand(configuration, dirs);
    } else {
        splits = getSplits(configuration, dirs);
    }
    return toList(getMaxNumberOfMaps(configuration), splits);
}

From source file:org.apache.blur.mapreduce.lib.BlurOutputCommitter.java

License:Apache License

@Override
public void commitJob(JobContext jobContext) throws IOException {
    // look through all the shards for attempts that need to be cleaned up.
    // also find all the attempts that are finished
    // then rename all the attempts jobs to commits
    LOG.info("Commiting Job [{0}]", jobContext.getJobID());
    Configuration configuration = jobContext.getConfiguration();
    Path tableOutput = BlurOutputFormat.getOutputPath(configuration);
    LOG.info("TableOutput path [{0}]", tableOutput);
    makeSureNoEmptyShards(configuration, tableOutput);
    FileSystem fileSystem = tableOutput.getFileSystem(configuration);
    for (FileStatus fileStatus : fileSystem.listStatus(tableOutput)) {
        LOG.info("Checking file status [{0}] with path [{1}]", fileStatus, fileStatus.getPath());
        if (isShard(fileStatus)) {
            commitOrAbortJob(jobContext, fileStatus.getPath(), true);
        }
    }
    LOG.info("Commiting Complete [{0}]", jobContext.getJobID());
}

From source file:org.apache.blur.mapreduce.lib.BlurOutputCommitter.java

License:Apache License

private void commitOrAbortJob(JobContext jobContext, Path shardPath, boolean commit) throws IOException {
    LOG.info("CommitOrAbort [{0}] path [{1}]", commit, shardPath);
    FileSystem fileSystem = shardPath.getFileSystem(jobContext.getConfiguration());
    FileStatus[] listStatus = fileSystem.listStatus(shardPath, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            LOG.info("Checking path [{0}]", path);
            if (path.getName().endsWith(".task_complete")) {
                return true;
            }
            return false;
        }
    });
    for (FileStatus fileStatus : listStatus) {
        Path path = fileStatus.getPath();
        LOG.info("Trying to commitOrAbort [{0}]", path);
        String name = path.getName();
        boolean taskComplete = name.endsWith(".task_complete");
        if (fileStatus.isDir()) {
            String taskAttemptName = getTaskAttemptName(name);
            if (taskAttemptName == null) {
                LOG.info("Dir name [{0}] not task attempt", name);
                continue;
            }
            TaskAttemptID taskAttemptID = TaskAttemptID.forName(taskAttemptName);
            if (taskAttemptID.getJobID().equals(jobContext.getJobID())) {
                if (commit) {
                    if (taskComplete) {
                        fileSystem.rename(path, new Path(shardPath, taskAttemptName + ".commit"));
                        LOG.info("Committing [{0}] in path [{1}]", taskAttemptID, path);
                    } else {
                        fileSystem.delete(path, true);
                        LOG.info("Deleting tmp dir [{0}] in path [{1}]", taskAttemptID, path);
                    }
                } else {
                    fileSystem.delete(path, true);
                    LOG.info("Deleting aborted job dir [{0}] in path [{1}]", taskAttemptID, path);
                }
            } else {
                LOG.warn("TaskAttempt JobID [{0}] does not match JobContext JobId [{1}]",
                        taskAttemptID.getJobID(), jobContext.getJobID());
            }
        }
    }
}

From source file:org.apache.blur.mapreduce.lib.BlurOutputCommitter.java

License:Apache License

@Override
public void abortJob(JobContext jobContext, State state) throws IOException {
    LOG.info("Abort Job [{0}]", jobContext.getJobID());
    Configuration configuration = jobContext.getConfiguration();
    Path tableOutput = BlurOutputFormat.getOutputPath(configuration);
    makeSureNoEmptyShards(configuration, tableOutput);
    FileSystem fileSystem = tableOutput.getFileSystem(configuration);
    for (FileStatus fileStatus : fileSystem.listStatus(tableOutput)) {
        if (isShard(fileStatus)) {
            commitOrAbortJob(jobContext, fileStatus.getPath(), false);
        }
    }
}

From source file:org.apache.blur.mapreduce.lib.BlurOutputFormat.java

License:Apache License

@Override
public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
    CheckOutputSpecs.checkOutputSpecs(context.getConfiguration(), context.getNumReduceTasks());
}

From source file:org.apache.carbondata.hadoop.api.CarbonFileInputFormat.java

License:Apache License

/**
 * {@inheritDoc}
 * Configurations FileInputFormat.INPUT_DIR
 * are used to get table path to read.
 *
 * @param job
 * @return List<InputSplit> list of CarbonInputSplit
 * @throws IOException
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    CarbonTable carbonTable = getOrCreateCarbonTable(job.getConfiguration());
    if (null == carbonTable) {
        throw new IOException("Missing/Corrupt schema file for table.");
    }
    AbsoluteTableIdentifier identifier = carbonTable.getAbsoluteTableIdentifier();

    if (getValidateSegmentsToAccess(job.getConfiguration())) {
        // get all valid segments and set them into the configuration
        // check for externalTable segment (Segment_null)
        // process and resolve the expression

        ReadCommittedScope readCommittedScope = null;
        if (carbonTable.isTransactionalTable()) {
            readCommittedScope = new LatestFilesReadCommittedScope(
                    identifier.getTablePath() + "/Fact/Part0/Segment_null/", job.getConfiguration());
        } else {
            readCommittedScope = getReadCommittedScope(job.getConfiguration());
            if (readCommittedScope == null) {
                readCommittedScope = new LatestFilesReadCommittedScope(identifier.getTablePath(),
                        job.getConfiguration());
            } else {
                readCommittedScope.setConfiguration(job.getConfiguration());
            }
        }
        // this will be null in case of corrupt schema file.
        PartitionInfo partitionInfo = carbonTable.getPartitionInfo(carbonTable.getTableName());
        Expression filter = getFilterPredicates(job.getConfiguration());

        // if external table Segments are found, add it to the List
        List<Segment> externalTableSegments = new ArrayList<Segment>();
        Segment seg;
        if (carbonTable.isTransactionalTable()) {
            // In some cases the SDK writes into the segment path instead of the table path,
            // i.e. inside "Fact/Part0/Segment_null". The segment in this case is named "null".
            // The table is treated by default as a transactional table and goes through
            // the CarbonFileInputFormat path. That scenario is handled in the code below.
            seg = new Segment("null", null, readCommittedScope);
            externalTableSegments.add(seg);
        } else {
            LoadMetadataDetails[] loadMetadataDetails = readCommittedScope.getSegmentList();
            for (LoadMetadataDetails load : loadMetadataDetails) {
                seg = new Segment(load.getLoadName(), null, readCommittedScope);
                externalTableSegments.add(seg);
            }
        }
        List<InputSplit> splits = new ArrayList<>();
        boolean useBlockDataMap = job.getConfiguration().getBoolean("filter_blocks", true);
        // useBlockDataMap is false in the SDK case when the user has not provided any filter;
        // in that case we don't want to load the block/blocklet datamap. It is true in all
        // other scenarios.
        if (useBlockDataMap) {
            // do block filtering and get split
            splits = getSplits(job, filter, externalTableSegments, null, partitionInfo, null);
        } else {
            for (CarbonFile carbonFile : getAllCarbonDataFiles(carbonTable.getTablePath())) {
                // Segment id is set to null because SDK does not write carbondata files with respect
                // to segments. So no specific name is present for this load.
                CarbonInputSplit split = new CarbonInputSplit("null", new Path(carbonFile.getAbsolutePath()), 0,
                        carbonFile.getLength(), carbonFile.getLocations(), FileFormat.COLUMNAR_V3);
                split.setVersion(ColumnarFormatVersion.V3);
                BlockletDetailInfo info = new BlockletDetailInfo();
                split.setDetailInfo(info);
                info.setBlockSize(carbonFile.getLength());
                info.setVersionNumber(split.getVersion().number());
                info.setUseMinMaxForPruning(false);
                splits.add(split);
            }
            Collections.sort(splits, new Comparator<InputSplit>() {
                @Override
                public int compare(InputSplit o1, InputSplit o2) {
                    return ((CarbonInputSplit) o1).getPath().compareTo(((CarbonInputSplit) o2).getPath());
                }
            });
        }
        if (getColumnProjection(job.getConfiguration()) == null) {
            // If the user projection is empty, default to all columns as the projection.
            // All column names are filled inside getSplits, so it can only be updated here.
            String[] projectionColumns = projectAllColumns(carbonTable);
            setColumnProjection(job.getConfiguration(), projectionColumns);
        }
        return splits;
    }
    return null;
}
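
As the javadoc above notes, the table path comes from FileInputFormat.INPUT_DIR. A hedged sketch of the driver-side setup (the path and job name are hypothetical):

// Sketch: pointing the job at the table path that getSplits() above reads
// via FileInputFormat.INPUT_DIR. The path below is hypothetical.
Job job = Job.getInstance(new Configuration(), "carbon-file-read");
FileInputFormat.setInputPaths(job, new Path("/warehouse/example_table"));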