List of usage examples for org.apache.hadoop.mapreduce.JobContext.getConfiguration()
public Configuration getConfiguration();
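Every example below follows the same round trip: driver code stores settings on the Job's Configuration before submission, and InputFormat/OutputFormat code reads them back through JobContext.getConfiguration() at planning time. A minimal sketch of that round trip; the key demo.max.records and the DemoInputFormat class are hypothetical, not from any of the projects below.

import java.io.IOException;
import java.util.Collections;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;

public class DemoInputFormat /* extends InputFormat<K, V> in a real job */ {

  // Hypothetical configuration key; any namespaced name works.
  private static final String MAX_RECORDS_KEY = "demo.max.records";

  // Driver side: store the setting on the job's configuration.
  public static void setMaxRecords(Job job, int maxRecords) {
    job.getConfiguration().setInt(MAX_RECORDS_KEY, maxRecords);
  }

  // Framework side: read the setting back from the JobContext during planning.
  public List<InputSplit> getSplits(JobContext context) throws IOException {
    Configuration conf = context.getConfiguration();
    int maxRecords = conf.getInt(MAX_RECORDS_KEY, -1);
    if (maxRecords < 0) {
      throw new IOException(MAX_RECORDS_KEY + " was not set on the job");
    }
    // A real implementation would build splits from maxRecords here.
    return Collections.emptyList();
  }
}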
From source file:org.apache.accumulo.hadoopImpl.mapreduce.AccumuloRecordReader.java
License:Apache License
/**
 * Creates {@link AccumuloClient} from the configuration.
 */
private static AccumuloClient createClient(JobContext context, Class<?> callingClass) {
  return InputConfigurator.createClient(callingClass, context.getConfiguration());
}
From source file:org.apache.accumulo.hadoopImpl.mapreduce.InputFormatBase.java
License:Apache License
/**
 * Determines whether a configuration has the
 * {@link org.apache.accumulo.core.client.BatchScanner} feature enabled.
 *
 * @param context the Hadoop context for the configured job
 * @since 1.7.0
 * @see #setBatchScan(Job, boolean)
 */
protected static boolean isBatchScan(JobContext context) {
  return InputConfigurator.isBatchScan(CLASS, context.getConfiguration());
}
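Accumulo delegates the storage details to InputConfigurator, but the underlying mechanism is an ordinary boolean property pair on the configuration. A hedged sketch of that setter/getter pattern; the key example.batch.scan.enabled and the FeatureFlags class are hypothetical, not Accumulo's real internals.

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;

public class FeatureFlags {

  // Hypothetical key; InputConfigurator derives its real keys internally.
  private static final String BATCH_SCAN_KEY = "example.batch.scan.enabled";

  // Mirrors the setBatchScan(Job, boolean) side of the pair.
  public static void setBatchScan(Job job, boolean enabled) {
    job.getConfiguration().setBoolean(BATCH_SCAN_KEY, enabled);
  }

  // Mirrors isBatchScan(JobContext): read the flag back, defaulting to false.
  public static boolean isBatchScan(JobContext context) {
    return context.getConfiguration().getBoolean(BATCH_SCAN_KEY, false);
  }
}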
From source file:org.apache.bigtop.bigpetstore.generator.GeneratePetStoreTransactionsInputFormat.java
License:Apache License
@Override
public List<InputSplit> getSplits(JobContext arg) throws IOException {
  int num_records_desired = arg.getConfiguration()
      .getInt(GeneratePetStoreTransactionsInputFormat.props.bigpetstore_records.name(), -1);
  if (num_records_desired == -1) {
    throw new RuntimeException(
        "# of total records not set in configuration object: " + arg.getConfiguration());
  }

  ArrayList<InputSplit> list = new ArrayList<InputSplit>();

  /**
   * Generator class will take a state as input and generate all the data
   * for that state.
   */
  for (TransactionIteratorFactory.STATE s : STATE.values()) {
    PetStoreTransactionInputSplit split = new PetStoreTransactionInputSplit(
        (int) (Math.ceil(num_records_desired * s.probability)), s);
    System.out.println(s + " _ " + split.records);
    list.add(split);
  }
  return list;
}
From source file:org.apache.bigtop.bigpetstore.generator.PetStoreTransactionsInputFormat.java
License:Apache License
@Override
public List<InputSplit> getSplits(JobContext arg) throws IOException {
  int numRecordsDesired = arg.getConfiguration()
      .getInt(PetStoreTransactionsInputFormat.props.bigpetstore_records.name(), -1);
  if (numRecordsDesired == -1) {
    throw new RuntimeException(
        "# of total records not set in configuration object: " + arg.getConfiguration());
  }

  List<InputSplit> list = new ArrayList<InputSplit>();
  long customerIdStart = 1;
  for (State s : State.values()) {
    int numRecords = numRecords(numRecordsDesired, s.probability);
    // Each state is assigned a range of customer ids from which it can choose.
    // The number of customers can be as many as the number of transactions.
    Range<Long> customerIdRange = Range.between(customerIdStart, customerIdStart + numRecords - 1);
    PetStoreTransactionInputSplit split = new PetStoreTransactionInputSplit(numRecords, customerIdRange, s);
    System.out.println(s + " _ " + split.records);
    list.add(split);
    customerIdStart += numRecords;
  }
  return list;
}
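Both BigPetStore variants fail fast when the record count is missing, so the driver must set it before submission. A sketch of that driver-side step, assuming the props enum above is visible to the driver; the PetStoreDriver class and job name are hypothetical.

import java.io.IOException;

import org.apache.bigtop.bigpetstore.generator.PetStoreTransactionsInputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class PetStoreDriver {

  public static Job configureJob(Configuration conf, int totalRecords) throws IOException {
    Job job = Job.getInstance(conf, "bigpetstore-generate");
    // Must match the key that getSplits() reads back above.
    job.getConfiguration().setInt(
        PetStoreTransactionsInputFormat.props.bigpetstore_records.name(), totalRecords);
    job.setInputFormatClass(PetStoreTransactionsInputFormat.class);
    return job;
  }
}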
From source file:org.apache.blur.mapreduce.lib.BlurInputFormat.java
License:Apache License
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException {
  Path[] dirs = getInputPaths(context);
  List<BlurInputSplit> splits;
  Configuration configuration = context.getConfiguration();
  if (isSplitCommandSupported(configuration)) {
    splits = getSplitsFromCommand(configuration, dirs);
  } else {
    splits = getSplits(configuration, dirs);
  }
  return toList(getMaxNumberOfMaps(configuration), splits);
}
From source file:org.apache.blur.mapreduce.lib.BlurOutputCommitter.java
License:Apache License
@Override
public void commitJob(JobContext jobContext) throws IOException {
  // Look through all the shards for attempts that need to be cleaned up.
  // Also find all the attempts that are finished, then rename all the
  // attempts' job directories to commits.
  LOG.info("Committing Job [{0}]", jobContext.getJobID());
  Configuration configuration = jobContext.getConfiguration();
  Path tableOutput = BlurOutputFormat.getOutputPath(configuration);
  LOG.info("TableOutput path [{0}]", tableOutput);
  makeSureNoEmptyShards(configuration, tableOutput);
  FileSystem fileSystem = tableOutput.getFileSystem(configuration);
  for (FileStatus fileStatus : fileSystem.listStatus(tableOutput)) {
    LOG.info("Checking file status [{0}] with path [{1}]", fileStatus, fileStatus.getPath());
    if (isShard(fileStatus)) {
      commitOrAbortJob(jobContext, fileStatus.getPath(), true);
    }
  }
  LOG.info("Committing Complete [{0}]", jobContext.getJobID());
}
From source file:org.apache.blur.mapreduce.lib.BlurOutputCommitter.java
License:Apache License
private void commitOrAbortJob(JobContext jobContext, Path shardPath, boolean commit) throws IOException {
  LOG.info("CommitOrAbort [{0}] path [{1}]", commit, shardPath);
  FileSystem fileSystem = shardPath.getFileSystem(jobContext.getConfiguration());
  FileStatus[] listStatus = fileSystem.listStatus(shardPath, new PathFilter() {
    @Override
    public boolean accept(Path path) {
      LOG.info("Checking path [{0}]", path);
      return path.getName().endsWith(".task_complete");
    }
  });
  for (FileStatus fileStatus : listStatus) {
    Path path = fileStatus.getPath();
    LOG.info("Trying to commitOrAbort [{0}]", path);
    String name = path.getName();
    boolean taskComplete = name.endsWith(".task_complete");
    if (fileStatus.isDir()) {
      String taskAttemptName = getTaskAttemptName(name);
      if (taskAttemptName == null) {
        LOG.info("Dir name [{0}] not task attempt", name);
        continue;
      }
      TaskAttemptID taskAttemptID = TaskAttemptID.forName(taskAttemptName);
      if (taskAttemptID.getJobID().equals(jobContext.getJobID())) {
        if (commit) {
          if (taskComplete) {
            fileSystem.rename(path, new Path(shardPath, taskAttemptName + ".commit"));
            LOG.info("Committing [{0}] in path [{1}]", taskAttemptID, path);
          } else {
            fileSystem.delete(path, true);
            LOG.info("Deleting tmp dir [{0}] in path [{1}]", taskAttemptID, path);
          }
        } else {
          fileSystem.delete(path, true);
          LOG.info("Deleting aborted job dir [{0}] in path [{1}]", taskAttemptID, path);
        }
      } else {
        LOG.warn("TaskAttempt JobID [{0}] does not match JobContext JobId [{1}]",
            taskAttemptID.getJobID(), jobContext.getJobID());
      }
    }
  }
}
From source file:org.apache.blur.mapreduce.lib.BlurOutputCommitter.java
License:Apache License
@Override
public void abortJob(JobContext jobContext, State state) throws IOException {
  LOG.info("Abort Job [{0}]", jobContext.getJobID());
  Configuration configuration = jobContext.getConfiguration();
  Path tableOutput = BlurOutputFormat.getOutputPath(configuration);
  makeSureNoEmptyShards(configuration, tableOutput);
  FileSystem fileSystem = tableOutput.getFileSystem(configuration);
  for (FileStatus fileStatus : fileSystem.listStatus(tableOutput)) {
    if (isShard(fileStatus)) {
      commitOrAbortJob(jobContext, fileStatus.getPath(), false);
    }
  }
}
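commitJob and abortJob above are symmetric: both resolve the output FileSystem from the JobContext configuration, walk the shard directories, and differ only in the boolean handed to commitOrAbortJob. A stripped-down sketch of that shared shape; the ShardCommitSketch class is hypothetical and not Blur's code, though the .task_complete marker is the one used above.

import java.io.IOException;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.JobContext;

public class ShardCommitSketch {

  // One walk serves both outcomes; the committer above passes true from
  // commitJob and false from abortJob.
  static void finishJob(JobContext jobContext, Path tableOutput, boolean commit) throws IOException {
    FileSystem fileSystem = tableOutput.getFileSystem(jobContext.getConfiguration());
    for (FileStatus status : fileSystem.listStatus(tableOutput)) {
      if (status.isDirectory()) {
        // Blur renames *.task_complete attempt directories to *.commit on
        // commit and deletes them on abort; here we only report the decision.
        System.out.println((commit ? "commit " : "abort ") + status.getPath());
      }
    }
  }
}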
From source file:org.apache.blur.mapreduce.lib.BlurOutputFormat.java
License:Apache License
@Override
public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
  CheckOutputSpecs.checkOutputSpecs(context.getConfiguration(), context.getNumReduceTasks());
}
From source file:org.apache.carbondata.hadoop.api.CarbonFileInputFormat.java
License:Apache License
/**
 * {@inheritDoc}
 * The FileInputFormat.INPUT_DIR configuration is used to get the table path to read.
 *
 * @param job
 * @return List<InputSplit> list of CarbonInputSplit
 * @throws IOException
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  CarbonTable carbonTable = getOrCreateCarbonTable(job.getConfiguration());
  if (null == carbonTable) {
    throw new IOException("Missing/Corrupt schema file for table.");
  }
  AbsoluteTableIdentifier identifier = carbonTable.getAbsoluteTableIdentifier();
  if (getValidateSegmentsToAccess(job.getConfiguration())) {
    // Get all valid segments and set them into the configuration. Check for
    // an external table segment (Segment_null), then process and resolve the
    // filter expression.
    ReadCommittedScope readCommittedScope = null;
    if (carbonTable.isTransactionalTable()) {
      readCommittedScope = new LatestFilesReadCommittedScope(
          identifier.getTablePath() + "/Fact/Part0/Segment_null/", job.getConfiguration());
    } else {
      readCommittedScope = getReadCommittedScope(job.getConfiguration());
      if (readCommittedScope == null) {
        readCommittedScope = new LatestFilesReadCommittedScope(identifier.getTablePath(),
            job.getConfiguration());
      } else {
        readCommittedScope.setConfiguration(job.getConfiguration());
      }
    }
    // This will be null in case of a corrupt schema file.
    PartitionInfo partitionInfo = carbonTable.getPartitionInfo(carbonTable.getTableName());
    Expression filter = getFilterPredicates(job.getConfiguration());

    // If external table segments are found, add them to the list.
    List<Segment> externalTableSegments = new ArrayList<Segment>();
    Segment seg;
    if (carbonTable.isTransactionalTable()) {
      // The SDK in some cases writes into the segment path instead of the
      // table path, i.e. inside "Fact/Part0/Segment_null", and the segment in
      // that case is named "null". The table is treated by default as a
      // transactional table and goes through CarbonFileInputFormat; that
      // scenario is handled below.
      seg = new Segment("null", null, readCommittedScope);
      externalTableSegments.add(seg);
    } else {
      LoadMetadataDetails[] loadMetadataDetails = readCommittedScope.getSegmentList();
      for (LoadMetadataDetails load : loadMetadataDetails) {
        seg = new Segment(load.getLoadName(), null, readCommittedScope);
        externalTableSegments.add(seg);
      }
    }
    List<InputSplit> splits = new ArrayList<>();
    boolean useBlockDataMap = job.getConfiguration().getBoolean("filter_blocks", true);
    // useBlockDataMap is false in the SDK case when the user has not provided
    // any filter; then we do not want to load the block/blocklet datamap. It
    // is true in all other scenarios.
    if (useBlockDataMap) {
      // Do block filtering and get the splits.
      splits = getSplits(job, filter, externalTableSegments, null, partitionInfo, null);
    } else {
      for (CarbonFile carbonFile : getAllCarbonDataFiles(carbonTable.getTablePath())) {
        // The segment id is set to null because the SDK does not write
        // carbondata files with respect to segments, so no specific name is
        // present for this load.
        CarbonInputSplit split = new CarbonInputSplit("null",
            new Path(carbonFile.getAbsolutePath()), 0, carbonFile.getLength(),
            carbonFile.getLocations(), FileFormat.COLUMNAR_V3);
        split.setVersion(ColumnarFormatVersion.V3);
        BlockletDetailInfo info = new BlockletDetailInfo();
        split.setDetailInfo(info);
        info.setBlockSize(carbonFile.getLength());
        info.setVersionNumber(split.getVersion().number());
        info.setUseMinMaxForPruning(false);
        splits.add(split);
      }
      Collections.sort(splits, new Comparator<InputSplit>() {
        @Override
        public int compare(InputSplit o1, InputSplit o2) {
          return ((CarbonInputSplit) o1).getPath().compareTo(((CarbonInputSplit) o2).getPath());
        }
      });
    }
    if (getColumnProjection(job.getConfiguration()) == null) {
      // If the user projection is empty, default to all columns. All column
      // names are filled inside getSplits, so update only here.
      String[] projectionColumns = projectAllColumns(carbonTable);
      setColumnProjection(job.getConfiguration(), projectionColumns);
    }
    return splits;
  }
  return null;
}
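The filter_blocks boolean read above is an ordinary configuration property, so a caller can skip block/blocklet datamap loading by clearing it before split planning. A sketch of that setup; the CarbonReadSetup class and job name are hypothetical, while the filter_blocks key is quoted from the getSplits code above.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class CarbonReadSetup {

  public static Job newJob(Configuration conf) throws IOException {
    Job job = Job.getInstance(conf, "carbon-file-read");
    // Key quoted from CarbonFileInputFormat.getSplits above; false skips
    // block/blocklet datamap loading when no filter is provided.
    job.getConfiguration().setBoolean("filter_blocks", false);
    return job;
  }
}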