List of usage examples for org.apache.hadoop.mapreduce.JobContext.getConfiguration()
public Configuration getConfiguration();
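Every example below follows the same round trip: driver code stores settings on the Job's Configuration before submission, and InputFormat/OutputFormat code reads them back through JobContext.getConfiguration() at planning time. A minimal sketch of that round trip; the key demo.max.records and the DemoInputFormat class are hypothetical, not from any of the projects below.

import java.io.IOException;
import java.util.Collections;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;

public class DemoInputFormat /* extends InputFormat<K, V> in a real job */ {

  // Hypothetical configuration key; any namespaced name works.
  private static final String MAX_RECORDS_KEY = "demo.max.records";

  // Driver side: store the setting on the job's configuration.
  public static void setMaxRecords(Job job, int maxRecords) {
    job.getConfiguration().setInt(MAX_RECORDS_KEY, maxRecords);
  }

  // Framework side: read the setting back from the JobContext during planning.
  public List<InputSplit> getSplits(JobContext context) throws IOException {
    Configuration conf = context.getConfiguration();
    int maxRecords = conf.getInt(MAX_RECORDS_KEY, -1);
    if (maxRecords < 0) {
      throw new IOException(MAX_RECORDS_KEY + " was not set on the job");
    }
    // A real implementation would build splits from maxRecords here.
    return Collections.emptyList();
  }
}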
From source file:org.apache.accumulo.hadoopImpl.mapreduce.AccumuloRecordReader.java
License:Apache License
/**
 * Creates {@link AccumuloClient} from the configuration.
 */
private static AccumuloClient createClient(JobContext context, Class<?> callingClass) {
  return InputConfigurator.createClient(callingClass, context.getConfiguration());
}
From source file:org.apache.accumulo.hadoopImpl.mapreduce.InputFormatBase.java
License:Apache License
/**
 * Determines whether a configuration has the
 * {@link org.apache.accumulo.core.client.BatchScanner} feature enabled.
 *
 * @param context the Hadoop context for the configured job
 * @since 1.7.0
 * @see #setBatchScan(Job, boolean)
 */
protected static boolean isBatchScan(JobContext context) {
  return InputConfigurator.isBatchScan(CLASS, context.getConfiguration());
}
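Accumulo delegates the storage details to InputConfigurator, but the underlying mechanism is an ordinary boolean property pair on the configuration. A hedged sketch of that setter/getter pattern; the key example.batch.scan.enabled and the FeatureFlags class are hypothetical, not Accumulo's real internals.

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;

public class FeatureFlags {

  // Hypothetical key; InputConfigurator derives its real keys internally.
  private static final String BATCH_SCAN_KEY = "example.batch.scan.enabled";

  // Mirrors the setBatchScan(Job, boolean) side of the pair.
  public static void setBatchScan(Job job, boolean enabled) {
    job.getConfiguration().setBoolean(BATCH_SCAN_KEY, enabled);
  }

  // Mirrors isBatchScan(JobContext): read the flag back, defaulting to false.
  public static boolean isBatchScan(JobContext context) {
    return context.getConfiguration().getBoolean(BATCH_SCAN_KEY, false);
  }
}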
From source file:org.apache.bigtop.bigpetstore.generator.GeneratePetStoreTransactionsInputFormat.java
License:Apache License
@Override
public List<InputSplit> getSplits(JobContext arg) throws IOException {
  int num_records_desired = arg.getConfiguration()
      .getInt(GeneratePetStoreTransactionsInputFormat.props.bigpetstore_records.name(), -1);
  if (num_records_desired == -1) {
    throw new RuntimeException(
        "# of total records not set in configuration object: " + arg.getConfiguration());
  }

  ArrayList<InputSplit> list = new ArrayList<InputSplit>();

  /**
   * Generator class will take a state as input and generate all the data
   * for that state.
   */
  for (TransactionIteratorFactory.STATE s : STATE.values()) {
    PetStoreTransactionInputSplit split = new PetStoreTransactionInputSplit(
        (int) (Math.ceil(num_records_desired * s.probability)), s);
    System.out.println(s + " _ " + split.records);
    list.add(split);
  }
  return list;
}
From source file:org.apache.bigtop.bigpetstore.generator.PetStoreTransactionsInputFormat.java
License:Apache License
@Override
public List<InputSplit> getSplits(JobContext arg) throws IOException {
  int numRecordsDesired = arg.getConfiguration()
      .getInt(PetStoreTransactionsInputFormat.props.bigpetstore_records.name(), -1);
  if (numRecordsDesired == -1) {
    throw new RuntimeException(
        "# of total records not set in configuration object: " + arg.getConfiguration());
  }

  List<InputSplit> list = new ArrayList<InputSplit>();
  long customerIdStart = 1;
  for (State s : State.values()) {
    int numRecords = numRecords(numRecordsDesired, s.probability);
    // Each state is assigned a range of customer ids from which it can choose.
    // The number of customers can be as many as the number of transactions.
    Range<Long> customerIdRange = Range.between(customerIdStart, customerIdStart + numRecords - 1);
    PetStoreTransactionInputSplit split = new PetStoreTransactionInputSplit(numRecords, customerIdRange, s);
    System.out.println(s + " _ " + split.records);
    list.add(split);
    customerIdStart += numRecords;
  }
  return list;
}
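Both BigPetStore variants fail fast when the record count is missing, so the driver must set it before submission. A sketch of that driver-side step, assuming the props enum above is visible to the driver; the PetStoreDriver class and job name are hypothetical.

import java.io.IOException;

import org.apache.bigtop.bigpetstore.generator.PetStoreTransactionsInputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class PetStoreDriver {

  public static Job configureJob(Configuration conf, int totalRecords) throws IOException {
    Job job = Job.getInstance(conf, "bigpetstore-generate");
    // Must match the key that getSplits() reads back above.
    job.getConfiguration().setInt(
        PetStoreTransactionsInputFormat.props.bigpetstore_records.name(), totalRecords);
    job.setInputFormatClass(PetStoreTransactionsInputFormat.class);
    return job;
  }
}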
From source file:org.apache.blur.mapreduce.lib.BlurInputFormat.java
License:Apache License
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException {
  Path[] dirs = getInputPaths(context);
  List<BlurInputSplit> splits;
  Configuration configuration = context.getConfiguration();
  if (isSplitCommandSupported(configuration)) {
    splits = getSplitsFromCommand(configuration, dirs);
  } else {
    splits = getSplits(configuration, dirs);
  }
  return toList(getMaxNumberOfMaps(configuration), splits);
}
From source file:org.apache.blur.mapreduce.lib.BlurOutputCommitter.java
License:Apache License
@Override
public void commitJob(JobContext jobContext) throws IOException {
  // Look through all the shards for attempts that need to be cleaned up.
  // Also find all the attempts that are finished, then rename all the
  // attempts' job directories to commits.
  LOG.info("Committing Job [{0}]", jobContext.getJobID());
  Configuration configuration = jobContext.getConfiguration();
  Path tableOutput = BlurOutputFormat.getOutputPath(configuration);
  LOG.info("TableOutput path [{0}]", tableOutput);
  makeSureNoEmptyShards(configuration, tableOutput);
  FileSystem fileSystem = tableOutput.getFileSystem(configuration);
  for (FileStatus fileStatus : fileSystem.listStatus(tableOutput)) {
    LOG.info("Checking file status [{0}] with path [{1}]", fileStatus, fileStatus.getPath());
    if (isShard(fileStatus)) {
      commitOrAbortJob(jobContext, fileStatus.getPath(), true);
    }
  }
  LOG.info("Committing Complete [{0}]", jobContext.getJobID());
}
From source file:org.apache.blur.mapreduce.lib.BlurOutputCommitter.java
License:Apache License
private void commitOrAbortJob(JobContext jobContext, Path shardPath, boolean commit) throws IOException {
  LOG.info("CommitOrAbort [{0}] path [{1}]", commit, shardPath);
  FileSystem fileSystem = shardPath.getFileSystem(jobContext.getConfiguration());
  FileStatus[] listStatus = fileSystem.listStatus(shardPath, new PathFilter() {
    @Override
    public boolean accept(Path path) {
      LOG.info("Checking path [{0}]", path);
      return path.getName().endsWith(".task_complete");
    }
  });
  for (FileStatus fileStatus : listStatus) {
    Path path = fileStatus.getPath();
    LOG.info("Trying to commitOrAbort [{0}]", path);
    String name = path.getName();
    boolean taskComplete = name.endsWith(".task_complete");
    if (fileStatus.isDir()) {
      String taskAttemptName = getTaskAttemptName(name);
      if (taskAttemptName == null) {
        LOG.info("Dir name [{0}] not task attempt", name);
        continue;
      }
      TaskAttemptID taskAttemptID = TaskAttemptID.forName(taskAttemptName);
      if (taskAttemptID.getJobID().equals(jobContext.getJobID())) {
        if (commit) {
          if (taskComplete) {
            fileSystem.rename(path, new Path(shardPath, taskAttemptName + ".commit"));
            LOG.info("Committing [{0}] in path [{1}]", taskAttemptID, path);
          } else {
            fileSystem.delete(path, true);
            LOG.info("Deleting tmp dir [{0}] in path [{1}]", taskAttemptID, path);
          }
        } else {
          fileSystem.delete(path, true);
          LOG.info("Deleting aborted job dir [{0}] in path [{1}]", taskAttemptID, path);
        }
      } else {
        LOG.warn("TaskAttempt JobID [{0}] does not match JobContext JobId [{1}]",
            taskAttemptID.getJobID(), jobContext.getJobID());
      }
    }
  }
}
From source file:org.apache.blur.mapreduce.lib.BlurOutputCommitter.java
License:Apache License
@Override
public void abortJob(JobContext jobContext, State state) throws IOException {
  LOG.info("Abort Job [{0}]", jobContext.getJobID());
  Configuration configuration = jobContext.getConfiguration();
  Path tableOutput = BlurOutputFormat.getOutputPath(configuration);
  makeSureNoEmptyShards(configuration, tableOutput);
  FileSystem fileSystem = tableOutput.getFileSystem(configuration);
  for (FileStatus fileStatus : fileSystem.listStatus(tableOutput)) {
    if (isShard(fileStatus)) {
      commitOrAbortJob(jobContext, fileStatus.getPath(), false);
    }
  }
}
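commitJob and abortJob above are symmetric: both resolve the output FileSystem from the JobContext configuration, walk the shard directories, and differ only in the boolean handed to commitOrAbortJob. A stripped-down sketch of that shared shape; the ShardCommitSketch class is hypothetical and not Blur's code, though the .task_complete marker is the one used above.

import java.io.IOException;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.JobContext;

public class ShardCommitSketch {

  // One walk serves both outcomes; the committer above passes true from
  // commitJob and false from abortJob.
  static void finishJob(JobContext jobContext, Path tableOutput, boolean commit) throws IOException {
    FileSystem fileSystem = tableOutput.getFileSystem(jobContext.getConfiguration());
    for (FileStatus status : fileSystem.listStatus(tableOutput)) {
      if (status.isDirectory()) {
        // Blur renames *.task_complete attempt directories to *.commit on
        // commit and deletes them on abort; here we only report the decision.
        System.out.println((commit ? "commit " : "abort ") + status.getPath());
      }
    }
  }
}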
From source file:org.apache.blur.mapreduce.lib.BlurOutputFormat.java
License:Apache License
@Override
public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
  CheckOutputSpecs.checkOutputSpecs(context.getConfiguration(), context.getNumReduceTasks());
}
From source file:org.apache.carbondata.hadoop.api.CarbonFileInputFormat.java
License:Apache License
/**
 * {@inheritDoc}
 * The FileInputFormat.INPUT_DIR configuration is used to get the table path to read.
 *
 * @param job
 * @return List<InputSplit> list of CarbonInputSplit
 * @throws IOException
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  CarbonTable carbonTable = getOrCreateCarbonTable(job.getConfiguration());
  if (null == carbonTable) {
    throw new IOException("Missing/Corrupt schema file for table.");
  }
  AbsoluteTableIdentifier identifier = carbonTable.getAbsoluteTableIdentifier();
  if (getValidateSegmentsToAccess(job.getConfiguration())) {
    // Get all valid segments and set them into the configuration. Check for
    // an external table segment (Segment_null), then process and resolve the
    // filter expression.
    ReadCommittedScope readCommittedScope = null;
    if (carbonTable.isTransactionalTable()) {
      readCommittedScope = new LatestFilesReadCommittedScope(
          identifier.getTablePath() + "/Fact/Part0/Segment_null/", job.getConfiguration());
    } else {
      readCommittedScope = getReadCommittedScope(job.getConfiguration());
      if (readCommittedScope == null) {
        readCommittedScope = new LatestFilesReadCommittedScope(identifier.getTablePath(),
            job.getConfiguration());
      } else {
        readCommittedScope.setConfiguration(job.getConfiguration());
      }
    }
    // This will be null in case of a corrupt schema file.
    PartitionInfo partitionInfo = carbonTable.getPartitionInfo(carbonTable.getTableName());
    Expression filter = getFilterPredicates(job.getConfiguration());

    // If external table segments are found, add them to the list.
    List<Segment> externalTableSegments = new ArrayList<Segment>();
    Segment seg;
    if (carbonTable.isTransactionalTable()) {
      // The SDK in some cases writes into the segment path instead of the
      // table path, i.e. inside "Fact/Part0/Segment_null", and the segment in
      // that case is named "null". The table is treated by default as a
      // transactional table and goes through CarbonFileInputFormat; that
      // scenario is handled below.
      seg = new Segment("null", null, readCommittedScope);
      externalTableSegments.add(seg);
    } else {
      LoadMetadataDetails[] loadMetadataDetails = readCommittedScope.getSegmentList();
      for (LoadMetadataDetails load : loadMetadataDetails) {
        seg = new Segment(load.getLoadName(), null, readCommittedScope);
        externalTableSegments.add(seg);
      }
    }
    List<InputSplit> splits = new ArrayList<>();
    boolean useBlockDataMap = job.getConfiguration().getBoolean("filter_blocks", true);
    // useBlockDataMap is false in the SDK case when the user has not provided
    // any filter; then we do not want to load the block/blocklet datamap. It
    // is true in all other scenarios.
    if (useBlockDataMap) {
      // Do block filtering and get the splits.
      splits = getSplits(job, filter, externalTableSegments, null, partitionInfo, null);
    } else {
      for (CarbonFile carbonFile : getAllCarbonDataFiles(carbonTable.getTablePath())) {
        // The segment id is set to null because the SDK does not write
        // carbondata files with respect to segments, so no specific name is
        // present for this load.
        CarbonInputSplit split = new CarbonInputSplit("null",
            new Path(carbonFile.getAbsolutePath()), 0, carbonFile.getLength(),
            carbonFile.getLocations(), FileFormat.COLUMNAR_V3);
        split.setVersion(ColumnarFormatVersion.V3);
        BlockletDetailInfo info = new BlockletDetailInfo();
        split.setDetailInfo(info);
        info.setBlockSize(carbonFile.getLength());
        info.setVersionNumber(split.getVersion().number());
        info.setUseMinMaxForPruning(false);
        splits.add(split);
      }
      Collections.sort(splits, new Comparator<InputSplit>() {
        @Override
        public int compare(InputSplit o1, InputSplit o2) {
          return ((CarbonInputSplit) o1).getPath().compareTo(((CarbonInputSplit) o2).getPath());
        }
      });
    }
    if (getColumnProjection(job.getConfiguration()) == null) {
      // If the user projection is empty, default to all columns. All column
      // names are filled inside getSplits, so update only here.
      String[] projectionColumns = projectAllColumns(carbonTable);
      setColumnProjection(job.getConfiguration(), projectionColumns);
    }
    return splits;
  }
  return null;
}
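The filter_blocks boolean read above is an ordinary configuration property, so a caller can skip block/blocklet datamap loading by clearing it before split planning. A sketch of that setup; the CarbonReadSetup class and job name are hypothetical, while the filter_blocks key is quoted from the getSplits code above.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class CarbonReadSetup {

  public static Job newJob(Configuration conf) throws IOException {
    Job job = Job.getInstance(conf, "carbon-file-read");
    // Key quoted from CarbonFileInputFormat.getSplits above; false skips
    // block/blocklet datamap loading when no filter is provided.
    job.getConfiguration().setBoolean("filter_blocks", false);
    return job;
  }
}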