List of usage examples for the org.apache.hadoop.mapreduce.lib.output.MultipleOutputs constructor
public MultipleOutputs(TaskInputOutputContext<?, ?, KEYOUT, VALUEOUT> context)
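All of the examples below call this constructor from a task's setup() method, passing the Mapper.Context or Reducer.Context directly (both extend TaskInputOutputContext). As a point of reference, here is a minimal sketch of the full lifecycle; the class name, the named output "stats", and the Text/IntWritable types are illustrative assumptions, not taken from any example below. Note in particular the close() call in cleanup(), without which buffered output can be lost:

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

public class ExampleReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    private MultipleOutputs<Text, IntWritable> mos;

    @Override
    protected void setup(Context context) {
        // Reducer.Context is a TaskInputOutputContext, so it can be passed as-is.
        mos = new MultipleOutputs<>(context);
    }

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable v : values) {
            sum += v.get();
        }
        // "stats" (hypothetical) must have been registered on the Job via
        // MultipleOutputs.addNamedOutput() before submission.
        mos.write("stats", key, new IntWritable(sum));
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        // Closing flushes and closes all underlying record writers.
        mos.close();
    }
}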
From source file:hr.fer.tel.rovkp.homework02.task02.LocationsReducer.java
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    mos = new MultipleOutputs<>(context);
}
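For a setup() like this to be useful, each named output the task writes to has to be registered on the driver side before the job is submitted. A minimal driver sketch follows; the "stats" name and the TextOutputFormat/Text/IntWritable choices are assumptions for illustration, not taken from LocationsReducer:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class MultipleOutputsDriver {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "multiple-outputs-example");
        job.setJarByClass(MultipleOutputsDriver.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Register each named output the task will reference in mos.write("stats", ...).
        MultipleOutputs.addNamedOutput(job, "stats", TextOutputFormat.class, Text.class, IntWritable.class);

        // Optional: create the default part-r-* files only when something is
        // actually written to them, instead of leaving empty files behind.
        LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}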
From source file:ipldataanalysis2.DataAnalysisMapper.java
protected void setup(Context context) {
    // Create a new MultipleOutputs using the context object
    mos = new MultipleOutputs(context);
}
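Note that this example constructs MultipleOutputs with a raw type, which compiles with rawtypes/unchecked warnings. When the output key/value types are fixed, a parameterized construction like the diamond form in the LocationsReducer example above avoids the warning.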
From source file:kogiri.mapreduce.preprocess.indexing.stage1.ReadIndexBuilderMapper.java
License:Open Source License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    this.ppConfig = PreprocessorConfig.createInstance(conf);
    this.mos = new MultipleOutputs(context);
    this.namedOutputs = NamedOutputs.createInstance(conf);
    this.readIDCounter = 0;

    FileSplit inputSplit = (FileSplit) context.getInputSplit();
    int namedoutputID = this.namedOutputs.getIDFromFilename(inputSplit.getPath().getName());
    NamedOutputRecord namedoutputRecord = this.namedOutputs.getRecordFromID(namedoutputID);
    this.namedOutput = namedoutputRecord.getIdentifier();
    this.histogram = new KmerHistogram(namedoutputRecord.getFilename(), this.ppConfig.getKmerSize());
}
From source file:kogiri.mapreduce.readfrequency.modecount.ModeCounterReducer.java
License:Open Source License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    this.namedOutputs = NamedOutputs.createInstance(conf);
    this.mos = new MultipleOutputs(context);
}
From source file:ml.shifu.shifu.core.posttrain.PostTrainMapper.java
License:Apache License
@SuppressWarnings({ "rawtypes", "unchecked" })
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    loadConfigFiles(context);
    loadTagWeightNum();
    this.dataPurifier = new DataPurifier(this.modelConfig, false);
    this.outputKey = new IntWritable();
    this.outputValue = new Text();
    this.tags = new HashSet<String>(modelConfig.getFlattenTags());

    SourceType sourceType = this.modelConfig.getDataSet().getSource();
    List<BasicML> models = ModelSpecLoaderUtils.loadBasicModels(modelConfig, null, sourceType);
    this.headers = CommonUtils.getFinalHeaders(modelConfig);
    this.modelRunner = new ModelRunner(modelConfig, columnConfigList, this.headers,
            modelConfig.getDataSetDelimiter(), models);

    this.mos = new MultipleOutputs<NullWritable, Text>((TaskInputOutputContext) context);
    this.initFeatureStats();
}
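The cast to the raw TaskInputOutputContext here (matched by the @SuppressWarnings annotation) is what allows a MultipleOutputs<NullWritable, Text> to be built inside a mapper whose own declared output types differ: the constructor's signature, MultipleOutputs(TaskInputOutputContext<?, ?, KEYOUT, VALUEOUT> context), otherwise ties KEYOUT/VALUEOUT to the context's type parameters.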
From source file:ml.shifu.shifu.core.varselect.VarSelectReducer.java
License:Apache License
/**
 * Do initialization like ModelConfig and ColumnConfig loading.
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    loadConfigFiles(context);
    int[] inputOutputIndex = getInputOutputCandidateCounts(this.columnConfigList);
    this.inputNodeCount = inputOutputIndex[0] == 0 ? inputOutputIndex[2] : inputOutputIndex[0];
    this.wrapperRatio = context.getConfiguration().getFloat(Constants.SHIFU_VARSELECT_WRAPPER_RATIO,
            Constants.SHIFU_DEFAULT_VARSELECT_WRAPPER_RATIO);
    this.outputKey = new Text();
    this.outputValue = new Text();
    this.wrapperBy = context.getConfiguration().get(Constants.SHIFU_VARSELECT_WRAPPER_TYPE,
            Constants.WRAPPER_BY_SE);
    this.mos = new MultipleOutputs<Text, Text>(context);
}
From source file:org.apache.kylin.engine.mr.steps.FactDistinctColumnsReducer.java
License:Apache License
@Override
protected void setup(Context context) throws IOException {
    super.bindCurrentConfiguration(context.getConfiguration());
    Configuration conf = context.getConfiguration();
    mos = new MultipleOutputs(context);

    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
    String cubeName = conf.get(BatchConstants.CFG_CUBE_NAME);
    CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
    cubeConfig = cube.getConfig();
    cubeDesc = cube.getDescriptor();
    columnList = CubeManager.getInstance(config).getAllDictColumnsOnFact(cubeDesc);

    boolean collectStatistics = Boolean.parseBoolean(conf.get(BatchConstants.CFG_STATISTICS_ENABLED));
    int numberOfTasks = context.getNumReduceTasks();
    taskId = context.getTaskAttemptID().getTaskID().getId();
    uhcReducerCount = cube.getConfig().getUHCReducerCount();
    initReducerIdToColumnIndex(config);

    if (collectStatistics && (taskId == numberOfTasks - 1)) {
        // hll
        isStatistics = true;
        baseCuboidRowCountInMappers = Lists.newArrayList();
        cuboidHLLMap = Maps.newHashMap();
        samplingPercentage = Integer
                .parseInt(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT));
        logger.info("Reducer " + taskId + " handling stats");
    } else if (collectStatistics && (taskId == numberOfTasks - 2)) {
        // partition col
        isPartitionCol = true;
        col = cubeDesc.getModel().getPartitionDesc().getPartitionDateColumnRef();
        if (col == null) {
            logger.info("No partition col. This reducer will do nothing");
        } else {
            logger.info("Reducer " + taskId + " handling partition col " + col.getIdentity());
        }
    } else {
        // normal col
        col = columnList.get(reducerIdToColumnIndex.get(taskId));
        Preconditions.checkNotNull(col);

        // local build dict
        buildDictInReducer = config.isBuildDictInReducerEnabled();
        if (cubeDesc.getDictionaryBuilderClass(col) != null) {
            // only works with default dictionary builder
            buildDictInReducer = false;
        }
        if (config.getUHCReducerCount() > 1) {
            int[] uhcIndex = CubeManager.getInstance(config).getUHCIndex(cubeDesc);
            int colIndex = reducerIdToColumnIndex.get(taskId);
            if (uhcIndex[colIndex] == 1)
                buildDictInReducer = false; // for UHC columns, this feature should be disabled
        }
        if (buildDictInReducer) {
            builder = DictionaryGenerator.newDictionaryBuilder(col.getType());
            builder.init(null, 0);
        }
        logger.info("Reducer " + taskId + " handling column " + col + ", buildDictInReducer="
                + buildDictInReducer);
    }
}
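This reducer is notable in that a single class plays three roles (HLL statistics, partition-column handling, and per-column dictionary building) selected purely by task id, presumably so that each role's records can then land in a separate output through the shared mos instance.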
From source file:org.apache.kylin.engine.mr.steps.FilterRecommendCuboidDataMapper.java
License:Apache License
@Override
protected void doSetup(Context context) throws IOException {
    super.bindCurrentConfiguration(context.getConfiguration());
    mos = new MultipleOutputs(context);

    String cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME);
    String segmentID = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_ID);

    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
    CubeManager cubeManager = CubeManager.getInstance(config);
    CubeInstance cube = cubeManager.getCube(cubeName);
    CubeSegment optSegment = cube.getSegmentById(segmentID);
    CubeSegment originalSegment = cube.getOriginalSegmentToOptimize(optSegment);

    rowKeySplitter = new RowKeySplitter(originalSegment);
    baseCuboid = cube.getCuboidScheduler().getBaseCuboidId();

    recommendCuboids = cube.getCuboidsRecommend();
    Preconditions.checkNotNull(recommendCuboids, "The recommend cuboid map could not be null");
}
From source file:org.apache.kylin.engine.mr.steps.UHCDictionaryReducer.java
License:Apache License
@Override
protected void doSetup(Context context) throws IOException {
    super.bindCurrentConfiguration(context.getConfiguration());
    Configuration conf = context.getConfiguration();
    mos = new MultipleOutputs(context);

    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
    String cubeName = conf.get(BatchConstants.CFG_CUBE_NAME);
    CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
    CubeDesc cubeDesc = cube.getDescriptor();
    List<TblColRef> uhcColumns = cubeDesc.getAllUHCColumns();

    int taskId = context.getTaskAttemptID().getTaskID().getId();
    col = uhcColumns.get(taskId);
    logger.info("column name: " + col.getIdentity());

    if (cube.getDescriptor().getShardByColumns().contains(col)) {
        // for ShardByColumns
        builder = DictionaryGenerator.newDictionaryBuilder(col.getType());
        builder.init(null, 0, null);
    } else {
        // for GlobalDictionaryColumns
        String hdfsDir = conf.get(BatchConstants.CFG_GLOBAL_DICT_BASE_DIR);
        DictionaryInfo dictionaryInfo = new DictionaryInfo(col.getColumnDesc(), col.getDatatype());
        String builderClass = cubeDesc.getDictionaryBuilderClass(col);
        builder = (IDictionaryBuilder) ClassUtil.newInstance(builderClass);
        builder.init(dictionaryInfo, 0, hdfsDir);
    }
}
From source file:org.apache.kylin.engine.mr.steps.UpdateOldCuboidShardMapper.java
License:Apache License
@Override
protected void doSetup(Context context) throws IOException {
    super.bindCurrentConfiguration(context.getConfiguration());
    mos = new MultipleOutputs(context);

    String cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME);
    String segmentID = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_ID);

    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
    CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
    CubeSegment cubeSegment = cube.getSegmentById(segmentID);
    CubeSegment oldSegment = cube.getOriginalSegmentToOptimize(cubeSegment);

    cubeDesc = cube.getDescriptor();
    baseCuboid = cube.getCuboidScheduler().getBaseCuboidId();

    rowKeySplitter = new RowKeySplitter(oldSegment);
    rowKeyEncoderProvider = new RowKeyEncoderProvider(cubeSegment);
}