Example usage for org.apache.hadoop.mapreduce.lib.output MultipleOutputs MultipleOutputs

Introduction

This page collects example usages of the org.apache.hadoop.mapreduce.lib.output.MultipleOutputs constructor.

Prototype

public MultipleOutputs(TaskInputOutputContext<?, ?, KEYOUT, VALUEOUT> context) 

Document

Creates and initializes multiple outputs support; it should be instantiated in the Mapper/Reducer setup method.
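
The snippets below show only the constructor call inside setup(). For context, here is a minimal end-to-end sketch of the MultipleOutputs lifecycle: register the named output on the Job, instantiate in setup(), write in reduce(), and close in cleanup(). The class name ExampleReducer and the named output "stats" are hypothetical, chosen only for illustration.

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

// Hypothetical reducer illustrating the full MultipleOutputs lifecycle.
public class ExampleReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    private MultipleOutputs<Text, IntWritable> mos;

    @Override
    protected void setup(Context context) {
        // Instantiate in setup, as the javadoc above recommends.
        mos = new MultipleOutputs<>(context);
    }

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get();
        }
        // "stats" must have been registered on the Job in the driver (see below).
        mos.write("stats", key, new IntWritable(sum));
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        // Closing flushes the named outputs; omitting this can lose data.
        mos.close();
    }
}

In the driver, the named output would be registered before submitting the job, for example:

MultipleOutputs.addNamedOutput(job, "stats", TextOutputFormat.class, Text.class, IntWritable.class);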

Usage

From source file:hr.fer.tel.rovkp.homework02.task02.LocationsReducer.java

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    mos = new MultipleOutputs<>(context);
}

From source file:ipldataanalysis2.DataAnalysisMapper.java

protected void setup(Context context) {
    // Create a new MultipleOutputs using the context object
    mos = new MultipleOutputs(context);
}

From source file:kogiri.mapreduce.preprocess.indexing.stage1.ReadIndexBuilderMapper.java

License:Open Source License

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();

    this.ppConfig = PreprocessorConfig.createInstance(conf);
    this.mos = new MultipleOutputs(context);
    this.namedOutputs = NamedOutputs.createInstance(conf);

    this.readIDCounter = 0;

    FileSplit inputSplit = (FileSplit) context.getInputSplit();

    int namedoutputID = this.namedOutputs.getIDFromFilename(inputSplit.getPath().getName());
    NamedOutputRecord namedoutputRecord = this.namedOutputs.getRecordFromID(namedoutputID);
    this.namedOutput = namedoutputRecord.getIdentifier();

    this.histogram = new KmerHistogram(namedoutputRecord.getFilename(), this.ppConfig.getKmerSize());
}

From source file:kogiri.mapreduce.readfrequency.modecount.ModeCounterReducer.java

License:Open Source License

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();

    this.namedOutputs = NamedOutputs.createInstance(conf);

    this.mos = new MultipleOutputs(context);
}

From source file:ml.shifu.shifu.core.posttrain.PostTrainMapper.java

License:Apache License

@SuppressWarnings({ "rawtypes", "unchecked" })
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    loadConfigFiles(context);

    loadTagWeightNum();

    this.dataPurifier = new DataPurifier(this.modelConfig, false);

    this.outputKey = new IntWritable();
    this.outputValue = new Text();

    this.tags = new HashSet<String>(modelConfig.getFlattenTags());
    SourceType sourceType = this.modelConfig.getDataSet().getSource();

    List<BasicML> models = ModelSpecLoaderUtils.loadBasicModels(modelConfig, null, sourceType);

    this.headers = CommonUtils.getFinalHeaders(modelConfig);
    this.modelRunner = new ModelRunner(modelConfig, columnConfigList, this.headers,
            modelConfig.getDataSetDelimiter(), models);

    this.mos = new MultipleOutputs<NullWritable, Text>((TaskInputOutputContext) context);

    this.initFeatureStats();
}

From source file:ml.shifu.shifu.core.varselect.VarSelectReducer.java

License:Apache License

/**
 * Do initialization like ModelConfig and ColumnConfig loading.
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    loadConfigFiles(context);
    int[] inputOutputIndex = getInputOutputCandidateCounts(this.columnConfigList);
    this.inputNodeCount = inputOutputIndex[0] == 0 ? inputOutputIndex[2] : inputOutputIndex[0];
    this.wrapperRatio = context.getConfiguration().getFloat(Constants.SHIFU_VARSELECT_WRAPPER_RATIO,
            Constants.SHIFU_DEFAULT_VARSELECT_WRAPPER_RATIO);
    this.outputKey = new Text();
    this.outputValue = new Text();
    this.wrapperBy = context.getConfiguration().get(Constants.SHIFU_VARSELECT_WRAPPER_TYPE,
            Constants.WRAPPER_BY_SE);
    this.mos = new MultipleOutputs<Text, Text>(context);
}

From source file:org.apache.kylin.engine.mr.steps.FactDistinctColumnsReducer.java

License:Apache License

@Override
protected void setup(Context context) throws IOException {
    super.bindCurrentConfiguration(context.getConfiguration());
    Configuration conf = context.getConfiguration();
    mos = new MultipleOutputs(context);

    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
    String cubeName = conf.get(BatchConstants.CFG_CUBE_NAME);
    CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
    cubeConfig = cube.getConfig();
    cubeDesc = cube.getDescriptor();
    columnList = CubeManager.getInstance(config).getAllDictColumnsOnFact(cubeDesc);

    boolean collectStatistics = Boolean.parseBoolean(conf.get(BatchConstants.CFG_STATISTICS_ENABLED));
    int numberOfTasks = context.getNumReduceTasks();
    taskId = context.getTaskAttemptID().getTaskID().getId();

    uhcReducerCount = cube.getConfig().getUHCReducerCount();
    initReducerIdToColumnIndex(config);

    if (collectStatistics && (taskId == numberOfTasks - 1)) {
        // hll
        isStatistics = true;
        baseCuboidRowCountInMappers = Lists.newArrayList();
        cuboidHLLMap = Maps.newHashMap();
        samplingPercentage = Integer
                .parseInt(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT));
        logger.info("Reducer " + taskId + " handling stats");
    } else if (collectStatistics && (taskId == numberOfTasks - 2)) {
        // partition col
        isPartitionCol = true;
        col = cubeDesc.getModel().getPartitionDesc().getPartitionDateColumnRef();
        if (col == null) {
            logger.info("No partition col. This reducer will do nothing");
        } else {
            logger.info("Reducer " + taskId + " handling partition col " + col.getIdentity());
        }
    } else {
        // normal col
        col = columnList.get(reducerIdToColumnIndex.get(taskId));
        Preconditions.checkNotNull(col);

        // local build dict
        buildDictInReducer = config.isBuildDictInReducerEnabled();
        if (cubeDesc.getDictionaryBuilderClass(col) != null) { // only works with default dictionary builder
            buildDictInReducer = false;
        }
        if (config.getUHCReducerCount() > 1) {
            int[] uhcIndex = CubeManager.getInstance(config).getUHCIndex(cubeDesc);
            int colIndex = reducerIdToColumnIndex.get(taskId);
            if (uhcIndex[colIndex] == 1)
                buildDictInReducer = false; //for UHC columns, this feature should be disabled
        }
        if (buildDictInReducer) {
            builder = DictionaryGenerator.newDictionaryBuilder(col.getType());
            builder.init(null, 0);
        }
        logger.info(
                "Reducer " + taskId + " handling column " + col + ", buildDictInReducer=" + buildDictInReducer);
    }
}

From source file:org.apache.kylin.engine.mr.steps.FilterRecommendCuboidDataMapper.java

License:Apache License

@Override
protected void doSetup(Context context) throws IOException {
    super.bindCurrentConfiguration(context.getConfiguration());
    mos = new MultipleOutputs(context);

    String cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME);
    String segmentID = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_ID);

    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();

    CubeManager cubeManager = CubeManager.getInstance(config);
    CubeInstance cube = cubeManager.getCube(cubeName);
    CubeSegment optSegment = cube.getSegmentById(segmentID);
    CubeSegment originalSegment = cube.getOriginalSegmentToOptimize(optSegment);

    rowKeySplitter = new RowKeySplitter(originalSegment);
    baseCuboid = cube.getCuboidScheduler().getBaseCuboidId();

    recommendCuboids = cube.getCuboidsRecommend();
    Preconditions.checkNotNull(recommendCuboids, "The recommend cuboid map could not be null");
}

From source file:org.apache.kylin.engine.mr.steps.UHCDictionaryReducer.java

License:Apache License

@Override
protected void doSetup(Context context) throws IOException {
    super.bindCurrentConfiguration(context.getConfiguration());
    Configuration conf = context.getConfiguration();
    mos = new MultipleOutputs(context);

    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
    String cubeName = conf.get(BatchConstants.CFG_CUBE_NAME);
    CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
    CubeDesc cubeDesc = cube.getDescriptor();
    List<TblColRef> uhcColumns = cubeDesc.getAllUHCColumns();

    int taskId = context.getTaskAttemptID().getTaskID().getId();
    col = uhcColumns.get(taskId);
    logger.info("column name: " + col.getIdentity());

    if (cube.getDescriptor().getShardByColumns().contains(col)) {
        //for ShardByColumns
        builder = DictionaryGenerator.newDictionaryBuilder(col.getType());
        builder.init(null, 0, null);
    } else {
        //for GlobalDictionaryColumns
        String hdfsDir = conf.get(BatchConstants.CFG_GLOBAL_DICT_BASE_DIR);
        DictionaryInfo dictionaryInfo = new DictionaryInfo(col.getColumnDesc(), col.getDatatype());
        String builderClass = cubeDesc.getDictionaryBuilderClass(col);
        builder = (IDictionaryBuilder) ClassUtil.newInstance(builderClass);
        builder.init(dictionaryInfo, 0, hdfsDir);
    }
}

From source file:org.apache.kylin.engine.mr.steps.UpdateOldCuboidShardMapper.java

License:Apache License

@Override
protected void doSetup(Context context) throws IOException {
    super.bindCurrentConfiguration(context.getConfiguration());
    mos = new MultipleOutputs(context);

    String cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME);
    String segmentID = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_ID);

    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();

    CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
    CubeSegment cubeSegment = cube.getSegmentById(segmentID);
    CubeSegment oldSegment = cube.getOriginalSegmentToOptimize(cubeSegment);

    cubeDesc = cube.getDescriptor();
    baseCuboid = cube.getCuboidScheduler().getBaseCuboidId();

    rowKeySplitter = new RowKeySplitter(oldSegment);
    rowKeyEncoderProvider = new RowKeyEncoderProvider(cubeSegment);
}