Example usage for org.apache.hadoop.mapreduce.lib.output MultipleOutputs MultipleOutputs

Introduction

This page collects example usages of the org.apache.hadoop.mapreduce.lib.output.MultipleOutputs constructor.

Prototype

public MultipleOutputs(TaskInputOutputContext<?, ?, KEYOUT, VALUEOUT> context) 

Document

Creates and initializes multiple outputs support; it should be instantiated in the Mapper/Reducer setup method.
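
The snippets below show only the constructor call inside setup(). For context, here is a minimal end-to-end sketch of the MultipleOutputs lifecycle: register the named output on the Job, instantiate in setup(), write in reduce(), and close in cleanup(). The class name ExampleReducer and the named output "stats" are hypothetical, chosen only for illustration.

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

// Hypothetical reducer illustrating the full MultipleOutputs lifecycle.
public class ExampleReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    private MultipleOutputs<Text, IntWritable> mos;

    @Override
    protected void setup(Context context) {
        // Instantiate in setup, as the javadoc above recommends.
        mos = new MultipleOutputs<>(context);
    }

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get();
        }
        // "stats" must have been registered on the Job in the driver (see below).
        mos.write("stats", key, new IntWritable(sum));
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        // Closing flushes the named outputs; omitting this can lose data.
        mos.close();
    }
}

In the driver, the named output would be registered before submitting the job, for example:

MultipleOutputs.addNamedOutput(job, "stats", TextOutputFormat.class, Text.class, IntWritable.class);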

Usage

From source file:hr.fer.tel.rovkp.homework02.task02.LocationsReducer.java

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    mos = new MultipleOutputs<>(context);
}

From source file:ipldataanalysis2.DataAnalysisMapper.java

protected void setup(Context context) {
    // Create a new MultipleOutputs using the context object
    mos = new MultipleOutputs(context);
}

From source file:kogiri.mapreduce.preprocess.indexing.stage1.ReadIndexBuilderMapper.java

License:Open Source License

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();

    this.ppConfig = PreprocessorConfig.createInstance(conf);
    this.mos = new MultipleOutputs(context);
    this.namedOutputs = NamedOutputs.createInstance(conf);

    this.readIDCounter = 0;

    FileSplit inputSplit = (FileSplit) context.getInputSplit();

    int namedoutputID = this.namedOutputs.getIDFromFilename(inputSplit.getPath().getName());
    NamedOutputRecord namedoutputRecord = this.namedOutputs.getRecordFromID(namedoutputID);
    this.namedOutput = namedoutputRecord.getIdentifier();

    this.histogram = new KmerHistogram(namedoutputRecord.getFilename(), this.ppConfig.getKmerSize());
}

From source file:kogiri.mapreduce.readfrequency.modecount.ModeCounterReducer.java

License:Open Source License

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();

    this.namedOutputs = NamedOutputs.createInstance(conf);

    this.mos = new MultipleOutputs(context);
}

From source file:ml.shifu.shifu.core.posttrain.PostTrainMapper.java

License:Apache License

@SuppressWarnings({ "rawtypes", "unchecked" })
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    loadConfigFiles(context);

    loadTagWeightNum();

    this.dataPurifier = new DataPurifier(this.modelConfig, false);

    this.outputKey = new IntWritable();
    this.outputValue = new Text();

    this.tags = new HashSet<String>(modelConfig.getFlattenTags());
    SourceType sourceType = this.modelConfig.getDataSet().getSource();

    List<BasicML> models = ModelSpecLoaderUtils.loadBasicModels(modelConfig, null, sourceType);

    this.headers = CommonUtils.getFinalHeaders(modelConfig);
    this.modelRunner = new ModelRunner(modelConfig, columnConfigList, this.headers,
            modelConfig.getDataSetDelimiter(), models);

    this.mos = new MultipleOutputs<NullWritable, Text>((TaskInputOutputContext) context);

    this.initFeatureStats();
}

From source file:ml.shifu.shifu.core.varselect.VarSelectReducer.java

License:Apache License

/**
 * Do initialization like ModelConfig and ColumnConfig loading.
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    loadConfigFiles(context);
    int[] inputOutputIndex = getInputOutputCandidateCounts(this.columnConfigList);
    this.inputNodeCount = inputOutputIndex[0] == 0 ? inputOutputIndex[2] : inputOutputIndex[0];
    this.wrapperRatio = context.getConfiguration().getFloat(Constants.SHIFU_VARSELECT_WRAPPER_RATIO,
            Constants.SHIFU_DEFAULT_VARSELECT_WRAPPER_RATIO);
    this.outputKey = new Text();
    this.outputValue = new Text();
    this.wrapperBy = context.getConfiguration().get(Constants.SHIFU_VARSELECT_WRAPPER_TYPE,
            Constants.WRAPPER_BY_SE);
    this.mos = new MultipleOutputs<Text, Text>(context);
}

From source file:org.apache.kylin.engine.mr.steps.FactDistinctColumnsReducer.java

License:Apache License

@Override
protected void setup(Context context) throws IOException {
    super.bindCurrentConfiguration(context.getConfiguration());
    Configuration conf = context.getConfiguration();
    mos = new MultipleOutputs(context);

    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
    String cubeName = conf.get(BatchConstants.CFG_CUBE_NAME);
    CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
    cubeConfig = cube.getConfig();
    cubeDesc = cube.getDescriptor();
    columnList = CubeManager.getInstance(config).getAllDictColumnsOnFact(cubeDesc);

    boolean collectStatistics = Boolean.parseBoolean(conf.get(BatchConstants.CFG_STATISTICS_ENABLED));
    int numberOfTasks = context.getNumReduceTasks();
    taskId = context.getTaskAttemptID().getTaskID().getId();

    uhcReducerCount = cube.getConfig().getUHCReducerCount();
    initReducerIdToColumnIndex(config);

    if (collectStatistics && (taskId == numberOfTasks - 1)) {
        // hll
        isStatistics = true;
        baseCuboidRowCountInMappers = Lists.newArrayList();
        cuboidHLLMap = Maps.newHashMap();
        samplingPercentage = Integer
                .parseInt(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT));
        logger.info("Reducer " + taskId + " handling stats");
    } else if (collectStatistics && (taskId == numberOfTasks - 2)) {
        // partition col
        isPartitionCol = true;
        col = cubeDesc.getModel().getPartitionDesc().getPartitionDateColumnRef();
        if (col == null) {
            logger.info("No partition col. This reducer will do nothing");
        } else {
            logger.info("Reducer " + taskId + " handling partition col " + col.getIdentity());
        }
    } else {
        // normal col
        col = columnList.get(reducerIdToColumnIndex.get(taskId));
        Preconditions.checkNotNull(col);

        // local build dict
        buildDictInReducer = config.isBuildDictInReducerEnabled();
        if (cubeDesc.getDictionaryBuilderClass(col) != null) { // only works with default dictionary builder
            buildDictInReducer = false;
        }
        if (config.getUHCReducerCount() > 1) {
            int[] uhcIndex = CubeManager.getInstance(config).getUHCIndex(cubeDesc);
            int colIndex = reducerIdToColumnIndex.get(taskId);
            if (uhcIndex[colIndex] == 1)
                buildDictInReducer = false; //for UHC columns, this feature should be disabled
        }
        if (buildDictInReducer) {
            builder = DictionaryGenerator.newDictionaryBuilder(col.getType());
            builder.init(null, 0);
        }
        logger.info(
                "Reducer " + taskId + " handling column " + col + ", buildDictInReducer=" + buildDictInReducer);
    }
}

From source file:org.apache.kylin.engine.mr.steps.FilterRecommendCuboidDataMapper.java

License:Apache License

@Override
protected void doSetup(Context context) throws IOException {
    super.bindCurrentConfiguration(context.getConfiguration());
    mos = new MultipleOutputs(context);

    String cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME);
    String segmentID = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_ID);

    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();

    CubeManager cubeManager = CubeManager.getInstance(config);
    CubeInstance cube = cubeManager.getCube(cubeName);
    CubeSegment optSegment = cube.getSegmentById(segmentID);
    CubeSegment originalSegment = cube.getOriginalSegmentToOptimize(optSegment);

    rowKeySplitter = new RowKeySplitter(originalSegment);
    baseCuboid = cube.getCuboidScheduler().getBaseCuboidId();

    recommendCuboids = cube.getCuboidsRecommend();
    Preconditions.checkNotNull(recommendCuboids, "The recommend cuboid map could not be null");
}

From source file:org.apache.kylin.engine.mr.steps.UHCDictionaryReducer.java

License:Apache License

@Override
protected void doSetup(Context context) throws IOException {
    super.bindCurrentConfiguration(context.getConfiguration());
    Configuration conf = context.getConfiguration();
    mos = new MultipleOutputs(context);

    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
    String cubeName = conf.get(BatchConstants.CFG_CUBE_NAME);
    CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
    CubeDesc cubeDesc = cube.getDescriptor();
    List<TblColRef> uhcColumns = cubeDesc.getAllUHCColumns();

    int taskId = context.getTaskAttemptID().getTaskID().getId();
    col = uhcColumns.get(taskId);
    logger.info("column name: " + col.getIdentity());

    if (cube.getDescriptor().getShardByColumns().contains(col)) {
        //for ShardByColumns
        builder = DictionaryGenerator.newDictionaryBuilder(col.getType());
        builder.init(null, 0, null);
    } else {
        //for GlobalDictionaryColumns
        String hdfsDir = conf.get(BatchConstants.CFG_GLOBAL_DICT_BASE_DIR);
        DictionaryInfo dictionaryInfo = new DictionaryInfo(col.getColumnDesc(), col.getDatatype());
        String builderClass = cubeDesc.getDictionaryBuilderClass(col);
        builder = (IDictionaryBuilder) ClassUtil.newInstance(builderClass);
        builder.init(dictionaryInfo, 0, hdfsDir);
    }
}

From source file:org.apache.kylin.engine.mr.steps.UpdateOldCuboidShardMapper.java

License:Apache License

@Override
protected void doSetup(Context context) throws IOException {
    super.bindCurrentConfiguration(context.getConfiguration());
    mos = new MultipleOutputs(context);

    String cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME);
    String segmentID = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_ID);

    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();

    CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
    CubeSegment cubeSegment = cube.getSegmentById(segmentID);
    CubeSegment oldSegment = cube.getOriginalSegmentToOptimize(cubeSegment);

    cubeDesc = cube.getDescriptor();
    baseCuboid = cube.getCuboidScheduler().getBaseCuboidId();

    rowKeySplitter = new RowKeySplitter(oldSegment);
    rowKeyEncoderProvider = new RowKeyEncoderProvider(cubeSegment);
}