Example usage for org.apache.hadoop.mapred JobConf set

List of usage examples for org.apache.hadoop.mapred JobConf set

Introduction

This page lists usage examples for org.apache.hadoop.mapred JobConf set.

Prototype

public void set(String name, String value) 

Document

Set the value of the name property.
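
For reference, here is a minimal, self-contained sketch of the call (the class name and property value below are illustrative only and do not come from the examples that follow):

import org.apache.hadoop.mapred.JobConf;

public class JobConfSetExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        // set(name, value) stores a string-valued property under the given name
        conf.set("mapred.job.name", "example-job");
        // Configuration.get(name, default) reads it back; the default is used when the name is unset
        System.out.println(conf.get("mapred.job.name", "unnamed"));
    }
}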

Usage

From source file:com.rapleaf.hank.hadoop.HadoopDomainCompactor.java

License:Apache License

public static void main(String[] args) throws IOException, InvalidConfigurationException {
    CommandLineChecker.check(args, new String[] { "domain name", "version to compact number",
            "mapred.task.timeout", "config path", "jobjar" }, HadoopDomainCompactor.class);
    String domainName = args[0];
    Integer versionToCompactNumber = Integer.valueOf(args[1]);
    Integer mapredTaskTimeout = Integer.valueOf(args[2]);
    CoordinatorConfigurator configurator = new YamlClientConfigurator(args[3]);
    String jobJar = args[4];

    DomainCompactorProperties properties = new DomainCompactorProperties(domainName, versionToCompactNumber,
            configurator);
    JobConf conf = new JobConf();
    conf.setJar(jobJar);
    conf.set("mapred.task.timeout", mapredTaskTimeout.toString());
    conf.setJobName(HadoopDomainCompactor.class.getSimpleName() + " Domain " + domainName + ", Version "
            + versionToCompactNumber);
    HadoopDomainCompactor compactor = new HadoopDomainCompactor(conf);
    LOG.info("Compacting Hank domain " + domainName + " version " + versionToCompactNumber
            + " with coordinator configuration " + configurator);
    compactor.buildHankDomain(properties,
            new IncrementalDomainVersionProperties.Base("Version " + versionToCompactNumber + " compacted"));
}

From source file:com.ricemap.spateDB.core.SpatialSite.java

License:Apache License

public static void setCells(JobConf job, CellInfo[] cellsInfo) throws IOException {
    Path tempFile;
    FileSystem fs = FileSystem.get(job);
    do {
        tempFile = new Path(job.getJobName() + "_" + (int) (Math.random() * 1000000) + ".cells");
    } while (fs.exists(tempFile));
    FSDataOutputStream out = fs.create(tempFile);
    out.writeInt(cellsInfo.length);
    for (CellInfo cell : cellsInfo) {
        cell.write(out);
    }
    out.close();

    fs.deleteOnExit(tempFile);

    DistributedCache.addCacheFile(tempFile.toUri(), job);
    job.set(OUTPUT_CELLS, tempFile.getName());
    LOG.info("Partitioning file into " + cellsInfo.length + " cells");
}

From source file:com.ricemap.spateDB.operations.RangeQuery.java

License:Apache License

/**
 * Performs a range query using MapReduce
 *
 * @param fs the file system containing the input file
 * @param inputFile the file to query
 * @param userOutputPath the output path, or null to use an auto-generated temporary path
 * @param queryShape the query range
 * @param shape the shape stored in the input file
 * @param overwrite whether an existing output path may be deleted
 * @param background whether to submit the job without waiting for it to finish
 * @param query the query (type and field) to execute
 * @return the number of matching records, or -1 when the job runs in the background
 * @throws IOException
 */
public static long rangeQueryMapReduce(FileSystem fs, Path inputFile, Path userOutputPath, Shape queryShape,
        Shape shape, boolean overwrite, boolean background, QueryInput query) throws IOException {
    JobConf job = new JobConf(FileMBR.class);

    FileSystem outFs = inputFile.getFileSystem(job);
    Path outputPath = userOutputPath;
    if (outputPath == null) {
        do {
            outputPath = new Path(
                    inputFile.toUri().getPath() + ".rangequery_" + (int) (Math.random() * 1000000));
        } while (outFs.exists(outputPath));
    } else {
        if (outFs.exists(outputPath)) {
            if (overwrite) {
                outFs.delete(outputPath, true);
            } else {
                throw new RuntimeException("Output path already exists and -overwrite flag is not set");
            }
        }
    }

    job.setJobName("RangeQuery");
    job.setClass(SpatialSite.FilterClass, RangeFilter.class, BlockFilter.class);
    RangeFilter.setQueryRange(job, queryShape); // Set query range for the filter

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);
    job.setNumReduceTasks(3);

    // Decide which map function to use depending on how blocks are indexed
    // And also which input format to use
    if (SpatialSite.isRTree(fs, inputFile)) {
        // RTree indexed file
        LOG.info("Searching an RTree indexed file");
        job.setInputFormat(RTreeInputFormat.class);
    } else {
        // A file with no local index
        LOG.info("Searching a non local-indexed file");
        job.setInputFormat(ShapeInputFormat.class);
    }

    GlobalIndex<Partition> gIndex = SpatialSite.getGlobalIndex(fs, inputFile);
    // if (gIndex != null && gIndex.isReplicated()){
    // job.setMapperClass(RangeQueryMap.class);

    Class<?> OutputKey = NullWritable.class;
    try {
        Class<?> c = shape.getClass();
        Field f = c.getDeclaredField(query.field);
        f.setAccessible(true);
        if (f.getType().equals(Integer.TYPE)) {
            OutputKey = IntWritable.class;
        } else if (f.getType().equals(Double.TYPE)) {
            OutputKey = DoubleWritable.class;
        } else if (f.getType().equals(Long.TYPE)) {
            OutputKey = LongWritable.class;
        }
    } catch (SecurityException e) {
        e.printStackTrace();
    } catch (NoSuchFieldException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }

    job.setMapOutputKeyClass(OutputKey);
    switch (query.type) {
    case Distinct:
        job.setMapperClass(DistinctQueryMap.class);
        job.setReducerClass(DistinctQueryReduce.class);
        job.setMapOutputValueClass(NullWritable.class);
        break;
    case Distribution:
        job.setMapperClass(DistributionQueryMap.class);
        job.setReducerClass(DistributionQueryReduce.class);
        job.setMapOutputValueClass(IntWritable.class);
        break;
    default:
        break;
    }
    // }
    // else
    // job.setMapperClass(RangeQueryMapNoDupAvoidance.class);

    // Set query range for the map function
    job.set(QUERY_SHAPE_CLASS, queryShape.getClass().getName());
    job.set(QUERY_SHAPE, queryShape.toText(new Text()).toString());
    job.set(QUERY_FIELD, query.field);

    // Set shape class for the SpatialInputFormat
    SpatialSite.setShapeClass(job, shape.getClass());

    job.setOutputFormat(TextOutputFormat.class);

    ShapeInputFormat.setInputPaths(job, inputFile);
    TextOutputFormat.setOutputPath(job, outputPath);

    // Submit the job
    if (!background) {
        RunningJob runningJob = JobClient.runJob(job);
        Counters counters = runningJob.getCounters();
        Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS);
        final long resultCount = outputRecordCounter.getValue();

        // If outputPath not set by user, automatically delete it
        if (userOutputPath == null)
            outFs.delete(outputPath, true);

        return resultCount;
    } else {
        JobClient jc = new JobClient(job);
        lastRunningJob = jc.submitJob(job);
        return -1;
    }
}

From source file:com.ricemap.spateDB.util.RandomSpatialGenerator.java

License:Apache License

public static void generateMapReduce(Path file, Prism mbr, long size, long blocksize, Shape shape,
        String sindex, long seed, int rectsize, RandomShapeGenerator.DistributionType type, boolean overwrite)
        throws IOException {
    JobConf job = new JobConf(RandomSpatialGenerator.class);

    job.setJobName("Generator");
    FileSystem outFs = file.getFileSystem(job);

    // Overwrite output file
    if (outFs.exists(file)) {
        if (overwrite)
            outFs.delete(file, true);
        else
            throw new RuntimeException(
                    "Output file '" + file + "' already exists and overwrite flag is not set");
    }

    // Set generation parameters in job
    job.setLong(RandomShapeGenerator.GenerationSize, size);
    SpatialSite.setPrism(job, RandomShapeGenerator.GenerationMBR, mbr);
    if (seed != 0)
        job.setLong(RandomShapeGenerator.GenerationSeed, seed);
    if (rectsize != 0)
        job.setInt(RandomShapeGenerator.GenerationRectSize, rectsize);
    if (type != null)
        job.set(RandomShapeGenerator.GenerationType, type.toString());

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    // Set input format and map class
    job.setInputFormat(RandomInputFormat.class);
    job.setMapperClass(Repartition.RepartitionMap.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(shape.getClass());
    job.setNumMapTasks(10 * Math.max(1, clusterStatus.getMaxMapTasks()));

    SpatialSite.setShapeClass(job, shape.getClass());

    if (blocksize != 0) {
        job.setLong(SpatialSite.LOCAL_INDEX_BLOCK_SIZE, blocksize);
    }

    CellInfo[] cells;
    if (sindex == null) {
        cells = new CellInfo[] { new CellInfo(1, mbr) };
    } else if (sindex.equals("grid")) {
        GridInfo gridInfo = new GridInfo(mbr.t1, mbr.x1, mbr.y1, mbr.t2, mbr.x2, mbr.y2);
        FileSystem fs = file.getFileSystem(job);
        if (blocksize == 0) {
            blocksize = fs.getDefaultBlockSize(file);
        }
        int numOfCells = Repartition.calculateNumberOfPartitions(job, size, fs, file, blocksize);
        gridInfo.calculateCellDimensions(numOfCells);
        cells = gridInfo.getAllCells();
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    SpatialSite.setCells(job, cells);

    // Do not set a reduce function. Use the default identity reduce function
    if (cells.length == 1) {
        // All objects are in one partition. No need for a reduce phase
        job.setNumReduceTasks(0);
    } else {
        // More than one partition. Need a reduce phase to group shapes of the
        // same partition together
        job.setReducerClass(RepartitionReduce.class);
        job.setNumReduceTasks(
                Math.max(1, Math.min(cells.length, (clusterStatus.getMaxReduceTasks() * 9 + 5) / 10)));
    }

    // Set output path
    FileOutputFormat.setOutputPath(job, file);
    if (sindex == null || sindex.equals("grid")) {
        job.setOutputFormat(GridOutputFormat.class);
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    JobClient.runJob(job);

    // Concatenate all master files into one file
    FileStatus[] resultFiles = outFs.listStatus(file, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().contains("_master");
        }
    });
    String ext = resultFiles[0].getPath().getName()
            .substring(resultFiles[0].getPath().getName().lastIndexOf('.'));
    Path masterPath = new Path(file, "_master" + ext);
    OutputStream destOut = outFs.create(masterPath);
    byte[] buffer = new byte[4096];
    for (FileStatus f : resultFiles) {
        InputStream in = outFs.open(f.getPath());
        int bytes_read;
        do {
            bytes_read = in.read(buffer);
            if (bytes_read > 0)
                destOut.write(buffer, 0, bytes_read);
        } while (bytes_read > 0);
        in.close();
        outFs.delete(f.getPath(), false);
    }
    destOut.close();

    // Plot an image for the partitions used in file
    Path imagePath = new Path(file, "_partitions.png");
    int imageSize = (int) (Math.sqrt(cells.length) * 300);
    Plot.plotLocal(masterPath, imagePath, new Partition(), imageSize, imageSize, Color.BLACK, false, false,
            false);
}

From source file:com.scaleoutsoftware.soss.hserver.hadoop.MapperWrapperMapred.java

License:Apache License

/**
 * Update the job with details about the file split
 * @param job the job configuration to update
 * @param inputSplit the file split
 */
private void updateJobWithSplit(final JobConf job, Object inputSplit) {
    if (inputSplit instanceof FileSplit) {
        FileSplit fileSplit = (FileSplit) inputSplit;
        try {
            if (fileSplit.getPath() != null) {
                job.set("mapreduce.map.input.file", fileSplit.getPath().toString());
            }
        } catch (IllegalArgumentException e) {
            //Swallow this, it appears in Hive splits, which do not have the path encoded
            //(storage handler for NamedMap is an example).
        }
        job.setLong("mapreduce.map.input.start", fileSplit.getStart());
        job.setLong("mapreduce.map.input.length", fileSplit.getLength());
    }
    LOG.info("Processing split: " + inputSplit);
}

From source file:com.scaleoutsoftware.soss.hserver.hadoop.ReducerWrapperMapred.java

License:Apache License

static void updateJobConf(JobConf jobConf, TaskAttemptID taskAttemptID, int partition) {

    //---------------------------------------------------------------------------------
    //Based on the localizeConfiguration(...) method from Task.java, part of Apache Hadoop 1.2.0,
    //licensed under Apache License, Version 2.0
    //----------------------------------------------------------------------------------

    jobConf.set("mapred.tip.id", taskAttemptID.getTaskID().toString());
    jobConf.set("mapred.task.id", taskAttemptID.toString());
    jobConf.setBoolean("mapred.task.is.map", false);
    jobConf.setInt("mapred.task.partition", partition);
    jobConf.set("mapred.job.id", taskAttemptID.getJobID().toString());

    //---------------------------------------------------------------------------------
    //Based on the localizeConfiguration(...) method from Task.java, part of Apache Hadoop 2.2.0,
    //licensed under Apache License, Version 2.0
    //----------------------------------------------------------------------------------
    jobConf.set(TASK_ID, taskAttemptID.getTaskID().toString());
    jobConf.set(TASK_ATTEMPT_ID, taskAttemptID.toString());
    jobConf.setBoolean(TASK_ISMAP, false);
    jobConf.setInt(TASK_PARTITION, partition);
    jobConf.set(ID, taskAttemptID.getJobID().toString());
    //----------------------------------------------------------------------------------
}

From source file:com.scaleoutsoftware.soss.hserver.hive.HServerHiveStorageHandler.java

License:Apache License

@Override
public void configureJobConf(TableDesc tableDesc, JobConf jobConf) {
    String mapName = tableDesc.getProperties().getProperty(MAP_NAME);
    String valueSerializer = tableDesc.getProperties().getProperty(VALUE_SERIALIZER);
    String valueType = tableDesc.getProperties().getProperty(VALUE_TYPE);

    jobConf.set(MAP_NAME, mapName);

    if (valueSerializer != null) {
        jobConf.set(VALUE_SERIALIZER, valueSerializer);
        if (valueType != null) {
            jobConf.set(VALUE_TYPE, valueType);
        }
    }
}

From source file:com.scaleunlimited.helpful.tools.AnalyzeEmail.java

License:Apache License

private static JobConf getDefaultJobConf() throws IOException {
    JobClient jobClient = new JobClient(new JobConf());
    ClusterStatus status = jobClient.getClusterStatus();
    int trackers = status.getTaskTrackers();

    JobConf conf = new JobConf();
    conf.setNumMapTasks(trackers * 10);

    conf.setNumReduceTasks((trackers * conf.getInt("mapred.tasktracker.reduce.tasks.maximum", 2)));

    conf.setMapSpeculativeExecution(false);
    conf.setReduceSpeculativeExecution(false);
    conf.set("mapred.child.java.opts", "-server -Xmx512m -Xss128k");

    // Should match the value used for Xss above. Note no 'k' suffix for the ulimit command.
    // New support that one day will be in Hadoop.
    conf.set("mapred.child.ulimit.stack", "128");

    return conf;
}

From source file:com.splicemachine.orc.OrcTester.java

License:Open Source License

static RecordWriter createOrcRecordWriter(File outputFile, Format format, Compression compression,
        ObjectInspector columnObjectInspector) throws IOException {
    JobConf jobConf = new JobConf();
    jobConf.set("hive.exec.orc.write.format", format == ORC_12 ? "0.12" : "0.11");
    jobConf.set("hive.exec.orc.default.compress", compression.name());

    return new OrcOutputFormat().getHiveRecordWriter(jobConf, new Path(outputFile.toURI()), Text.class,
            compression != NONE, createTableProperties("test", columnObjectInspector.getTypeName()), () -> {
            });
}

From source file:com.splicemachine.orc.OrcTester.java

License:Open Source License

private static RecordWriter createDwrfRecordWriter(File outputFile, Compression compressionCodec,
        ObjectInspector columnObjectInspector) throws IOException {
    JobConf jobConf = new JobConf();
    jobConf.set("hive.exec.orc.default.compress", compressionCodec.name());
    jobConf.set("hive.exec.orc.compress", compressionCodec.name());
    OrcConf.setIntVar(jobConf, OrcConf.ConfVars.HIVE_ORC_ENTROPY_STRING_THRESHOLD, 1);
    OrcConf.setIntVar(jobConf, OrcConf.ConfVars.HIVE_ORC_DICTIONARY_ENCODING_INTERVAL, 2);
    OrcConf.setBoolVar(jobConf, OrcConf.ConfVars.HIVE_ORC_BUILD_STRIDE_DICTIONARY, true);
    return new OrcOutputFormat().getHiveRecordWriter(jobConf, new Path(outputFile.toURI()), Text.class,
            compressionCodec != NONE, createTableProperties("test", columnObjectInspector.getTypeName()),
            () -> {
            });
}