List of usage examples for org.apache.hadoop.mapred JobConf setInt
public void setInt(String name, int value)
Sets the value of the name property to an int.
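Before the project-specific examples below, here is a minimal, self-contained sketch of the typical pattern: the driver stores an int-valued setting in the job configuration with setInt, and tasks can read it back with getInt. The class name SetIntExample, the property key "example.sample.threshold", and the argument handling are illustrative placeholders, not taken from the examples that follow.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class SetIntExample {
    public static void main(String[] args) throws Exception {
        // Hypothetical driver: configure a job and stash an int-valued property in it
        JobConf job = new JobConf(SetIntExample.class);
        job.setJobName("SetIntExample");

        // Store the value under a custom key; mappers/reducers that receive this
        // configuration can recover it with job.getInt("example.sample.threshold", <default>)
        job.setInt("example.sample.threshold", 42);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        JobClient.runJob(job);
    }
}

Inside a Mapper's configure(JobConf job) method, the same value would be read back with job.getInt("example.sample.threshold", 0).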
From source file:edu.umn.cs.spatialHadoop.nasa.DistributedAggregateSpatioTemporalIndexer.java
License:Open Source License
/**
 * Build a bunch of AggregateQuadTrees using a Map-Reduce job
 * @param inputPathsDictionaryPath
 * @param params
 * @throws IOException
 */
public static void aggregateQuadTreeMapReduce(Path inputPathsDictionaryPath, OperationsParams params)
        throws IOException {
    // Configure a map-reduce job
    JobConf job = new JobConf(params, DistributedAggregateSpatioTemporalIndexer.class);
    Path outputPath;
    String outputPathPrefix = "aggQuadTree_";
    FileSystem outFs = FileSystem.get(job);
    do {
        outputPath = new Path(outputPathPrefix + (int) (Math.random() * 1000000));
    } while (outFs.exists(outputPath));

    job.setJobName("AggregateQuadTree");
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setMapperClass(AggregateQuadTreeMaper.class);
    job.set(HDFSIndexPath, hdfsIndexPath.toString());

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);

    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);

    TextInputFormat.setInputPaths(job, inputPathsDictionaryPath);
    TextOutputFormat.setOutputPath(job, outputPath);

    if (job.getBoolean("local", false)) {
        // Enforce local execution if explicitly set by user or for small files
        job.set("mapred.job.tracker", "local");
        // Use multithreading too
        job.setInt(LocalJobRunner.LOCAL_MAX_MAPS, 16);
    }
    job.setNumReduceTasks(0);

    // Submit the job
    JobClient.runJob(job);
    outFs.delete(outputPath, true);
}
From source file:edu.umn.cs.spatialHadoop.operations.DistributedJoin.java
License:Open Source License
/**
 * Performs a redistribute join between the given files using the
 * redistribute join algorithm. Currently, we only support a pair of files.
 * @param inFiles
 * @param userOutputPath
 * @param params
 * @return
 * @throws IOException
 */
public static <S extends Shape> long joinStep(Path[] inFiles, Path userOutputPath, OperationsParams params)
        throws IOException {
    long t1 = System.currentTimeMillis();

    JobConf job = new JobConf(params, DistributedJoin.class);

    FileSystem fs[] = new FileSystem[inFiles.length];
    for (int i_file = 0; i_file < inFiles.length; i_file++)
        fs[i_file] = inFiles[i_file].getFileSystem(job);

    Path outputPath = userOutputPath;
    if (outputPath == null) {
        do {
            outputPath = new Path(inFiles[0].getName() + ".dj_" + (int) (Math.random() * 1000000));
        } while (fs[0].exists(outputPath));
    }

    job.setJobName("DistributedJoin");
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    GlobalIndex<Partition> gindex1 = SpatialSite.getGlobalIndex(fs[0], inFiles[0]);
    GlobalIndex<Partition> gindex2 = SpatialSite.getGlobalIndex(fs[1], inFiles[1]);

    OperationsParams.setFilterOnlyModeFlag(job, isFilterOnlyMode, isFilterOnly);

    LOG.info("Joining " + inFiles[0] + " X " + inFiles[1]);

    if (SpatialSite.isRTree(fs[0], inFiles[0]) && SpatialSite.isRTree(fs[1], inFiles[1])) {
        job.setInputFormat(DJInputFormatRTree.class);
    } else {
        if (isOneShotReadMode) {
            // Ensure all objects are read in one shot
            job.setInt(SpatialSite.MaxBytesInOneRead, -1);
            job.setInt(SpatialSite.MaxShapesInOneRead, -1);
        } else {
            job.setInt(SpatialSite.MaxBytesInOneRead, maxBytesInOneRead);
            job.setInt(SpatialSite.MaxShapesInOneRead, maxShapesInOneRead);
        }
        job.setInputFormat(DJInputFormatArray.class);
    }

    // Set input paths and map function
    if (inFiles[0].equals(inFiles[1])) {
        // Self join
        job.setInputFormat(ShapeArrayInputFormat.class);
        // Remove the spatial filter to ensure all partitions are loaded
        FileInputFormat.setInputPaths(job, inFiles[0]);
        if (gindex1 != null && gindex1.isReplicated())
            job.setMapperClass(RedistributeJoinMap.class);
        else
            job.setMapperClass(RedistributeJoinMapNoDupAvoidance.class);
    } else {
        // Binary version of spatial join (two different input files)
        job.setClass(SpatialSite.FilterClass, SpatialJoinFilter.class, BlockFilter.class);
        FileInputFormat.setInputPaths(job, inFiles);
        if ((gindex1 != null && gindex1.isReplicated()) || (gindex2 != null && gindex2.isReplicated())) {
            // Need the map function with duplicate avoidance step
            job.setMapperClass(RedistributeJoinMap.class);
        } else {
            // No replication in both indexes, use map function with no dup avoidance
            job.setMapperClass(RedistributeJoinMapNoDupAvoidance.class);
        }
    }

    Shape shape = params.getShape("shape");
    job.setMapOutputKeyClass(shape.getClass());
    job.setMapOutputValueClass(shape.getClass());

    job.setNumMapTasks(10 * Math.max(1, clusterStatus.getMaxMapTasks()));
    job.setNumReduceTasks(0); // No reduce needed for this task

    if (job.getBoolean("output", true))
        job.setOutputFormat(TextOutputFormat.class);
    else
        job.setOutputFormat(NullOutputFormat.class);

    TextOutputFormat.setOutputPath(job, outputPath);

    if (!params.getBoolean("background", false)) {
        LOG.info("Submit job in sync mode");
        RunningJob runningJob = JobClient.runJob(job);
        Counters counters = runningJob.getCounters();
        Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS);
        final long resultCount = outputRecordCounter.getValue();

        // Output number of running map tasks
        Counter mapTaskCountCounter = counters.findCounter(JobInProgress.Counter.TOTAL_LAUNCHED_MAPS);
        System.out.println("Number of map tasks " + mapTaskCountCounter.getValue());

        // Delete output directory if not explicitly set by user
        if (userOutputPath == null)
            fs[0].delete(outputPath, true);

        long t2 = System.currentTimeMillis();
        System.out.println("Join time " + (t2 - t1) + " millis");

        return resultCount;
    } else {
        JobClient jc = new JobClient(job);
        LOG.info("Submit job in async mode");
        lastRunningJob = jc.submitJob(job);
        LOG.info("Job " + lastRunningJob + " submitted successfully");
        return -1;
    }
}
From source file:edu.umn.cs.spatialHadoop.operations.FileMBR.java
License:Open Source License
/**
 * Computes the MBR of the input files using an aggregate MapReduce job.
 * @param inFiles - Paths to the input files
 * @param params - Additional operation parameters
 * @return
 * @throws IOException
 * @throws InterruptedException
 */
private static <S extends Shape> Partition fileMBRMapReduce(Path[] inFiles, OperationsParams params)
        throws IOException, InterruptedException {
    JobConf job = new JobConf(params, FileMBR.class);

    Path outputPath;
    FileSystem outFs = FileSystem.get(job);
    do {
        outputPath = new Path(inFiles[0].getName() + ".mbr_" + (int) (Math.random() * 1000000));
    } while (outFs.exists(outputPath));

    job.setJobName("FileMBR");
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Partition.class);

    job.setMapperClass(FileMBRMapper.class);
    job.setReducerClass(Reduce.class);
    job.setCombinerClass(Combine.class);
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);

    job.setInputFormat(ShapeLineInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);

    ShapeInputFormat.setInputPaths(job, inFiles);
    TextOutputFormat.setOutputPath(job, outputPath);
    job.setOutputCommitter(MBROutputCommitter.class);

    // Submit the job
    if (OperationsParams.isLocal(job, inFiles)) {
        // Enforce local execution if explicitly set by user or for small files
        job.set("mapred.job.tracker", "local");
        // Use multithreading too
        job.setInt(LocalJobRunner.LOCAL_MAX_MAPS, Runtime.getRuntime().availableProcessors());
    }

    if (params.getBoolean("background", false)) {
        JobClient jc = new JobClient(job);
        lastSubmittedJob = jc.submitJob(job);
        return null;
    } else {
        lastSubmittedJob = JobClient.runJob(job);
        Counters counters = lastSubmittedJob.getCounters();
        Counter outputSizeCounter = counters.findCounter(Task.Counter.MAP_INPUT_BYTES);
        sizeOfLastProcessedFile = outputSizeCounter.getCounter();

        FileStatus[] outFiles = outFs.listStatus(outputPath, SpatialSite.NonHiddenFileFilter);
        Partition mbr = new Partition();
        mbr.set(Double.MAX_VALUE, Double.MAX_VALUE, -Double.MAX_VALUE, -Double.MAX_VALUE);
        OperationsParams localMBRParams = new OperationsParams(params);
        localMBRParams.setBoolean("local", true); // Enforce local execution
        localMBRParams.setClass("shape", Partition.class, Shape.class);
        for (FileStatus outFile : outFiles) {
            if (outFile.isDir())
                continue;
            ShapeRecordReader<Partition> reader = new ShapeRecordReader<Partition>(localMBRParams,
                    new FileSplit(outFile.getPath(), 0, outFile.getLen(), new String[0]));
            Rectangle key = reader.createKey();
            Partition p = reader.createValue();
            while (reader.next(key, p)) {
                mbr.expand(p);
            }
            reader.close();
        }

        outFs.delete(outputPath, true);

        return mbr;
    }
}
From source file:edu.umn.cs.spatialHadoop.operations.Indexer.java
License:Open Source License
private static RunningJob indexMapReduce(Path inPath, Path outPath, OperationsParams params)
        throws IOException, InterruptedException {
    JobConf job = new JobConf(params, Indexer.class);
    job.setJobName("Indexer");

    // Set input file MBR if not already set
    Rectangle inputMBR = (Rectangle) params.getShape("mbr");
    if (inputMBR == null)
        inputMBR = FileMBR.fileMBR(inPath, params);
    OperationsParams.setShape(job, "mbr", inputMBR);

    // Set input and output
    job.setInputFormat(ShapeIterInputFormat.class);
    ShapeIterInputFormat.setInputPaths(job, inPath);
    job.setOutputFormat(IndexOutputFormat.class);
    GridOutputFormat.setOutputPath(job, outPath);

    // Set the correct partitioner according to index type
    String index = job.get("sindex");
    if (index == null)
        throw new RuntimeException("Index type is not set");
    long t1 = System.currentTimeMillis();
    Partitioner partitioner = createPartitioner(inPath, outPath, job, index);
    Partitioner.setPartitioner(job, partitioner);
    long t2 = System.currentTimeMillis();
    System.out.println("Total time for space subdivision in millis: " + (t2 - t1));

    // Set mapper and reducer
    Shape shape = params.getShape("shape");
    job.setMapperClass(IndexMethods.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(shape.getClass());
    job.setReducerClass(IndexMethods.class);
    job.setOutputCommitter(IndexerOutputCommitter.class);
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(5 * Math.max(1, clusterStatus.getMaxMapTasks()));
    job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks()));

    // Use multithreading in case the job is running locally
    job.setInt(LocalJobRunner.LOCAL_MAX_MAPS, Runtime.getRuntime().availableProcessors());

    // Start the job
    if (params.getBoolean("background", false)) {
        // Run in background
        JobClient jc = new JobClient(job);
        return jc.submitJob(job);
    } else {
        // Run and block until it is finished
        return JobClient.runJob(job);
    }
}
From source file:edu.umn.cs.spatialHadoop.operations.Plot.java
License:Apache License
public static <S extends Shape> void plotMapReduce(Path inFile, Path outFile, Shape shape, int width,
        int height, Color color, boolean showBorders, boolean showBlockCount, boolean showRecordCount,
        boolean background) throws IOException {
    JobConf job = new JobConf(Plot.class);
    job.setJobName("Plot");

    job.setMapperClass(PlotMap.class);
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);
    job.setReducerClass(PlotReduce.class);
    job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks()));
    job.setMapOutputKeyClass(Rectangle.class);
    SpatialSite.setShapeClass(job, shape.getClass());
    job.setMapOutputValueClass(shape.getClass());

    FileSystem inFs = inFile.getFileSystem(job);
    Rectangle fileMbr = FileMBR.fileMBRMapReduce(inFs, inFile, shape, false);
    FileStatus inFileStatus = inFs.getFileStatus(inFile);

    CellInfo[] cellInfos;
    GlobalIndex<Partition> gindex = SpatialSite.getGlobalIndex(inFs, inFile);
    if (gindex == null) {
        // A heap file. The map function should partition the file
        GridInfo gridInfo = new GridInfo(fileMbr.x1, fileMbr.y1, fileMbr.x2, fileMbr.y2);
        gridInfo.calculateCellDimensions(inFileStatus.getLen(), inFileStatus.getBlockSize());
        cellInfos = gridInfo.getAllCells();
        // Doesn't make sense to show any partition information in a heap file
        showBorders = showBlockCount = showRecordCount = false;
    } else {
        cellInfos = SpatialSite.cellsOf(inFs, inFile);
    }

    // Set cell information in the job configuration to be used by the mapper
    SpatialSite.setCells(job, cellInfos);

    // Adjust width and height to maintain aspect ratio
    if ((fileMbr.x2 - fileMbr.x1) / (fileMbr.y2 - fileMbr.y1) > (double) width / height) {
        // Fix width and change height
        height = (int) ((fileMbr.y2 - fileMbr.y1) * width / (fileMbr.x2 - fileMbr.x1));
    } else {
        width = (int) ((fileMbr.x2 - fileMbr.x1) * height / (fileMbr.y2 - fileMbr.y1));
    }
    LOG.info("Creating an image of size " + width + "x" + height);
    ImageOutputFormat.setFileMBR(job, fileMbr);
    ImageOutputFormat.setImageWidth(job, width);
    ImageOutputFormat.setImageHeight(job, height);
    job.setBoolean(ShowBorders, showBorders);
    job.setBoolean(ShowBlockCount, showBlockCount);
    job.setBoolean(ShowRecordCount, showRecordCount);
    job.setInt(StrokeColor, color.getRGB());

    // Set input and output
    job.setInputFormat(ShapeInputFormat.class);
    ShapeInputFormat.addInputPath(job, inFile);

    // Set output committer which will stitch images together after all reducers finish
    job.setOutputCommitter(PlotOutputCommitter.class);

    job.setOutputFormat(ImageOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outFile);

    if (background) {
        JobClient jc = new JobClient(job);
        lastSubmittedJob = jc.submitJob(job);
    } else {
        lastSubmittedJob = JobClient.runJob(job);
    }
}
From source file:edu.umn.cs.spatialHadoop.operations.PlotPyramid.java
License:Apache License
public static <S extends Shape> void plotMapReduce(Path inFile, Path outFile, Shape shape, int tileWidth,
        int tileHeight, int numLevels) throws IOException {
    JobConf job = new JobConf(PlotPyramid.class);
    job.setJobName("Plot");

    job.setMapperClass(PlotMap.class);
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);
    job.setReducerClass(PlotReduce.class);
    job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks()));
    SpatialSite.setShapeClass(job, shape.getClass());
    job.setMapOutputKeyClass(TileIndex.class);
    job.setMapOutputValueClass(shape.getClass());

    FileSystem inFs = inFile.getFileSystem(job);
    Rectangle fileMBR = FileMBR.fileMBRMapReduce(inFs, inFile, shape, false);

    // Expand input file to a rectangle for compatibility with the pyramid structure
    if (fileMBR.getWidth() > fileMBR.getHeight()) {
        fileMBR.y2 = fileMBR.y1 + fileMBR.getWidth();
    } else {
        fileMBR.x2 = fileMBR.x1 + fileMBR.getHeight();
    }
    SpatialSite.setRectangle(job, InputMBR, fileMBR);
    job.setInt(TileWidth, tileWidth);
    job.setInt(TileHeight, tileHeight);
    job.setInt(NumLevels, numLevels);

    // Set input and output
    job.setInputFormat(ShapeInputFormat.class);
    ShapeInputFormat.addInputPath(job, inFile);

    job.setOutputFormat(PyramidOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outFile);

    JobClient.runJob(job);
}
From source file:edu.umn.cs.spatialHadoop.operations.PyramidPlot.java
License:Apache License
/**
 * Plot a file to a set of images in different zoom levels using a MapReduce program.
 * @param <S> type of shapes stored in file
 * @param inFile - Path to the input file(s)
 * @param outFile - Path to the output file (image)
 * @param shape - A sample object to be used for parsing input file
 * @param tileWidth - Width of each tile
 * @param tileHeight - Height of each tile
 * @param vflip - Set to <code>true</code> to flip the whole image vertically
 * @param color - Color used to draw single shapes
 * @param numLevels - Number of zoom levels to plot
 * @throws IOException
 */
private static <S extends Shape> RunningJob plotMapReduce(Path inFile, Path outFile, OperationsParams params)
        throws IOException {
    Color color = params.getColor("color", Color.BLACK);
    String hdfDataset = (String) params.get("dataset");
    Shape shape = hdfDataset != null ? new NASARectangle() : params.getShape("shape");
    Shape plotRange = params.getShape("rect");
    boolean background = params.is("background");

    JobConf job = new JobConf(params, PyramidPlot.class);
    job.setJobName("PlotPyramid");

    String partition = job.get("partition", "space").toLowerCase();
    if (partition.equals("space")) {
        job.setMapperClass(SpacePartitionMap.class);
        job.setReducerClass(SpacePartitionReduce.class);
        job.setMapOutputKeyClass(TileIndex.class);
        job.setMapOutputValueClass(shape.getClass());
        job.setInputFormat(ShapeInputFormat.class);
    } else {
        job.setMapperClass(DataPartitionMap.class);
        job.setReducerClass(DataPartitionReduce.class);
        job.setMapOutputKeyClass(TileIndex.class);
        job.setMapOutputValueClass(ImageWritable.class);
        job.setInputFormat(ShapeArrayInputFormat.class);
    }

    job.setInt("color", color.getRGB());
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);
    job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks()));

    if (shape instanceof Point && job.getBoolean("sample", false)) {
        // Enable adaptive sampling
        int imageWidthRoot = job.getInt("tilewidth", 256);
        int imageHeightRoot = job.getInt("tileheight", 256);
        long recordCount = FileMBR.fileMBR(inFile, params).recordCount;
        float sampleRatio = params.getFloat(GeometricPlot.AdaptiveSampleFactor, 1.0f) * imageWidthRoot
                * imageHeightRoot / recordCount;
        job.setFloat(GeometricPlot.AdaptiveSampleRatio, sampleRatio);
    }

    Rectangle fileMBR;
    if (hdfDataset != null) {
        // Input is HDF
        job.set(HDFRecordReader.DatasetName, hdfDataset);
        job.setBoolean(HDFRecordReader.SkipFillValue, true);
        job.setClass("shape", NASARectangle.class, Shape.class);
        // Determine the range of values by opening one of the HDF files
        Aggregate.MinMax minMax = Aggregate.aggregate(new Path[] { inFile }, params);
        job.setInt(MinValue, minMax.minValue);
        job.setInt(MaxValue, minMax.maxValue);
        //fileMBR = new Rectangle(-180, -90, 180, 90);
        fileMBR = plotRange != null ? plotRange.getMBR() : new Rectangle(-180, -140, 180, 169);
        //    job.setClass(HDFRecordReader.ProjectorClass, MercatorProjector.class,
        //        GeoProjector.class);
    } else {
        fileMBR = FileMBR.fileMBR(inFile, params);
    }

    boolean keepAspectRatio = params.is("keep-ratio", true);
    if (keepAspectRatio) {
        // Expand input file to a rectangle for compatibility with the pyramid structure
        if (fileMBR.getWidth() > fileMBR.getHeight()) {
            fileMBR.y1 -= (fileMBR.getWidth() - fileMBR.getHeight()) / 2;
            fileMBR.y2 = fileMBR.y1 + fileMBR.getWidth();
        } else {
            fileMBR.x1 -= (fileMBR.getHeight() - fileMBR.getWidth() / 2);
            fileMBR.x2 = fileMBR.x1 + fileMBR.getHeight();
        }
    }

    SpatialSite.setRectangle(job, InputMBR, fileMBR);

    // Set input and output
    ShapeInputFormat.addInputPath(job, inFile);
    if (plotRange != null) {
        job.setClass(SpatialSite.FilterClass, RangeFilter.class, BlockFilter.class);
    }

    job.setOutputFormat(PyramidOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outFile);
    job.setOutputCommitter(PlotPyramidOutputCommitter.class);

    if (background) {
        JobClient jc = new JobClient(job);
        return lastSubmittedJob = jc.submitJob(job);
    } else {
        return lastSubmittedJob = JobClient.runJob(job);
    }
}
From source file:edu.umn.cs.spatialHadoop.operations.RecordCount.java
License:Open Source License
/**
 * Counts the exact number of lines in a file by issuing a MapReduce job
 * @param fs
 * @param inFile
 * @return
 * @throws IOException
 * @throws InterruptedException
 */
public static long recordCountMapReduce(FileSystem fs, Path inFile) throws IOException, InterruptedException {
    JobConf job = new JobConf(RecordCount.class);

    Path outputPath = new Path(inFile.toUri().getPath() + ".linecount");
    FileSystem outFs = outputPath.getFileSystem(job);
    outFs.delete(outputPath, true);

    job.setJobName("LineCount");
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setCombinerClass(Reduce.class);
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);
    job.setNumReduceTasks(1);

    job.setInputFormat(ShapeLineInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);

    ShapeLineInputFormat.setInputPaths(job, inFile);
    TextOutputFormat.setOutputPath(job, outputPath);

    // Submit the job
    JobClient.runJob(job);

    // Read job result
    if (OperationsParams.isLocal(job, inFile)) {
        // Enforce local execution if explicitly set by user or for small files
        job.set("mapred.job.tracker", "local");
        // Use multithreading too
        job.setInt(LocalJobRunner.LOCAL_MAX_MAPS, Runtime.getRuntime().availableProcessors());
    }
    long lineCount = 0;
    FileStatus[] results = outFs.listStatus(outputPath);
    for (FileStatus fileStatus : results) {
        if (fileStatus.getLen() > 0 && fileStatus.getPath().getName().startsWith("part-")) {
            LineReader lineReader = new LineReader(outFs.open(fileStatus.getPath()));
            Text text = new Text();
            if (lineReader.readLine(text) > 0) {
                lineCount = Long.parseLong(text.toString());
            }
            lineReader.close();
        }
    }

    outFs.delete(outputPath, true);

    return lineCount;
}
From source file:edu.umn.cs.spatialHadoop.operations.Shuffle.java
License:Open Source License
/**
 * Randomly shuffles the lines of the input file by issuing a MapReduce job
 * @param infile
 * @param outfile
 * @param params
 * @throws IOException
 */
public static void randomizerMapReduce(Path infile, Path outfile, OperationsParams params) throws IOException {
    JobConf job = new JobConf(Shuffle.class);
    job.setJobName("Randomizer");
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setMapperClass(Map.class);
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);

    job.setReducerClass(Reduce.class);
    job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks()));
    job.setInt(NumOfPartitions, Math.max(1, clusterStatus.getMaxReduceTasks()));

    job.setInputFormat(TextInputFormat.class);
    TextInputFormat.setInputPaths(job, infile);
    job.setOutputFormat(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outfile);

    // Submit the job
    JobClient.runJob(job);
}
From source file:edu.yale.cs.hadoopdb.dataloader.GlobalHasher.java
License:Apache License
@Override
protected JobConf configureJob(String... args) throws Exception {
    JobConf conf = new JobConf(getConf(), this.getClass());
    conf.setJobName("GlobalHasher");

    conf.setMapOutputKeyClass(UnsortableInt.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setOutputKeyClass(NullWritable.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(GlobalHasher.Map.class);
    conf.setReducerClass(GlobalHasher.Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    if (args.length < 5) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }

    FileInputFormat.setInputPaths(conf, new Path(args[0]));

    // OUTPUT properties
    Path outputPath = new Path(args[1]);
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    int partNo = Integer.parseInt(args[2]);
    conf.setNumReduceTasks(partNo);

    conf.set(DELIMITER_PARAM, args[3]);

    int hashFieldPos = Integer.parseInt(args[4]);
    conf.setInt(HASH_FIELD_POS_PARAM, hashFieldPos);

    return conf;
}