Example usage of the org.apache.hadoop.fs.Path constructor

List of usage examples for the org.apache.hadoop.fs.Path constructor

Introduction

On this page you can find example usages of the org.apache.hadoop.fs.Path constructor.

Prototype

public Path(URI aUri) 

Source Link

Document

Construct a path from a URI

Usage

From source file:Assignment4_P4_MemoryConscious.MovieRatingMemConsciousDriver.java

/**
 * Configures and runs the "Movie Rating Mem Conscious Standard Deviation" job.
 *
 * <p>The map phase emits {@code IntWritable} keys with {@code SortedMapWritable} values and a
 * combiner is registered; the job output is {@code IntWritable}/{@code Text}. The JVM exits with
 * status 0 when the job reports success and 1 otherwise.
 *
 * @param args command line arguments; {@code args[0]} is used as the input path and
 *             {@code args[1]} as the output path
 * @throws IOException            propagated from job setup or execution
 * @throws InterruptedException   propagated from waiting on the job
 * @throws ClassNotFoundException propagated from waiting on the job
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    final Job ratingJob = Job.getInstance(new Configuration(), "Movie Rating Mem Conscious Standard Deviation");
    ratingJob.setJarByClass(MovieRatingMemConsciousDriver.class);

    // Map side: mapper, combiner and the intermediate key/value types.
    ratingJob.setMapperClass(MovieRatingMemConscious_Mapper.class);
    ratingJob.setMapOutputKeyClass(IntWritable.class);
    ratingJob.setMapOutputValueClass(SortedMapWritable.class);
    ratingJob.setCombinerClass(MovingRatingMemConscious_Combiner.class);

    // Reduce side: reducer and the final key/value types.
    ratingJob.setReducerClass(MovieRatingMemConscious_Reducer.class);
    ratingJob.setOutputKeyClass(IntWritable.class);
    ratingJob.setOutputValueClass(Text.class);

    final Path inputLocation = new Path(args[0]);
    FileInputFormat.addInputPath(ratingJob, inputLocation);
    final Path outputLocation = new Path(args[1]);
    FileOutputFormat.setOutputPath(ratingJob, outputLocation);

    final boolean succeeded = ratingJob.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}

From source file:Assignment5_P2_DistinctIPAddress.DistinctIPAddressDriver.java

/**
 * Configures and runs the "IP Address Count" job.
 *
 * <p>The same reducer class is registered as both combiner and reducer, and the job output is
 * {@code Text}/{@code NullWritable}. The JVM exits with status 0 when the job reports success
 * and 1 otherwise.
 *
 * @param args command line arguments; {@code args[0]} is used as the input path and
 *             {@code args[1]} as the output path
 * @throws IOException            propagated from job setup or execution
 * @throws InterruptedException   propagated from waiting on the job
 * @throws ClassNotFoundException propagated from waiting on the job
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    final Job distinctIpJob = Job.getInstance(new Configuration(), "IP Address Count");
    distinctIpJob.setJarByClass(DistinctIPAddressDriver.class);

    // Processing classes; the reducer doubles as the combiner.
    distinctIpJob.setMapperClass(DistinctIPAddress_Mapper.class);
    distinctIpJob.setReducerClass(DistinctIPAddress_Reducer.class);
    distinctIpJob.setCombinerClass(DistinctIPAddress_Reducer.class);

    // Final output types.
    distinctIpJob.setOutputValueClass(NullWritable.class);
    distinctIpJob.setOutputKeyClass(Text.class);

    final Path inputLocation = new Path(args[0]);
    FileInputFormat.addInputPath(distinctIpJob, inputLocation);
    final Path outputLocation = new Path(args[1]);
    FileOutputFormat.setOutputPath(distinctIpJob, outputLocation);

    final boolean succeeded = distinctIpJob.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}

From source file:Assignment5_P3_PartitionPattern.Partition_IPAddress_By_MonthDriver.java

/**
 * Configures and runs the "IP Address By Date" job, which uses a custom partitioner and twelve
 * reduce tasks (one per month of the year).
 *
 * <p>The map phase emits {@code IntWritable}/{@code Text} pairs and the job output is
 * {@code Text}/{@code NullWritable}. The JVM exits with status 0 when the job reports success
 * and 1 otherwise.
 *
 * @param args command line arguments; {@code args[0]} is used as the input path and
 *             {@code args[1]} as the output path
 * @throws IOException            propagated from job setup or execution
 * @throws InterruptedException   propagated from waiting on the job
 * @throws ClassNotFoundException propagated from waiting on the job
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    final Job partitionJob = Job.getInstance(new Configuration(), "IP Address By Date");
    partitionJob.setJarByClass(Partition_IPAddress_By_MonthDriver.class);

    // Map side. No combiner is registered for this job.
    partitionJob.setMapperClass(Partition_IPAddress_By_Month_Mapper.class);
    partitionJob.setMapOutputValueClass(Text.class);
    partitionJob.setMapOutputKeyClass(IntWritable.class);

    // Partitioning: one reduce task per partition, and there are 12 months in a year.
    partitionJob.setPartitionerClass(Partition_IPAddress_By_Month_Partitioner.class);
    partitionJob.setNumReduceTasks(12);

    // Reduce side.
    partitionJob.setReducerClass(Partition_IPAddress_By_Month_Reducer.class);
    partitionJob.setOutputValueClass(NullWritable.class);
    partitionJob.setOutputKeyClass(Text.class);

    final Path inputLocation = new Path(args[0]);
    FileInputFormat.addInputPath(partitionJob, inputLocation);
    final Path outputLocation = new Path(args[1]);
    FileOutputFormat.setOutputPath(partitionJob, outputLocation);

    final boolean succeeded = partitionJob.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}

From source file:Assignment5_P4_BinningPattern.Binning_IPAddress_By_DayDriver.java

/**
 * Configures and runs the map-only "IP Address By Hour" job.
 *
 * <p>Two named outputs, {@code textualBins} and {@code massaBins}, are registered through
 * {@code MultipleOutputs} with counters enabled, and the number of reduce tasks is set to zero.
 * The JVM exits with status 0 when the job reports success and 1 otherwise.
 *
 * @param args command line arguments; {@code args[0]} is used as the input path and
 *             {@code args[1]} as the output path
 * @throws IOException            propagated from job setup or execution
 * @throws InterruptedException   propagated from waiting on the job
 * @throws ClassNotFoundException propagated from waiting on the job
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    final Job binningJob = Job.getInstance(new Configuration(), "IP Address By Hour");
    binningJob.setJarByClass(Binning_IPAddress_By_DayDriver.class);

    // Map-only job: zero reduce tasks.
    binningJob.setNumReduceTasks(0);
    binningJob.setMapperClass(Binning_IPAddress_By_Day_Mapper.class);
    binningJob.setMapOutputValueClass(Text.class);
    binningJob.setMapOutputKeyClass(NullWritable.class);

    // Named outputs, each written as NullWritable/Text through TextOutputFormat.
    MultipleOutputs.addNamedOutput(binningJob, "textualBins", TextOutputFormat.class, NullWritable.class,
            Text.class);
    MultipleOutputs.addNamedOutput(binningJob, "massaBins", TextOutputFormat.class, NullWritable.class,
            Text.class);
    MultipleOutputs.setCountersEnabled(binningJob, true);

    final Path inputLocation = new Path(args[0]);
    FileInputFormat.addInputPath(binningJob, inputLocation);
    final Path outputLocation = new Path(args[1]);
    FileOutputFormat.setOutputPath(binningJob, outputLocation);

    final boolean succeeded = binningJob.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}

From source file:Assignment5_P6_StructureToHierarchyPattern.Structure_HierarchyDriver.java

/**
 * Configures and runs the "Structure to Hierarchy" job, which reads two inputs with a
 * dedicated mapper each.
 *
 * <p>The job output is {@code Text}/{@code Text}. The JVM exits with status 0 when the job
 * reports success and 2 otherwise.
 *
 * @param args command line arguments; {@code args[0]} is the input handled by the movie mapper,
 *             {@code args[1]} is the input handled by the tag mapper and {@code args[2]} is the
 *             output path
 * @throws IOException            propagated from job setup or execution
 * @throws InterruptedException   propagated from waiting on the job
 * @throws ClassNotFoundException propagated from waiting on the job
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    final Job hierarchyJob = Job.getInstance(new Configuration(), "Structure to Hierarchy");
    hierarchyJob.setJarByClass(Structure_HierarchyDriver.class);

    // First input file: read as text by the movie mapper.
    final Path movieInput = new Path(args[0]);
    MultipleInputs.addInputPath(hierarchyJob, movieInput, TextInputFormat.class,
            Structure_Hierarchy_Movie_Mapper.class);

    // Second input file: read as text by the tag mapper.
    final Path tagInput = new Path(args[1]);
    MultipleInputs.addInputPath(hierarchyJob, tagInput, TextInputFormat.class,
            Structure_Hierarchy_Tag_Mapper.class);

    hierarchyJob.setReducerClass(Structure_Hierarchy_Reducer.class);
    hierarchyJob.setOutputValueClass(Text.class);
    hierarchyJob.setOutputKeyClass(Text.class);

    final Path outputLocation = new Path(args[2]);
    FileOutputFormat.setOutputPath(hierarchyJob, outputLocation);

    final boolean succeeded = hierarchyJob.waitForCompletion(true);
    System.exit(succeeded ? 0 : 2);
}

From source file:at.illecker.hadoop.rootbeer.examples.matrixmultiplication.DistributedRowMatrix.java

License:Apache License

/**
 * Sets the temporary output base path from a path string, qualified by the file system
 * obtained from {@code conf}.
 *
 * <p>If an {@link IOException} occurs the current value of {@code outputTmpBasePath} is left
 * unchanged and an error is logged; the exception is not rethrown.
 *
 * @param outPathString path string to use as the temporary output base path
 */
public void setOutputTempPathString(String outPathString) {
    try {
        outputTmpBasePath = FileSystem.get(conf).makeQualified(new Path(outPathString));
    } catch (IOException ioe) {
        // Pass the values as arguments so the {} placeholders are substituted. Concatenating
        // them onto the template left the placeholders unfilled in the logged message.
        // NOTE(review): assumes log is an SLF4J-style logger, as the {} placeholders suggest.
        log.error("Unable to set outputBasePath to {}, leaving as {}", outPathString, outputTmpBasePath);
    }
}

From source file:at.illecker.hadoop.rootbeer.examples.matrixmultiplication.MatrixMultiplicationBenchmark.java

License:Apache License

/**
 * Compares the MapReduce result matrix C with a reference product obtained from
 * {@code multiplyJava} and prints whether the verification passed.
 *
 * <p>Matrix A is recreated with a fixed random seed (42) before the reference product is
 * computed.
 *
 * @throws Exception propagated from matrix creation, multiplication or verification
 */
private void verify() throws Exception {
    // Recreate matrix A in NOT transposed form for the verification check.
    DistributedRowMatrix.createRandomDistributedRowMatrix(m_conf, n, n, new Random(42L), m_matrixAPath, false);
    final DistributedRowMatrix untransposedA = new DistributedRowMatrix(m_matrixAPath, CONF_INPUT_DIR, n, n);
    untransposedA.setConf(m_conf);

    // Reference product D = A x B; multiplyJava is handed MatrixD.seq under the input directory.
    final DistributedRowMatrix expected = untransposedA.multiplyJava(m_matrixB,
            new Path(CONF_INPUT_DIR + "/MatrixD.seq"));

    // Load the MapReduce result matrix C.
    final DistributedRowMatrix actual = new DistributedRowMatrix(m_matrixCPath, CONF_OUTPUT_DIR, n, n);
    actual.setConf(m_conf);

    final String verdict = actual.verify(expected) ? "Verify PASSED!" : "Verify FAILED!";
    System.out.println(verdict);

    // matrixC.printDistributedRowMatrix();
    // matrixD.printDistributedRowMatrix();
}

From source file:at.illecker.hama.hybrid.examples.kmeans.KMeansHybridBSP.java

License:Apache License

/********************************* CPU *********************************/
/**
 * Prepares the CPU side of this peer: reads the job settings, optionally opens a per-task
 * debug log file, and loads the initial center vectors from the sequence file named by
 * {@code CONF_CENTER_IN_PATH}.
 *
 * @param peer the BSP peer that supplies the configuration and the task id
 * @throws IOException              if the file system cannot be obtained, or closing the reader
 *                                  or writing to the debug log fails
 * @throws RuntimeException         wrapping any {@link IOException} raised while reading the
 *                                  centers file
 * @throws IllegalArgumentException if the centers file contains no center
 */
@Override
public void setup(
        BSPPeer<PipesVectorWritable, NullWritable, IntWritable, PipesVectorWritable, CenterMessage> peer)
        throws IOException {

    this.m_conf = peer.getConfiguration();
    this.m_timeMeasurement = m_conf.getBoolean(CONF_TIME, false);
    this.m_isDebuggingEnabled = m_conf.getBoolean(CONF_DEBUG, false);
    this.m_maxIterations = m_conf.getInt(CONF_MAX_ITERATIONS, -1);

    // Init logging
    if (m_isDebuggingEnabled) {
        try {
            FileSystem fs = FileSystem.get(m_conf);
            m_logger = fs.create(new Path(FileOutputFormat.getOutputPath(new BSPJob((HamaConfiguration) m_conf))
                    + "/BSP_" + peer.getTaskId() + ".log"));

        } catch (IOException e) {
            // The debug log is best effort: report the failure and carry on without it.
            LOG.error("Unable to create the debug log file", e);
        }
    }

    long startTime = 0;
    if (m_timeMeasurement) {
        startTime = System.currentTimeMillis();
    }

    // Init center vectors
    Path centroids = new Path(m_conf.get(CONF_CENTER_IN_PATH));
    FileSystem fs = FileSystem.get(m_conf);

    final List<DoubleVector> centers = new ArrayList<DoubleVector>();
    SequenceFile.Reader reader = null;
    try {
        reader = new SequenceFile.Reader(fs, centroids, m_conf);
        PipesVectorWritable key = new PipesVectorWritable();
        NullWritable value = NullWritable.get();
        while (reader.next(key, value)) {
            centers.add(key.getVector());
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    } finally {
        if (reader != null) {
            reader.close();
        }
    }

    Preconditions.checkArgument(centers.size() > 0, "Centers file must contain at least a single center!");

    this.m_centers_cpu = centers.toArray(new DoubleVector[centers.size()]);

    if (m_timeMeasurement) {
        final long stopTime = System.currentTimeMillis();
        LOG.info("# setupTime: " + ((stopTime - startTime) / 1000.0) + " sec");
        // m_logger is not assigned when creating the log file failed above, so check it
        // before writing to avoid a NullPointerException.
        // NOTE(review): the "PiEstimatorHybrid" label is kept as-is in case log consumers
        // depend on it - confirm whether it should name this class instead.
        if (m_isDebuggingEnabled && m_logger != null) {
            m_logger.writeChars("PiEstimatorHybrid,setupTime: " + ((stopTime - startTime) / 1000.0) + " sec\n");
        }
    }
}

From source file:at.illecker.hama.hybrid.examples.kmeans.KMeansHybridBSP.java

License:Apache License

/**
 * Writes every cached vector to the peer output keyed by the index of its nearest center and,
 * on the first peer only, writes the current centers to the sequence file named by
 * {@code CONF_CENTER_OUT_PATH} (skipped when that setting is absent).
 *
 * @param peer the BSP peer used for writing the assignments and for identifying the first peer
 * @throws IOException if writing the assignments or the centers file fails
 */
private void recalculateAssignmentsAndWrite(
        BSPPeer<PipesVectorWritable, NullWritable, IntWritable, PipesVectorWritable, CenterMessage> peer)
        throws IOException {

    IntWritable keyWrite = new IntWritable();
    for (DoubleVector v : m_cache) {
        final int lowestDistantCenter = getNearestCenter(v);
        keyWrite.set(lowestDistantCenter);
        peer.write(keyWrite, new PipesVectorWritable(v));
    }

    // just on the first task write the centers to filesystem to prevent
    // collisions
    if (peer.getPeerName().equals(peer.getPeerName(0))) {
        String pathString = m_conf.get(CONF_CENTER_OUT_PATH);
        if (pathString != null) {
            final SequenceFile.Writer dataWriter = SequenceFile.createWriter(FileSystem.get(m_conf), m_conf,
                    new Path(pathString), PipesVectorWritable.class, NullWritable.class, CompressionType.NONE);
            try {
                final NullWritable value = NullWritable.get();
                for (DoubleVector center : m_centers_cpu) {
                    dataWriter.append(new PipesVectorWritable(center), value);
                }
            } finally {
                // Close the writer even when an append fails so it is not leaked.
                dataWriter.close();
            }
        }
    }
}

From source file:at.illecker.hama.hybrid.examples.kmeans.KMeansHybridBSP.java

License:Apache License

/********************************* GPU *********************************/
/**
 * Prepares the GPU side of this peer: reads the job settings (including block and grid size),
 * optionally opens a per-task debug log file, and loads the initial center vectors from the
 * sequence file named by {@code CONF_CENTER_IN_PATH} into a {@code double[][]}.
 *
 * <p>The array is sized by the length of the first center read; each center is copied into
 * its own row.
 *
 * @param peer the BSP peer that supplies the configuration and the task id
 * @throws IOException              if the file system cannot be obtained, or closing the reader
 *                                  or writing to the debug log fails
 * @throws SyncException            declared by the overridden method; not raised directly here
 * @throws InterruptedException     declared by the overridden method; not raised directly here
 * @throws RuntimeException         wrapping any {@link IOException} raised while reading the
 *                                  centers file
 * @throws IllegalArgumentException if the centers file contains no center
 */
@Override
public void setupGpu(
        BSPPeer<PipesVectorWritable, NullWritable, IntWritable, PipesVectorWritable, CenterMessage> peer)
        throws IOException, SyncException, InterruptedException {

    this.m_conf = peer.getConfiguration();
    this.m_timeMeasurement = m_conf.getBoolean(CONF_TIME, false);
    this.m_isDebuggingEnabled = m_conf.getBoolean(CONF_DEBUG, false);
    this.m_maxIterations = m_conf.getInt(CONF_MAX_ITERATIONS, -1);
    this.m_blockSize = Integer.parseInt(this.m_conf.get(CONF_BLOCKSIZE));
    this.m_gridSize = Integer.parseInt(this.m_conf.get(CONF_GRIDSIZE));

    // Init logging
    if (m_isDebuggingEnabled) {
        try {
            FileSystem fs = FileSystem.get(m_conf);
            m_logger = fs.create(new Path(FileOutputFormat.getOutputPath(new BSPJob((HamaConfiguration) m_conf))
                    + "/BSP_" + peer.getTaskId() + ".log"));

        } catch (IOException e) {
            // The debug log is best effort: report the failure and carry on without it.
            LOG.error("Unable to create the debug log file", e);
        }
    }

    long startTime = 0;
    if (m_timeMeasurement) {
        startTime = System.currentTimeMillis();
    }

    // Init center vectors
    Path centroids = new Path(m_conf.get(CONF_CENTER_IN_PATH));
    FileSystem fs = FileSystem.get(m_conf);

    final List<double[]> centers = new ArrayList<double[]>();
    SequenceFile.Reader reader = null;
    try {
        reader = new SequenceFile.Reader(fs, centroids, m_conf);
        PipesVectorWritable key = new PipesVectorWritable();
        NullWritable value = NullWritable.get();
        while (reader.next(key, value)) {
            centers.add(key.getVector().toArray());
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    } finally {
        if (reader != null) {
            reader.close();
        }
    }

    Preconditions.checkArgument(centers.size() > 0, "Centers file must contain at least a single center!");

    // build centers_gpu double[][]: one row per center, row width taken from the first center
    this.m_centers_gpu = new double[centers.size()][centers.get(0).length];
    for (int i = 0; i < centers.size(); i++) {
        final double[] vector = centers.get(i);
        System.arraycopy(vector, 0, this.m_centers_gpu[i], 0, vector.length);
    }

    if (m_timeMeasurement) {
        final long stopTime = System.currentTimeMillis();
        LOG.info("# setupGpuTime: " + ((stopTime - startTime) / 1000.0) + " sec");
        // m_logger is not assigned when creating the log file failed above, so check it
        // before writing to avoid a NullPointerException.
        // NOTE(review): the "PiEstimatorHybrid" label is kept as-is in case log consumers
        // depend on it - confirm whether it should name this class instead.
        if (m_isDebuggingEnabled && m_logger != null) {
            m_logger.writeChars(
                    "PiEstimatorHybrid,setupGpuTime: " + ((stopTime - startTime) / 1000.0) + " sec\n");
        }
    }
}