Example usage for org.apache.hadoop.fs Path getName

List of usage examples for org.apache.hadoop.fs Path getName

Introduction

In this page you can find the example usage for org.apache.hadoop.fs Path getName.

Prototype

public String getName() 

Source Link

Document

Returns the final component of this path.

Usage

From source file:edu.umn.cs.sthadoop.operations.STRangeQuery.java

License:Open Source License

public static void rangeQueryOperation(OperationsParams parameters) throws Exception {
    final OperationsParams params = parameters;

    final Path[] paths = params.getPaths();
    if (paths.length <= 1 && !params.checkInput()) {
        printUsage();/*from  w  w w.j  a v a  2 s.  c  o  m*/
        System.exit(1);
    }
    if (paths.length >= 2 && !params.checkInputOutput()) {
        printUsage();
        System.exit(1);
    }
    if (params.get("rect") == null) {
        String x1 = "-" + Double.toString(Double.MAX_VALUE);
        String y1 = "-" + Double.toString(Double.MAX_VALUE);
        String x2 = Double.toString(Double.MAX_VALUE);
        String y2 = Double.toString(Double.MAX_VALUE);
        System.out.println(x1 + "," + y1 + "," + x2 + "," + y2);
        params.set("rect", x1 + "," + y1 + "," + x2 + "," + y2);
        //         System.err.println("You must provide a query range");
        //         printUsage();
        //         System.exit(1);
    }

    if (params.get("interval") == null) {
        System.err.println("Temporal range missing");
        printUsage();
        System.exit(1);
    }

    TextSerializable inObj = params.getShape("shape");
    if (!(inObj instanceof STPoint) && !(inObj instanceof STRectangle)) {
        LOG.error("Shape is not instance of STPoint or STRectangle");
        printUsage();
        System.exit(1);
    }

    // Get spatio-temporal slices.
    List<Path> STPaths = getIndexedSlices(params);
    final Path outPath = params.getOutputPath();
    final Rectangle[] queryRanges = params.getShapes("rect", new Rectangle());

    // All running jobs
    final Vector<Long> resultsCounts = new Vector<Long>();
    Vector<Job> jobs = new Vector<Job>();
    Vector<Thread> threads = new Vector<Thread>();

    long t1 = System.currentTimeMillis();
    for (Path stPath : STPaths) {
        final Path inPath = stPath;
        for (int i = 0; i < queryRanges.length; i++) {
            final OperationsParams queryParams = new OperationsParams(params);
            OperationsParams.setShape(queryParams, "rect", queryRanges[i]);
            if (OperationsParams.isLocal(new JobConf(queryParams), inPath)) {
                // Run in local mode
                final Rectangle queryRange = queryRanges[i];
                final Shape shape = queryParams.getShape("shape");
                final Path output = outPath == null ? null
                        : (queryRanges.length == 1 ? outPath : new Path(outPath, String.format("%05d", i)));
                Thread thread = new Thread() {
                    @Override
                    public void run() {
                        FSDataOutputStream outFile = null;
                        final byte[] newLine = System.getProperty("line.separator", "\n").getBytes();
                        try {
                            ResultCollector<Shape> collector = null;
                            if (output != null) {
                                FileSystem outFS = output.getFileSystem(queryParams);
                                final FSDataOutputStream foutFile = outFile = outFS.create(output);
                                collector = new ResultCollector<Shape>() {
                                    final Text tempText = new Text2();

                                    @Override
                                    public synchronized void collect(Shape r) {
                                        try {
                                            tempText.clear();
                                            r.toText(tempText);
                                            foutFile.write(tempText.getBytes(), 0, tempText.getLength());
                                            foutFile.write(newLine);
                                        } catch (IOException e) {
                                            e.printStackTrace();
                                        }
                                    }
                                };
                            } else {
                                outFile = null;
                            }
                            long resultCount = rangeQueryLocal(inPath, queryRange, shape, queryParams,
                                    collector);
                            resultsCounts.add(resultCount);
                        } catch (IOException e) {
                            e.printStackTrace();
                        } catch (InterruptedException e) {
                            e.printStackTrace();
                        } finally {
                            try {
                                if (outFile != null)
                                    outFile.close();
                            } catch (IOException e) {
                                e.printStackTrace();
                            }
                        }
                    }
                };
                thread.start();
                threads.add(thread);
            } else {
                // Run in MapReduce mode
                Path outTempPath = outPath == null ? null
                        : new Path(outPath, String.format("%05d", i) + "-" + inPath.getName());
                queryParams.setBoolean("background", true);
                Job job = rangeQueryMapReduce(inPath, outTempPath, queryParams);
                jobs.add(job);
            }
        }
    }

    while (!jobs.isEmpty()) {
        Job firstJob = jobs.firstElement();
        firstJob.waitForCompletion(false);
        if (!firstJob.isSuccessful()) {
            System.err.println("Error running job " + firstJob);
            System.err.println("Killing all remaining jobs");
            for (int j = 1; j < jobs.size(); j++)
                jobs.get(j).killJob();
            System.exit(1);
        }
        Counters counters = firstJob.getCounters();
        Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS);
        resultsCounts.add(outputRecordCounter.getValue());
        jobs.remove(0);
    }
    while (!threads.isEmpty()) {
        try {
            Thread thread = threads.firstElement();
            thread.join();
            threads.remove(0);
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }
    long t2 = System.currentTimeMillis();
    System.out.println("QueryPlan:");
    for (Path stPath : STPaths) {
        System.out.println(stPath.getName());
    }
    System.out.println("Time for " + queryRanges.length + " jobs is " + (t2 - t1) + " millis");
    System.out.println("Results counts: " + resultsCounts);
}

From source file:edu.umn.cs.sthadoop.operations.STRangeQuery.java

License:Open Source License

public static void main(String[] args) throws Exception {
    //      args = new String[7];
    //      args[0] = "/home/louai/nyc-taxi/yellowIndex";
    //      args[1] = "/home/louai/nyc-taxi/resultSTRQ";
    //      args[2] = "shape:edu.umn.cs.sthadoop.core.STPoint";
    //      args[3] = "rect:-74.98451232910156,35.04014587402344,-73.97936248779295,41.49399566650391";
    //      args[4] = "interval:2015-01-01,2015-01-02";
    //      args[5] = "-overwrite";
    //      args[6] = "-no-local";

    // Query for test with output
    //      args = new String[6];
    //      args[0] = "/home/louai/nyc-taxi/yellowIndex";
    //      args[1] = "shape:edu.umn.cs.sthadoop.core.STPoint";
    //      args[2] = "rect:-74.98451232910156,35.04014587402344,-73.97936248779295,41.49399566650391";
    //      args[3] = "interval:2015-01-01,2015-01-03";
    //      args[4] = "-overwrite";
    //      args[5   ] = "-no-local";

    final OperationsParams params = new OperationsParams(new GenericOptionsParser(args));

    final Path[] paths = params.getPaths();
    if (paths.length <= 1 && !params.checkInput()) {
        printUsage();//from w w  w  .  ja va  2  s  . c o m
        System.exit(1);
    }
    if (paths.length >= 2 && !params.checkInputOutput()) {
        printUsage();
        System.exit(1);
    }
    if (params.get("rect") == null) {
        String x1 = "-" + Double.toString(Double.MAX_VALUE);
        String y1 = "-" + Double.toString(Double.MAX_VALUE);
        String x2 = Double.toString(Double.MAX_VALUE);
        String y2 = Double.toString(Double.MAX_VALUE);
        System.out.println(x1 + "," + y1 + "," + x2 + "," + y2);
        params.set("rect", x1 + "," + y1 + "," + x2 + "," + y2);
        //         System.err.println("You must provide a query range");
        //         printUsage();
        //         System.exit(1);
    }

    if (params.get("interval") == null) {
        System.err.println("Temporal range missing");
        printUsage();
        System.exit(1);
    }

    TextSerializable inObj = params.getShape("shape");
    if (!(inObj instanceof STPoint) && !(inObj instanceof STRectangle)) {
        LOG.error("Shape is not instance of STPoint or STRectangle");
        printUsage();
        System.exit(1);
    }

    // Get spatio-temporal slices.
    List<Path> STPaths = getIndexedSlices(params);
    final Path outPath = params.getOutputPath();
    final Rectangle[] queryRanges = params.getShapes("rect", new Rectangle());

    // All running jobs
    final Vector<Long> resultsCounts = new Vector<Long>();
    Vector<Job> jobs = new Vector<Job>();
    Vector<Thread> threads = new Vector<Thread>();

    long t1 = System.currentTimeMillis();
    for (Path stPath : STPaths) {
        final Path inPath = stPath;
        for (int i = 0; i < queryRanges.length; i++) {
            final OperationsParams queryParams = new OperationsParams(params);
            OperationsParams.setShape(queryParams, "rect", queryRanges[i]);
            if (OperationsParams.isLocal(new JobConf(queryParams), inPath)) {
                // Run in local mode
                final Rectangle queryRange = queryRanges[i];
                final Shape shape = queryParams.getShape("shape");
                final Path output = outPath == null ? null
                        : (queryRanges.length == 1 ? outPath : new Path(outPath, String.format("%05d", i)));
                Thread thread = new Thread() {
                    @Override
                    public void run() {
                        FSDataOutputStream outFile = null;
                        final byte[] newLine = System.getProperty("line.separator", "\n").getBytes();
                        try {
                            ResultCollector<Shape> collector = null;
                            if (output != null) {
                                FileSystem outFS = output.getFileSystem(queryParams);
                                final FSDataOutputStream foutFile = outFile = outFS.create(output);
                                collector = new ResultCollector<Shape>() {
                                    final Text tempText = new Text2();

                                    @Override
                                    public synchronized void collect(Shape r) {
                                        try {
                                            tempText.clear();
                                            r.toText(tempText);
                                            foutFile.write(tempText.getBytes(), 0, tempText.getLength());
                                            foutFile.write(newLine);
                                        } catch (IOException e) {
                                            e.printStackTrace();
                                        }
                                    }
                                };
                            } else {
                                outFile = null;
                            }
                            long resultCount = rangeQueryLocal(inPath, queryRange, shape, queryParams,
                                    collector);
                            resultsCounts.add(resultCount);
                        } catch (IOException e) {
                            e.printStackTrace();
                        } catch (InterruptedException e) {
                            e.printStackTrace();
                        } finally {
                            try {
                                if (outFile != null)
                                    outFile.close();
                            } catch (IOException e) {
                                e.printStackTrace();
                            }
                        }
                    }
                };
                thread.start();
                threads.add(thread);
            } else {
                // Run in MapReduce mode
                Path outTempPath = outPath == null ? null
                        : new Path(outPath, String.format("%05d", i) + "-" + inPath.getName());
                queryParams.setBoolean("background", true);
                Job job = rangeQueryMapReduce(inPath, outTempPath, queryParams);
                jobs.add(job);
            }
        }
    }

    while (!jobs.isEmpty()) {
        Job firstJob = jobs.firstElement();
        firstJob.waitForCompletion(false);
        if (!firstJob.isSuccessful()) {
            System.err.println("Error running job " + firstJob);
            System.err.println("Killing all remaining jobs");
            for (int j = 1; j < jobs.size(); j++)
                jobs.get(j).killJob();
            System.exit(1);
        }
        Counters counters = firstJob.getCounters();
        Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS);
        resultsCounts.add(outputRecordCounter.getValue());
        jobs.remove(0);
    }
    while (!threads.isEmpty()) {
        try {
            Thread thread = threads.firstElement();
            thread.join();
            threads.remove(0);
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }
    long t2 = System.currentTimeMillis();
    System.out.println("QueryPlan:");
    for (Path stPath : STPaths) {
        System.out.println(stPath.getName());
    }
    System.out.println("Time for " + queryRanges.length + " jobs is " + (t2 - t1) + " millis");
    System.out.println("Results counts: " + resultsCounts);
}

From source file:edu.umn.cs.sthadoop.trajectory.KNNDTW.java

License:Open Source License

/**
 * A MapReduce version of KNN query./*  w  ww  . ja  v a2s  .  co m*/
 * 
 * @param fs
 * @param inputPath
 * @param queryPoint
 * @param shape
 * @param output
 * @return
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private static <S extends Shape> Job knnMapReduce(Path inputPath, Path userOutputPath, OperationsParams params)
        throws IOException, ClassNotFoundException, InterruptedException {
    Job job = new Job(params, "KNN");
    job.setJarByClass(KNNDTW.class);

    FileSystem inFs = inputPath.getFileSystem(params);
    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.setInputPaths(job, inputPath);

    job.setMapperClass(KNNMap.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(TextWithDistance.class);

    job.setReducerClass(KNNReduce.class);
    job.setNumReduceTasks(1);

    job.getConfiguration().setClass(SpatialSite.FilterClass, RangeFilter.class, BlockFilter.class);
    final Point queryPoint = (Point) params.getShape("point");
    final int k = params.getInt("k", 1);

    final IntWritable additional_blocks_2b_processed = new IntWritable(0);
    long resultCount;
    int iterations = 0;

    Path outputPath = userOutputPath;
    if (outputPath == null) {
        do {
            outputPath = new Path(inputPath.getName() + ".knn_" + (int) (Math.random() * 1000000));
        } while (inFs.exists(outputPath));
    }
    job.setOutputFormatClass(TextOutputFormat3.class);
    TextOutputFormat3.setOutputPath(job, outputPath);

    GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(inFs, inputPath);
    Configuration templateConf = job.getConfiguration();

    FileSystem outFs = outputPath.getFileSystem(params);
    // Start with the query point to select all partitions overlapping with
    // it
    Shape range_for_this_iteration = new Point(queryPoint.x, queryPoint.y);

    do {
        job = new Job(templateConf);
        // Delete results of last iteration if not first iteration
        if (outputPath != null)
            outFs.delete(outputPath, true);

        LOG.info("Running iteration: " + (++iterations));
        // Set query range for the SpatialInputFormat
        OperationsParams.setShape(job.getConfiguration(), RangeFilter.QueryRange, range_for_this_iteration);

        // Submit the job
        if (params.getBoolean("background", false)) {
            // XXX this is incorrect because if the job needs multiple
            // iterations,
            // it will run only the first one
            job.waitForCompletion(false);
            return job;
        }
        job.waitForCompletion(false);

        // Retrieve answers for this iteration
        Counters counters = job.getCounters();
        Counter resultSizeCounter = counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS);
        resultCount = resultSizeCounter.getValue();

        if (globalIndex != null) {
            Circle range_for_next_iteration;
            if (resultCount < k) {
                LOG.info("Found only " + resultCount + " results");
                // Did not find enough results in the query space
                // Increase the distance by doubling the maximum distance
                // among all
                // partitions that were processed
                final DoubleWritable maximum_distance = new DoubleWritable(0);
                int matched_partitions = globalIndex.rangeQuery(range_for_this_iteration,
                        new ResultCollector<Partition>() {
                            @Override
                            public void collect(Partition p) {
                                double distance = p.getMaxDistanceTo(queryPoint.x, queryPoint.y);
                                if (distance > maximum_distance.get())
                                    maximum_distance.set(distance);
                            }
                        });
                if (matched_partitions == 0) {
                    // The query point is outside the search space
                    // Set the range to include the closest partition
                    globalIndex.knn(queryPoint.x, queryPoint.y, 1, new ResultCollector2<Partition, Double>() {
                        @Override
                        public void collect(Partition r, Double s) {
                            maximum_distance.set(s);
                        }
                    });
                }
                range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y, maximum_distance.get() * 2);
                LOG.info("Expanding to " + maximum_distance.get() * 2);
            } else {
                // Calculate the new test range which is a circle centered
                // at the
                // query point and distance to the k^{th} neighbor

                // Get distance to the kth neighbor
                final DoubleWritable distance_to_kth_neighbor = new DoubleWritable();
                FileStatus[] results = outFs.listStatus(outputPath);
                for (FileStatus result_file : results) {
                    if (result_file.getLen() > 0 && result_file.getPath().getName().startsWith("part-")) {
                        // Read the last line (kth neighbor)
                        Tail.tail(outFs, result_file.getPath(), 1, new TextWithDistance(),
                                new ResultCollector<TextWithDistance>() {

                                    @Override
                                    public void collect(TextWithDistance r) {
                                        distance_to_kth_neighbor.set(r.distance);
                                    }
                                });
                    }
                }
                range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y,
                        distance_to_kth_neighbor.get());
                LOG.info("Expanding to kth neighbor: " + distance_to_kth_neighbor);
            }

            // Calculate the number of blocks to be processed to check the
            // terminating condition;
            additional_blocks_2b_processed.set(0);
            final Shape temp = range_for_this_iteration;
            globalIndex.rangeQuery(range_for_next_iteration, new ResultCollector<Partition>() {
                @Override
                public void collect(Partition p) {
                    if (!(p.isIntersected(temp))) {
                        additional_blocks_2b_processed.set(additional_blocks_2b_processed.get() + 1);
                    }
                }
            });
            range_for_this_iteration = range_for_next_iteration;
        }
    } while (additional_blocks_2b_processed.get() > 0);

    // If output file is not set by user, delete it
    if (userOutputPath == null)
        outFs.delete(outputPath, true);
    TotalIterations.addAndGet(iterations);

    return job;
}

From source file:edu.umn.cs.sthadoop.trajectory.TrajectoryOverlap.java

License:Open Source License

public static void main(String[] args) throws Exception {

    //      args = new String[8];
    //      args[0] = "/export/scratch/mntgData/geolifeGPS/geolife_Trajectories_1.3/HDFS/index_geolife";
    //      args[1] = "/export/scratch/mntgData/geolifeGPS/geolife_Trajectories_1.3/HDFS/knn-dis-result";
    //      args[2] = "shape:edu.umn.cs.sthadoop.trajectory.GeolifeTrajectory";
    //      args[3] = "interval:2008-05-01,2008-05-30";
    //      args[4] = "time:month";
    //      args[5] = "traj:39.9119983,116.606835;39.9119783,116.6065483;39.9119599,116.6062649;39.9119416,116.6059899;39.9119233,116.6057282;39.9118999,116.6054783;39.9118849,116.6052366;39.9118666,116.6050099;39.91185,116.604775;39.9118299,116.604525;39.9118049,116.6042649;39.91177,116.6040166;39.9117516,116.6037583;39.9117349,116.6035066;39.9117199,116.6032666;39.9117083,116.6030232;39.9117,116.6027566;39.91128,116.5969383;39.9112583,116.5966766;39.9112383,116.5964232;39.9112149,116.5961699;39.9111933,116.5959249;39.9111716,116.5956883";
    //      args[6] = "-overwrite";
    //      args[7] = "-local";//"-no-local";

    final OperationsParams params = new OperationsParams(new GenericOptionsParser(args));

    final Path[] paths = params.getPaths();
    if (paths.length <= 1 && !params.checkInput()) {
        printUsage();//from  w ww  .  j  a va2s .c om
        System.exit(1);
    }
    if (paths.length >= 2 && !params.checkInputOutput()) {
        printUsage();
        System.exit(1);
    }

    if (params.get("traj") == null) {
        System.err.println("Trajectory query is missing");
        printUsage();
        System.exit(1);
    }

    // Invoke method to compute the trajectory MBR. 
    String rectangle = getTrajectoryRectangle(params.get("traj"));
    params.set("rect", rectangle);

    if (params.get("rect") == null) {
        System.err.println("You must provide a Trajectory Query");
        printUsage();
        System.exit(1);
    }

    if (params.get("interval") == null) {
        System.err.println("Temporal range missing");
        printUsage();
        System.exit(1);
    }

    TextSerializable inObj = params.getShape("shape");
    if (!(inObj instanceof STPoint)) {
        LOG.error("Shape is not instance of STPoint");
        printUsage();
        System.exit(1);
    }

    // Get spatio-temporal slices.
    List<Path> STPaths = getIndexedSlices(params);
    final Path outPath = params.getOutputPath();
    final Rectangle[] queryRanges = params.getShapes("rect", new Rectangle());

    // All running jobs
    final Vector<Long> resultsCounts = new Vector<Long>();
    Vector<Job> jobs = new Vector<Job>();
    Vector<Thread> threads = new Vector<Thread>();

    long t1 = System.currentTimeMillis();
    for (Path stPath : STPaths) {
        final Path inPath = stPath;
        for (int i = 0; i < queryRanges.length; i++) {
            final OperationsParams queryParams = new OperationsParams(params);
            OperationsParams.setShape(queryParams, "rect", queryRanges[i]);
            if (OperationsParams.isLocal(new JobConf(queryParams), inPath)) {
                // Run in local mode
                final Rectangle queryRange = queryRanges[i];
                final Shape shape = queryParams.getShape("shape");
                final Path output = outPath == null ? null
                        : (queryRanges.length == 1 ? outPath : new Path(outPath, String.format("%05d", i)));
                Thread thread = new Thread() {
                    @Override
                    public void run() {
                        FSDataOutputStream outFile = null;
                        final byte[] newLine = System.getProperty("line.separator", "\n").getBytes();
                        try {
                            ResultCollector<Shape> collector = null;
                            if (output != null) {
                                FileSystem outFS = output.getFileSystem(queryParams);
                                final FSDataOutputStream foutFile = outFile = outFS.create(output);
                                collector = new ResultCollector<Shape>() {
                                    final Text tempText = new Text2();

                                    @Override
                                    public synchronized void collect(Shape r) {
                                        try {
                                            tempText.clear();
                                            r.toText(tempText);
                                            foutFile.write(tempText.getBytes(), 0, tempText.getLength());
                                            foutFile.write(newLine);
                                        } catch (IOException e) {
                                            e.printStackTrace();
                                        }
                                    }
                                };
                            } else {
                                outFile = null;
                            }
                            long resultCount = rangeQueryLocal(inPath, queryRange, shape, queryParams,
                                    collector);
                            resultsCounts.add(resultCount);
                        } catch (IOException e) {
                            e.printStackTrace();
                        } catch (InterruptedException e) {
                            e.printStackTrace();
                        } finally {
                            try {
                                if (outFile != null)
                                    outFile.close();
                            } catch (IOException e) {
                                e.printStackTrace();
                            }
                        }
                    }
                };
                thread.start();
                threads.add(thread);
            } else {
                // Run in MapReduce mode
                Path outTempPath = outPath == null ? null
                        : new Path(outPath, String.format("%05d", i) + "-" + inPath.getName());
                queryParams.setBoolean("background", true);
                Job job = rangeQueryMapReduce(inPath, outTempPath, queryParams);
                jobs.add(job);
            }
        }
    }

    while (!jobs.isEmpty()) {
        Job firstJob = jobs.firstElement();
        firstJob.waitForCompletion(false);
        if (!firstJob.isSuccessful()) {
            System.err.println("Error running job " + firstJob);
            System.err.println("Killing all remaining jobs");
            for (int j = 1; j < jobs.size(); j++)
                jobs.get(j).killJob();
            System.exit(1);
        }
        Counters counters = firstJob.getCounters();
        Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS);
        resultsCounts.add(outputRecordCounter.getValue());
        jobs.remove(0);
    }
    while (!threads.isEmpty()) {
        try {
            Thread thread = threads.firstElement();
            thread.join();
            threads.remove(0);
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }
    long t2 = System.currentTimeMillis();
    System.out.println("QueryPlan:");
    for (Path stPath : STPaths) {
        System.out.println(stPath.getName());
    }
    System.out.println("Time for " + queryRanges.length + " jobs is " + (t2 - t1) + " millis");
    System.out.println("Results counts: " + resultsCounts);
}

From source file:edu.usc.pgroup.louvain.hadoop.GraphPartitionRecordReader.java

License:Apache License

@Override
public boolean nextKeyValue() throws IOException, InterruptedException {

    if (isFinished) {
        return false;
    }/* w  w w.  ja va 2s  .  c  om*/

    int fileLength = (int) split.getLength();
    byte[] result = new byte[fileLength];

    FileSystem fs = FileSystem.get(conf);
    Path path = split.getPath();
    currentKey = new Text(path.getName());
    FSDataInputStream in = null;
    try {
        in = fs.open(split.getPath());
        IOUtils.readFully(in, result, 0, fileLength);
        currentValue.set(result, 0, fileLength);

    } finally {
        IOUtils.closeStream(in);
    }

    this.isFinished = true;
    return true;
}

From source file:eml.studio.server.file.FileDownloadServlet.java

License:Open Source License

/**
 * Method of download file from HDFS/*from w ww  .jav a2  s  .c  o m*/
 * 
 * @param filePath File or directory path (HDFS path)
 * @param fileName  file name
 * @param baseDir  path of parent file
 * @throws IOException
 */
private static void download(String filePath, String fileName, String baseDir) throws IOException {
    Path uri = new Path(filePath);
    if (HDFSIO.isDirectory(uri)) {
        File dirFile = new File(DOWNLOAD_BASE_DIR + "/" + baseDir + fileName);
        if (!dirFile.exists()) {
            logger.info("Download mkdir:" + dirFile.getPath());
            dirFile.mkdirs();
        }
        Path[] paths = HDFSIO.list(filePath);
        for (Path p : paths)
            download(p.toString(), p.getName(), baseDir + fileName + "/");
    } else {
        InputStream in = HDFSIO.downInputStream(filePath);
        logger.info("Download file:" + DOWNLOAD_BASE_DIR + "/" + baseDir + fileName);
        File file = new File(DOWNLOAD_BASE_DIR + "/" + baseDir + fileName);
        if (!file.exists())
            file.createNewFile();
        FileOutputStream out = new FileOutputStream(file);
        byte[] buffer = new byte[FILE_BUFFER_SIZE];
        int length;
        while ((length = in.read(buffer)) > 0) {
            out.write(buffer, 0, length);
        }
        in.close();
        out.flush();
        out.close();
    }
}

From source file:eml.studio.server.rpc.DatasetServiceImpl.java

License:Open Source License

/**
 * Get all files under a directory//from w ww .ja  va2s .com
 *
 * @param path  path of directory
 * @return list of names of all files
 * @throws Exception
 */
@Override
public List<String> getDirFilesPath(String path) throws Exception {
    // TODO Auto-generated method stub
    Path[] filePaths = HDFSIO.list(path);
    List<String> fileNames = new ArrayList<String>();
    for (Path p : filePaths) {
        fileNames.add(p.getName());
    }
    return fileNames;
}

From source file:eml.studio.server.util.HDFSIO.java

License:Open Source License

/**
 * Read the file or directory size of a file under HDFS
 * /*from  w w  w .j a v a 2 s .  co m*/
 * @param uri  target File or directory path
 * @return   The total size of all file sizes in the file size or directory (in KB)
 * @throws IOException
 */
public static double getFileSize(String uri) throws IOException {
    Path path = new Path(uri);
    long totalSize = 0;
    if (HDFSIO.isDirectory(path)) {
        Path[] dirFilePath = HDFSIO.list(uri);
        for (Path p : dirFilePath) {
            totalSize += getFileSize(uri + "/" + p.getName()) * 1000.0;
        }
    } else {
        FileStatus status = fs.getFileStatus(path);
        totalSize = status.getLen();
    }

    return totalSize / 1000.0;
}

From source file:eu.edisonproject.classification.tfidf.mapreduce.CompetencesDistanceDriver.java

License:Apache License

@Override
public int run(String[] args) {
    try {/*  w  w  w  .  ja v  a2  s.  c  o  m*/
        Configuration conf = HBaseConfiguration.create();
        //additional output using TextOutputFormat.
        conf.set("file.names", args[3]);

        Job job = Job.getInstance(conf);
        //TableMapReduceUtil.addDependencyJars(job); 
        job.setJarByClass(CompetencesDistanceDriver.class);
        //This row must be changed
        job.setJobName("Words Group By Title Driver");

        Path inPath = new Path(args[0]);
        Path outPath = new Path(args[1]);

        Path competencesPath = new Path(args[2]);
        Path competencesPathHDFS = competencesPath;
        FileSystem fs = FileSystem.get(conf);

        if (!conf.get(FileSystem.FS_DEFAULT_NAME_KEY).startsWith("file")) {
            competencesPathHDFS = new Path(competencesPath.getName());
            if (!fs.exists(competencesPathHDFS)) {
                fs.mkdirs(competencesPathHDFS);
                File[] stats = new File(competencesPath.toString()).listFiles();
                for (File stat : stats) {
                    Path filePath = new Path(stat.getAbsolutePath());
                    if (FilenameUtils.getExtension(filePath.getName()).endsWith("csv")) {
                        Path dest = new Path(competencesPathHDFS.toUri() + "/" + filePath.getName());
                        fs.copyFromLocalFile(filePath, dest);
                    }
                }
            }
        }
        job.addCacheFile(competencesPathHDFS.toUri());

        FileInputFormat.setInputPaths(job, inPath);

        FileOutputFormat.setOutputPath(job, outPath);
        fs.delete(outPath, true);

        job.setMapperClass(CompetencesDistanceMapper.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(CompetencesDistanceReducer.class);
        //            job.setOutputFormatClass(TableOutputFormat.class);
        //            job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, "jobpostcompetence");
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        String[] fileNames = args[3].split(",");
        for (String n : fileNames) {
            MultipleOutputs.addNamedOutput(job, n, TextOutputFormat.class, Text.class, Text.class);
        }

        return (job.waitForCompletion(true) ? 0 : 1);
    } catch (IOException | IllegalStateException | IllegalArgumentException | InterruptedException
            | ClassNotFoundException ex) {
        Logger.getLogger(CompetencesDistanceDriver.class.getName()).log(Level.SEVERE, null, ex);
    }
    return 0;
}