List of usage examples for the org.apache.hadoop.fs.Path constructor
public Path(URI aUri)
public Path(String pathString)
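Before the collected samples, here is a minimal, self-contained sketch of constructing Path objects from a URI and from a plain string (the form most of the examples below use), plus the parent-plus-child form. The namenode address and directory names are placeholders chosen for illustration, not values from any of the source files below.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PathExample {
    public static void main(String[] args) throws Exception {
        // From a URI: scheme and authority become part of the path.
        Path fromUri = new Path(new URI("hdfs://namenode:8020/user/demo/input"));

        // From a String: relative or absolute; the scheme is resolved later by the FileSystem.
        Path fromString = new Path("/user/demo/output");

        // Parent-plus-child form is common when composing paths.
        Path child = new Path(fromString, "part-r-00000");

        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        System.out.println(child + " exists: " + fs.exists(child));
    }
}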
From source file:Inlinks.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: inlinks <in> [<in>...] <out>");
        System.exit(2);
    }

    Job job = new Job(conf, "inlinks");
    job.setJarByClass(Inlinks.class);
    job.setMapperClass(TokenizerMapper.class);
    //job.setCombinerClass(IdentityReducer.class);
    job.setReducerClass(IdentityReducer.class);
    job.setNumReduceTasks(10);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);

    // All arguments except the last are input paths; the last is the output path.
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:WordCount.java
License:Apache License
public int run(String[] args) throws Exception {
    final long startTime = System.currentTimeMillis();

    String outputReducerType = "filesystem";
    if (args != null && args[0].startsWith(OUTPUT_REDUCER_VAR)) {
        String[] s = args[0].split("=");
        if (s != null && s.length == 2)
            outputReducerType = s[1];
    }
    logger.info("output reducer type: " + outputReducerType);

    // use a smaller page size that doesn't divide the row count evenly to exercise the paging logic better
    ConfigHelper.setRangeBatchSize(getConf(), 99);

    for (int i = 0; i < WordCountSetup.TEST_COUNT; i++) {
        String columnName = "userId";

        Job job = new Job(getConf(), "wordcount");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);

        if (outputReducerType.equalsIgnoreCase("filesystem")) {
            // Write word counts to HDFS, one output directory per iteration.
            job.setReducerClass(ReducerToFilesystem.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX + i));
        } else {
            // Write word counts back into a Cassandra column family.
            job.setReducerClass(ReducerToCassandra.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            job.setOutputKeyClass(ByteBuffer.class);
            job.setOutputValueClass(List.class);
            job.setOutputFormatClass(ColumnFamilyOutputFormat.class);
            ConfigHelper.setOutputColumnFamily(job.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY);
            job.getConfiguration().set(CONF_COLUMN_NAME, "sum");
        }

        job.setInputFormatClass(ColumnFamilyInputFormat.class);
        ConfigHelper.setInputRpcPort(job.getConfiguration(), "9160");
        ConfigHelper.setInputInitialAddress(job.getConfiguration(), "localhost");
        ConfigHelper.setInputPartitioner(job.getConfiguration(), "RandomPartitioner");
        ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY);

        SlicePredicate predicate = new SlicePredicate()
                .setColumn_names(Arrays.asList(ByteBufferUtil.bytes(columnName)));
        ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate);
        // this will cause the predicate to be ignored in favor of scanning everything as a wide row:
        // ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY, true);

        ConfigHelper.setOutputInitialAddress(job.getConfiguration(), "localhost");
        ConfigHelper.setOutputPartitioner(job.getConfiguration(), "RandomPartitioner");

        job.waitForCompletion(true);
    }

    final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
    System.out.println();
    System.out.println("Job Finished in " + duration + " seconds");
    System.out.println();
    return 0;
}
From source file:AggregatedLogsPurger.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    this.conf = getConf();
    this.deleteOlderThanDays = getConf().getInt("deleteOlderThan", 0);
    Preconditions.checkArgument(deleteOlderThanDays > 1,
            "Usage: yarn jar " + "./target/yarn-logs-purger-1.0-SNAPSHOT.jar -DdeleteOlderThan=300 "
                    + "-DdeleteFiles=true. Please provide valid argument for deleteOlderThanDays. It has to "
                    + "be > 0");
    this.shouldDelete = getConf().getBoolean("deleteFiles", false);
    this.suffix = LogAggregationUtils.getRemoteNodeLogDirSuffix(conf);
    this.rootLogDir = new Path(
            conf.get(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, YarnConfiguration.DEFAULT_NM_REMOTE_APP_LOG_DIR));
    return (purge()) ? 0 : -1;
}
From source file:TopFiveAverageMoviesRatedByFemales.java
public static void main(String[] args) throws Exception {
    // Job 1: read the ratings and gender inputs with MultipleInputs and reduce to movie id plus ratings.
    JobConf conf1 = new JobConf();
    Job job1 = new Job(conf1, "TopFiveAverageMoviesRatedByFemales");
    org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job1, new Path(args[0]),
            TextInputFormat.class, TopFiveAverageMoviesRatedByFemales.MapRatings.class);
    org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job1, new Path(args[1]),
            TextInputFormat.class, TopFiveAverageMoviesRatedByFemales.MapGender.class);
    job1.setReducerClass(TopFiveAverageMoviesRatedByFemales.ReduceToMovieIdAndRatings.class);
    job1.setMapOutputKeyClass(Text.class);
    job1.setMapOutputValueClass(Text.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);
    job1.setJarByClass(TopFiveAverageMoviesRatedByFemales.class);
    job1.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job1, new Path(args[3]));

    boolean flag = job1.waitForCompletion(true);
    boolean flag1 = false;
    boolean flag2 = false;

    if (flag) {
        // Job 2: compute per-movie averages from job 1's output.
        JobConf conf2 = new JobConf();
        Job job2 = new Job(conf2, "AverageCalculation");
        //org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job2, new Path(args[2]), TextInputFormat.class, Map2_1.class);
        //org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job2, new Path(args[3]), TextInputFormat.class, Map2_2.class);
        job2.setMapperClass(MapAverage.class);
        job2.setReducerClass(ReduceAverage.class);
        job2.setMapOutputKeyClass(Text.class);
        job2.setMapOutputValueClass(Text.class);
        job2.setOutputKeyClass(Text.class);
        job2.setOutputValueClass(Text.class);
        job2.setJarByClass(TopFiveAverageMoviesRatedByFemales.class);
        job2.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job2, new Path(args[3]));
        FileOutputFormat.setOutputPath(job2, new Path(args[4]));
        flag1 = job2.waitForCompletion(true);
    }

    if (flag1) {
        // Job 3: join the averages with the movie-name input and reduce to the top five.
        JobConf conf3 = new JobConf();
        Job job3 = new Job(conf3, "AverageCalculation");
        org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job3, new Path(args[4]),
                TextInputFormat.class, MapAverageTop5.class);
        org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job3, new Path(args[2]),
                TextInputFormat.class, MapMovieName.class);
        //job3.setMapperClass(MapAverageTop5.class);
        job3.setReducerClass(ReduceAverageTop5.class);
        job3.setMapOutputKeyClass(Text.class);
        job3.setMapOutputValueClass(Text.class);
        job3.setOutputKeyClass(Text.class);
        job3.setOutputValueClass(Text.class);
        job3.setJarByClass(TopFiveAverageMoviesRatedByFemales.class);
        job3.setOutputFormatClass(TextOutputFormat.class);
        //FileInputFormat.addInputPath(job3, new Path(args[4]));
        FileOutputFormat.setOutputPath(job3, new Path(args[5]));
        flag2 = job3.waitForCompletion(true);
    }
}
From source file:GenIndex.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }

    String tmpPath = "/local_scratch/wordcount/tmp";
    String stopWord = "/local_scratch/wordcount/stopword";

    // Job to count the words
    Job count_job = new Job(conf, "word count");
    count_job.setJarByClass(GenIndex.class);
    count_job.setMapperClass(Mapper1_Count.class);
    count_job.setCombinerClass(Reducer1_Count.class);
    count_job.setReducerClass(Reducer1_Count.class);
    count_job.setOutputKeyClass(Text.class);
    count_job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(count_job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(count_job, new Path(tmpPath));
    count_job.waitForCompletion(true);

    // Job to sort the word counts
    Job sort_job = new Job(conf, "word sort");
    sort_job.setJarByClass(GenIndex.class);
    sort_job.setMapperClass(Mapper2_Sort.class);
    sort_job.setCombinerClass(Reducer2_Sort.class);
    sort_job.setReducerClass(Reducer2_Sort.class);
    sort_job.setSortComparatorClass(SortReducerByValuesKeyComparator.class);
    sort_job.setOutputKeyClass(IntWritable.class);
    sort_job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(sort_job, new Path(tmpPath));
    FileOutputFormat.setOutputPath(sort_job, new Path(stopWord));
    sort_job.waitForCompletion(true);

    // Job to generate the index
    Job index_job = new Job(conf, "word index");
    index_job.setJarByClass(GenIndex.class);
    index_job.setMapperClass(Mapper3_index.class);
    index_job.setCombinerClass(Reducer3_index.class);
    index_job.setReducerClass(Reducer3_index.class);
    index_job.setOutputKeyClass(Text.class);
    index_job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(index_job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(index_job, new Path(otherArgs[1]));
    index_job.waitForCompletion(true);

    System.exit(0);
}
From source file:SingleFileReader.java
License:Apache License
private void writeFile(String cached, String fSize) throws Exception {
    fileSize = Double.parseDouble((fSize.split("g|G"))[0]) * 1024 * 1024 * 1024;
    String hdfsFolder = "/hdfs_test/";
    String hdfsFile = hdfsFolder + fSize;
    short replication = 1;
    boolean overWrite = true;
    int blockSize = 536870912;
    double numIters = fileSize / (double) bufferSize;

    Configuration conf = getConf();
    fs = FileSystem.get(conf);
    hdfsFilePath = new Path(hdfsFile);
    OutputStream os = fs.create(hdfsFilePath, overWrite, bufferSize, replication, blockSize);

    /* Initialize byte buffer */
    ByteBuffer buf = ByteBuffer.allocate(bufferSize);
    buf.order(ByteOrder.nativeOrder());
    for (int k = 0; k < bufferSize / Integer.SIZE; k++) {
        buf.putInt(k);
    }
    buf.flip();

    /* Write the content of the byte buffer to the HDFS file */
    t = new Timer();
    t.start(0);
    for (long i = 0; i < numIters; i++) {
        os.write(buf.array());
        buf.flip();
    }
    t.end(0);
    os.close();

    /* Check to see if the file needs to be cached */
    t.start(1);
    if (cached.equals("cache")) {
        String cmdStr = "/usr/local/hadoop/bin/hdfs cacheadmin -addDirective -path " + hdfsFile
                + " -pool hdfs_test";
        Process p = Runtime.getRuntime().exec(cmdStr);
        p.waitFor();

        String cmdOutLine = "";
        StringBuffer cmdOutBuf = new StringBuffer();
        BufferedReader cmdOutReader = new BufferedReader(new InputStreamReader(p.getInputStream()));
        while ((cmdOutLine = cmdOutReader.readLine()) != null) {
            cmdOutBuf.append(cmdOutLine + "\n");
        }
    }
    t.end(1);
}
From source file:BMTKeyValueLoader.java
License:Apache License
public int run(String[] args) {
    JobConf conf = new JobConf(getConf(), CassandraTableLoader.class);
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);

    conf.setInputFormat(KeyValueTextInputFormat.class);
    conf.setJobName("BMTKeyValueLoader");
    conf.setMapperClass(Map.class);
    conf.setReducerClass(Reduce.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        other_args.add(args[i]);
    }
    FileInputFormat.setInputPaths(conf, new Path(other_args.get(0)));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    try {
        JobClient.runJob(conf);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    return 0;
}
From source file:RawParascaleFileSystem.java
License:Apache License
/**
 * {@inheritDoc}
 */
@Override
public Path getHomeDirectory() {
    return new Path(getRawHomeDirectory()).makeQualified(this.getUri(), this.getWorkingDirectory());
}
From source file:HoopRemoteTask.java
License:Open Source License
public static int countTerms(Configuration conf) {
    dbg("postProcess ()");

    int count = 0;

    String output = conf.get("mapred.output.dir");
    if (output != null) {
        if (output.isEmpty() == true)
            output = HoopLink.outputpath;
    } else
        output = HoopLink.outputpath;

    Path inFile = new Path(output + "/part-r-00000");

    FSDataInputStream in = null;

    @SuppressWarnings("unused")
    String thisLine = null;

    try {
        in = HoopRemoteTask.hdfs.open(inFile);
        BufferedReader reader = new BufferedReader(new InputStreamReader(in));

        while ((thisLine = reader.readLine()) != null) {
            count++;
        }

        in.close();
    } catch (IOException e) {
        e.printStackTrace();
        dbg("Error opening file in HDFS");
    }

    return (count);
}
From source file:HoopRemoteTask.java
License:Open Source License
public static void postProcess(Configuration conf) {
    dbg("postProcess ()");

    if (HoopLink.nrshards == 1) {
        dbg("Only 1 shard needed, skipping post processing");
        return;
    }

    if (HoopLink.shardcreate.equals("mos") == true) {
        dbg("We shouldn't be post-processing since the HoopShardedOutputFormat class already did this");
        return;
    }

    if (HoopLink.shardcreate.equals("hdfs") == true) {
        dbg("Starting shard post-process task ...");

        int termCount = countTerms(conf);

        String output = conf.get("mapred.output.dir");
        if (output != null) {
            if (output.isEmpty() == true)
                output = HoopLink.outputpath;
        } else
            output = HoopLink.outputpath;

        dbg("Post processing " + termCount + " items in: " + output);

        Path inFile = new Path(output + "/part-r-00000");
        Path outFile = null;

        FSDataInputStream in = null;
        FSDataOutputStream out = null;

        try {
            in = HoopRemoteTask.hdfs.open(inFile);
            BufferedReader reader = new BufferedReader(new InputStreamReader(in));

            String thisLine;

            int count = 0;
            int split = Math.round(termCount / HoopLink.nrshards);
            int partition = 0;

            outFile = new Path(output + "/partition-" + partition + "-00000.txt");
            out = HoopRemoteTask.hdfs.create(outFile);

            if (out != null) {
                while ((thisLine = reader.readLine()) != null) {
                    StringBuffer formatted = new StringBuffer();
                    formatted.append(thisLine);
                    formatted.append("\n");

                    count++;

                    // Roll over to the next partition file once the current one holds its share of terms.
                    if (count > split) {
                        out.close();

                        partition++;
                        outFile = new Path(output + "/partition-" + partition + "-00000.txt");
                        out = HoopRemoteTask.hdfs.create(outFile);

                        split++;
                        count = 0;
                    }

                    byte[] utf8Bytes = formatted.toString().getBytes("UTF8");
                    // We get an additional 0 because of Java string encoding. leave it out!
                    out.write(utf8Bytes);
                }

                if (in != null)
                    in.close();
                if (out != null)
                    out.close();
            } else
                dbg("Error: unable to open output file");
        } catch (IOException e) {
            e.printStackTrace();
        }

        dbg("Starting rudimentary sharding into " + HoopLink.nrshards);

        if (in != null) {
            try {
                in.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    HoopStatistics stats = new HoopStatistics();
    String results = stats.printStatistics(null);
    dbg(results);
}