Example usage for org.apache.hadoop.fs Path Path

List of usage examples for org.apache.hadoop.fs Path Path

Introduction

On this page you can find example usages of the org.apache.hadoop.fs.Path(URI) constructor.

Prototype

public Path(URI aUri) 

Source Link

Document

Construct a path from a URI
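
For reference, a minimal standalone sketch of this constructor (not taken from the source files below; the hdfs://namenode:8020 address and file name are placeholder values):

import java.net.URI;

import org.apache.hadoop.fs.Path;

public class PathFromUriExample {
    public static void main(String[] args) {
        // Build a Path directly from a URI; the scheme and authority are preserved.
        Path p = new Path(URI.create("hdfs://namenode:8020/user/alice/data.txt"));

        System.out.println(p);                     // hdfs://namenode:8020/user/alice/data.txt
        System.out.println(p.getName());           // data.txt
        System.out.println(p.toUri().getScheme()); // hdfs
        System.out.println(p.getParent());         // hdfs://namenode:8020/user/alice
    }
}

For simple paths this behaves like new Path("hdfs://namenode:8020/user/alice/data.txt"); the URI form is handy when a java.net.URI is already at hand, for example from FileSystem.getUri().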

Usage

From source file:Inlinks.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: inlinks <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "inlinks");
    job.setJarByClass(Inlinks.class);
    job.setMapperClass(TokenizerMapper.class);
    //job.setCombinerClass(IdentityReducer.class);
    job.setReducerClass(IdentityReducer.class);
    job.setNumReduceTasks(10);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:WordCount.java

License:Apache License

public int run(String[] args) throws Exception {

    // start
    final long startTime = System.currentTimeMillis();
    String outputReducerType = "filesystem";
    if (args != null && args[0].startsWith(OUTPUT_REDUCER_VAR)) {
        String[] s = args[0].split("=");
        if (s != null && s.length == 2)
            outputReducerType = s[1];
    }

    logger.info("output reducer type: " + outputReducerType);

    // use a smaller page size that doesn't divide the row count evenly to exercise the paging logic better
    ConfigHelper.setRangeBatchSize(getConf(), 99);

    for (int i = 0; i < WordCountSetup.TEST_COUNT; i++) {
        String columnName = "userId";

        Job job = new Job(getConf(), "wordcount");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);

        if (outputReducerType.equalsIgnoreCase("filesystem")) {
            job.setReducerClass(ReducerToFilesystem.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX + i));
        } else {
            job.setReducerClass(ReducerToCassandra.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            job.setOutputKeyClass(ByteBuffer.class);
            job.setOutputValueClass(List.class);
            job.setOutputFormatClass(ColumnFamilyOutputFormat.class);
            ConfigHelper.setOutputColumnFamily(job.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY);
            job.getConfiguration().set(CONF_COLUMN_NAME, "sum");
        }
        job.setInputFormatClass(ColumnFamilyInputFormat.class);
        ConfigHelper.setInputRpcPort(job.getConfiguration(), "9160");
        ConfigHelper.setInputInitialAddress(job.getConfiguration(), "localhost");
        ConfigHelper.setInputPartitioner(job.getConfiguration(), "RandomPartitioner");
        ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY);

        SlicePredicate predicate = new SlicePredicate()
                .setColumn_names(Arrays.asList(ByteBufferUtil.bytes(columnName)));
        ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate);

        // this will cause the predicate to be ignored in favor of scanning everything as a wide row
        // Last change
        // ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY, true);

        ConfigHelper.setOutputInitialAddress(job.getConfiguration(), "localhost");
        ConfigHelper.setOutputPartitioner(job.getConfiguration(), "RandomPartitioner");

        job.waitForCompletion(true);
    }

    // print total elapsed time
    final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
    System.out.println();
    System.out.println("Job Finished in " + duration + " seconds");
    System.out.println();

    return 0;
}

From source file:AggregatedLogsPurger.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    this.conf = getConf();
    this.deleteOlderThanDays = getConf().getInt("deleteOlderThan", 0);
    Preconditions.checkArgument(deleteOlderThanDays > 1,
            "Usage: yarn jar " + "./target/yarn-logs-purger-1.0-SNAPSHOT.jar -DdeleteOlderThan=300 "
                    + "-DdeleteFiles=true.  Please provide valid argument for deleteOlderThanDays. It has to "
                    + "be > 0");
    this.shouldDelete = getConf().getBoolean("deleteFiles", false);

    this.suffix = LogAggregationUtils.getRemoteNodeLogDirSuffix(conf);
    this.rootLogDir = new Path(
            conf.get(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, YarnConfiguration.DEFAULT_NM_REMOTE_APP_LOG_DIR));
    return (purge()) ? 0 : -1;
}

From source file:TopFiveAverageMoviesRatedByFemales.java

public static void main(String[] args) throws Exception {
    JobConf conf1 = new JobConf();
    Job job1 = new Job(conf1, "TopFiveAverageMoviesRatedByFemales");
    org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job1, new Path(args[0]),
            TextInputFormat.class, TopFiveAverageMoviesRatedByFemales.MapRatings.class);
    org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job1, new Path(args[1]),
            TextInputFormat.class, TopFiveAverageMoviesRatedByFemales.MapGender.class);

    job1.setReducerClass(TopFiveAverageMoviesRatedByFemales.ReduceToMovieIdAndRatings.class);
    job1.setMapOutputKeyClass(Text.class);
    job1.setMapOutputValueClass(Text.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);
    job1.setJarByClass(TopFiveAverageMoviesRatedByFemales.class);

    job1.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job1, new Path(args[3]));

    boolean flag = job1.waitForCompletion(true);
    boolean flag1 = false;
    boolean flag2 = false;

    if (flag) {
        JobConf conf2 = new JobConf();
        Job job2 = new Job(conf2, "AverageCalculation");

        //org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job2, new Path(args[2]), TextInputFormat.class, Map2_1.class);
        //org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job2, new Path(args[3]), TextInputFormat.class, Map2_2.class);

        job2.setMapperClass(MapAverage.class);
        job2.setReducerClass(ReduceAverage.class);
        job2.setMapOutputKeyClass(Text.class);
        job2.setMapOutputValueClass(Text.class);
        job2.setOutputKeyClass(Text.class);
        job2.setOutputValueClass(Text.class);
        job2.setJarByClass(TopFiveAverageMoviesRatedByFemales.class);

        job2.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job2, new Path(args[3]));
        FileOutputFormat.setOutputPath(job2, new Path(args[4]));

        flag1 = job2.waitForCompletion(true);
    }

    if (flag1) {
        JobConf conf3 = new JobConf();
        Job job3 = new Job(conf3, "AverageCalculation");

        org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job3, new Path(args[4]),
                TextInputFormat.class, MapAverageTop5.class);
        org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job3, new Path(args[2]),
                TextInputFormat.class, MapMovieName.class);

        //job3.setMapperClass(MapAverageTop5.class);
        job3.setReducerClass(ReduceAverageTop5.class);
        job3.setMapOutputKeyClass(Text.class);
        job3.setMapOutputValueClass(Text.class);
        job3.setOutputKeyClass(Text.class);
        job3.setOutputValueClass(Text.class);
        job3.setJarByClass(TopFiveAverageMoviesRatedByFemales.class);

        job3.setOutputFormatClass(TextOutputFormat.class);
        //FileInputFormat.addInputPath(job3, new Path(args[4]));
        FileOutputFormat.setOutputPath(job3, new Path(args[5]));

        flag2 = job3.waitForCompletion(true);

    }
}

From source file:GenIndex.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    String tmpPath = "/local_scratch/wordcount/tmp";
    String stopWord = "/local_scratch/wordcount/stopword";

    // Job to count the words
    Job count_job = new Job(conf, "word count");
    count_job.setJarByClass(GenIndex.class);
    count_job.setMapperClass(Mapper1_Count.class);
    count_job.setCombinerClass(Reducer1_Count.class);
    count_job.setReducerClass(Reducer1_Count.class);

    count_job.setOutputKeyClass(Text.class);
    count_job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(count_job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(count_job, new Path(tmpPath));
    count_job.waitForCompletion(true);

    Job sort_job = new Job(conf, "word sort");
    sort_job.setJarByClass(GenIndex.class);
    sort_job.setMapperClass(Mapper2_Sort.class);
    sort_job.setCombinerClass(Reducer2_Sort.class);
    sort_job.setReducerClass(Reducer2_Sort.class);
    sort_job.setSortComparatorClass(SortReducerByValuesKeyComparator.class);
    sort_job.setOutputKeyClass(IntWritable.class);
    sort_job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(sort_job, new Path(tmpPath));
    FileOutputFormat.setOutputPath(sort_job, new Path(stopWord));

    sort_job.waitForCompletion(true);

    // job to generate the index
    Job index_job = new Job(conf, "word index");
    index_job.setJarByClass(GenIndex.class);
    index_job.setMapperClass(Mapper3_index.class);
    index_job.setCombinerClass(Reducer3_index.class);
    index_job.setReducerClass(Reducer3_index.class);

    index_job.setOutputKeyClass(Text.class);
    index_job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(index_job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(index_job, new Path(otherArgs[1]));

    index_job.waitForCompletion(true);

    System.exit(0);
}

From source file:SingleFileReader.java

License:Apache License

private void writeFile(String cached, String fSize) throws Exception {
    fileSize = Double.parseDouble((fSize.split("g|G"))[0]) * 1024 * 1024 * 1024;
    String hdfsFolder = "/hdfs_test/";
    String hdfsFile = hdfsFolder + fSize;
    short replication = 1;
    boolean overWrite = true;
    int blockSize = 536870912;
    double numIters = fileSize / (double) bufferSize;

    Configuration conf = getConf();
    fs = FileSystem.get(conf);
    hdfsFilePath = new Path(hdfsFile);
    OutputStream os = fs.create(hdfsFilePath, overWrite, bufferSize, replication, blockSize);

    /* Initialize byte buffer */
    ByteBuffer buf = ByteBuffer.allocate(bufferSize);
    buf.order(ByteOrder.nativeOrder());
    for (int k = 0; k < bufferSize / Integer.SIZE; k++) {
        buf.putInt(k);
    }
    buf.flip();

    /* Write the contents of the byte buffer to the HDFS file */
    t = new Timer();
    t.start(0);
    for (long i = 0; i < numIters; i++) {
        os.write(buf.array());
        buf.flip();
    }
    t.end(0);
    os.close();

    /* Check to see if the file needs to be cached */
    t.start(1);
    if (cached.equals("cache")) {
        String cmdStr = "/usr/local/hadoop/bin/hdfs cacheadmin -addDirective -path " + hdfsFile
                + " -pool hdfs_test";
        // System.out.println(cmdStr);
        Process p = Runtime.getRuntime().exec(cmdStr);
        p.waitFor();
        String cmdOutLine = "";
        StringBuffer cmdOutBuf = new StringBuffer();
        BufferedReader cmdOutReader = new BufferedReader(new InputStreamReader(p.getInputStream()));
        while ((cmdOutLine = cmdOutReader.readLine()) != null) {
            cmdOutBuf.append(cmdOutLine + "\n");
        }
        // System.out.println(cmdOutBuf.toString());
    }
    t.end(1);
}

From source file:BMTKeyValueLoader.java

License:Apache License

public int run(String[] args) {
    JobConf conf = new JobConf(getConf(), CassandraTableLoader.class);
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);

    conf.setInputFormat(KeyValueTextInputFormat.class);
    conf.setJobName("BMTKeyValueLoader");
    conf.setMapperClass(Map.class);
    conf.setReducerClass(Reduce.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        other_args.add(args[i]);
    }

    FileInputFormat.setInputPaths(conf, new Path(other_args.get(0)));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));
    try {
        JobClient.runJob(conf);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    return 0;
}

From source file:RawParascaleFileSystem.java

License:Apache License

/**
 * {@inheritDoc}
 */
@Override
public Path getHomeDirectory() {
    return new Path(getRawHomeDirectory()).makeQualified(this.getUri(), this.getWorkingDirectory());
}

From source file:HoopRemoteTask.java

License:Open Source License

/**
*
*/
public static int countTerms(Configuration conf) {
    dbg("postProcess ()");

    int count = 0;

    String output = conf.get("mapred.output.dir");

    if (output != null) {
        if (output.isEmpty() == true)
            output = HoopLink.outputpath;
    } else
        output = HoopLink.outputpath;

    Path inFile = new Path(output + "/part-r-00000");
    FSDataInputStream in = null;

    @SuppressWarnings("unused")
    String thisLine = null;

    try {
        in = HoopRemoteTask.hdfs.open(inFile);

        BufferedReader reader = new BufferedReader(new InputStreamReader(in));

        while ((thisLine = reader.readLine()) != null) {
            count++;
        }

        in.close();
    } catch (IOException e) {
        e.printStackTrace();
        dbg("Error opening file in HDFS");
    }

    return (count);
}

From source file:HoopRemoteTask.java

License:Open Source License

/**
*
*/
public static void postProcess(Configuration conf) {
    dbg("postProcess ()");

    if (HoopLink.nrshards == 1) {
        dbg("Only 1 shard needed, skipping post processing");
        return;
    }

    if (HoopLink.shardcreate.equals("mos") == true) {
        dbg("We shouldn't be pos-processing since the HoopShardedOutputFormat class already did this");
        return;
    }

    if (HoopLink.shardcreate.equals("hdfs") == true) {
        dbg("Starting shard post-process task ...");

        int termCount = countTerms(conf);

        String output = conf.get("mapred.output.dir");

        if (output != null) {
            if (output.isEmpty() == true)
                output = HoopLink.outputpath;
        } else
            output = HoopLink.outputpath;

        dbg("Post processing " + termCount + " items in: " + output);

        Path inFile = new Path(output + "/part-r-00000");
        Path outFile = null;
        FSDataInputStream in = null;
        FSDataOutputStream out = null;

        try {
            in = HoopRemoteTask.hdfs.open(inFile);

            BufferedReader reader = new BufferedReader(new InputStreamReader(in));

            String thisLine;

            int count = 0;
            int split = Math.round(termCount / HoopLink.nrshards);
            int partition = 0;

            outFile = new Path(output + "/partition-" + partition + "-00000.txt");
            out = HoopRemoteTask.hdfs.create(outFile);

            if (out != null) {
                while ((thisLine = reader.readLine()) != null) {
                    StringBuffer formatted = new StringBuffer();
                    formatted.append(thisLine);
                    formatted.append("\n");

                    count++;

                    if (count > split) {
                        out.close();

                        partition++;

                        outFile = new Path(output + "/partition-" + partition + "-00000.txt");
                        out = HoopRemoteTask.hdfs.create(outFile);

                        split++;
                        count = 0;
                    }

                    byte[] utf8Bytes = formatted.toString().getBytes("UTF8");
                    // We get an additional 0 because of Java string encoding. leave it out!
                    out.write(utf8Bytes);
                }

                if (in != null)
                    in.close();

                if (out != null)
                    out.close();
            } else
                dbg("Error: unable to open output file");

        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }

        dbg("Starting rudimentary sharding into " + HoopLink.nrshards);

        if (in != null) {

            try {
                in.close();
            } catch (IOException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
        }

    }

    HoopStatistics stats = new HoopStatistics();
    String results = stats.printStatistics(null);
    dbg(results);
}