Example usage for org.apache.hadoop.fs Path toString

Introduction

This page collects example usages of the toString() method of org.apache.hadoop.fs.Path.

Prototype

@Override
    public String toString()

Source Link

Usage

From source file:com.asakusafw.windgate.hadoopfs.ssh.WindGateHadoopGetTest.java

License:Apache License

/**
 * Run as simulation mode./*from w  ww . java 2s  . c  o  m*/
 * @throws Exception if failed
 */
@Test
public void simulated() throws Exception {
    RuntimeContext.set(RuntimeContext.DEFAULT.mode(ExecutionMode.SIMULATION));

    Path testing = new Path(PREFIX, "testing");
    put(testing, "Hello, world!");

    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    int result = new WindGateHadoopGet(conf).execute(buffer, testing.toString());
    assertThat(result, is(0));

    Map<String, String> contents = get(buffer.toByteArray());
    assertThat(contents.toString(), contents.size(), is(0));
}

From source file:com.asakusafw.windgate.hadoopfs.ssh.WindGateHadoopGetTest.java

License:Apache License

/**
 * Attemts to get missing files in simulation mode.
 * @throws Exception if failed/*  w w  w  .ja v  a2  s.  c om*/
 */
@Test
public void missing_sim() throws Exception {
    RuntimeContext.set(RuntimeContext.DEFAULT.mode(ExecutionMode.SIMULATION));

    Path testing = new Path(PREFIX, "testing");

    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    int result = new WindGateHadoopGet(conf).execute(buffer, testing.toString());
    assertThat(result, is(0));
}

From source file:com.asakusafw.windgate.hadoopfs.ssh.WindGateHadoopPutTest.java

License:Apache License

/**
 * Invokes the put command with a non-empty argument list.
 * <p>
 * A file list with a single entry is fed to {@code execute} together with one
 * path argument; the test expects a non-zero result.
 * </p>
 * @throws Exception if failed
 */
@Test
public void arguments() throws Exception {
    final Path target = new Path(PREFIX, "testing");

    // Build the serialized file list that serves as the command's input.
    final ByteArrayOutputStream fileList = new ByteArrayOutputStream();
    try (FileList.Writer listWriter = FileList.createWriter(fileList)) {
        put(listWriter, target, "Hello, world!");
    }

    final WindGateHadoopPut command = new WindGateHadoopPut(conf);
    final int exitCode = command.execute(new ByteArrayInputStream(fileList.toByteArray()), target.toString());
    assertThat(exitCode, is(not(0)));
}

From source file:com.benchmark.mapred.PiEstimator.java

License:Apache License

/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi/*www .j  ava2  s .c  o m*/
 */
public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf) throws IOException {
    //setup job conf
    jobConf.setJobName(PiEstimator.class.getSimpleName());

    jobConf.setInputFormat(SequenceFileInputFormat.class);

    jobConf.setOutputKeyClass(BooleanWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    jobConf.setMapperClass(PiMapper.class);
    jobConf.setNumMapTasks(numMaps);

    jobConf.setReducerClass(PiReducer.class);
    jobConf.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    jobConf.setSpeculativeExecution(false);

    //setup input/output directories
    //final Path inDir = new Path(TMP_DIR, "in");
    final Path inDir = new Path("/home/hadoop1/tmp_dir", "in");
    System.out.println("inDir =" + inDir.toString());
    //final Path outDir = new Path(TMP_DIR, "out");
    final Path outDir = new Path("/home/hadoop1/tmp_dir", "out");
    System.out.println("outDir =" + outDir.toString());
    FileInputFormat.setInputPaths(jobConf, inDir);
    FileOutputFormat.setOutputPath(jobConf, outDir);

    final FileSystem fs = FileSystem.get(jobConf);
    if (fs.exists(TMP_DIR)) {
        throw new IOException(
                "Tmp directory " + fs.makeQualified(TMP_DIR) + " already exists.  Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        //generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf, file, LongWritable.class,
                    LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        //start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = System.currentTimeMillis();
        JobClient.runJob(jobConf);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        //read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        //compute estimated value
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get()))
                .divide(BigDecimal.valueOf(numMaps)).divide(BigDecimal.valueOf(numPoints));
    } finally {
        fs.delete(TMP_DIR, true);
    }
}

From source file:com.benchmark.mapred.Sort.java

License:Apache License

/**
 * The main driver for sort program.
 * Invoke this method to submit the map/reduce job.
 *
 * <p>Recognized options are {@code -m}, {@code -r}, {@code -inFormat},
 * {@code -outFormat}, {@code -outKey}, {@code -outValue} and
 * {@code -totalOrder}; every other argument is collected as a positional
 * argument, and exactly two of those (input path, output path) must remain.</p>
 *
 * @param args command-line arguments as described above
 * @return {@code 0} after the job has run, or the value returned by
 *         {@code printUsage()} when the arguments are rejected
 * @throws Exception declared broadly by the signature; the original
 *                   documentation names {@code IOException} for communication
 *                   problems with the job tracker
 */
public int run(String[] args) throws Exception {

    JobConf jobConf = new JobConf(getConf(), Sort.class);
    jobConf.setJobName("sorter");

    jobConf.setMapperClass(IdentityMapper.class);
    jobConf.setReducerClass(IdentityReducer.class);

    // Default reduce count: 90% of the cluster's maximum reduce tasks, unless
    // "test.sort.reduces_per_host" is set, in which case it is that value
    // multiplied by the number of task trackers. "-r" below overrides both.
    JobClient client = new JobClient(jobConf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String sort_reduces = jobConf.get("test.sort.reduces_per_host");
    if (sort_reduces != null) {
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces);
    }
    // Defaults used when the corresponding option is not supplied.
    Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class;
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
    Class<? extends Writable> outputValueClass = BytesWritable.class;
    List<String> otherArgs = new ArrayList<String>();
    // Stays null unless -totalOrder is given; a non-null sampler enables total-order partitioning below.
    InputSampler.Sampler<K, V> sampler = null;
    for (int i = 0; i < args.length; ++i) {
        try {
            // Each option consumes its value(s) via args[++i], so i is advanced inside the branches.
            if ("-m".equals(args[i])) {
                jobConf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                num_reduces = Integer.parseInt(args[++i]);
            } else if ("-inFormat".equals(args[i])) {
                inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
            } else if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else if ("-outKey".equals(args[i])) {
                outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
            } else if ("-outValue".equals(args[i])) {
                outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
            } else if ("-totalOrder".equals(args[i])) {
                // -totalOrder takes three values: pcnt, numSamples, maxSplits.
                double pcnt = Double.parseDouble(args[++i]);
                int numSamples = Integer.parseInt(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                // A non-positive maxSplits is replaced by Integer.MAX_VALUE.
                if (0 >= maxSplits)
                    maxSplits = Integer.MAX_VALUE;
                sampler = new InputSampler.RandomSampler<K, V>(pcnt, numSamples, maxSplits);
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            // i was already advanced, so args[i] is the value that failed to parse.
            // NOTE(review): Double.parseDouble also throws NumberFormatException, so this
            // "Integer expected" message is printed for a bad -totalOrder pcnt as well.
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            // Reached when args[++i] runs past the end; args[i - 1] is then the last argument given.
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }

    // Set user-supplied (possibly default) job configs
    jobConf.setNumReduceTasks(num_reduces);

    jobConf.setInputFormat(inputFormatClass);
    jobConf.setOutputFormat(outputFormatClass);

    jobConf.setOutputKeyClass(outputKeyClass);
    jobConf.setOutputValueClass(outputValueClass);

    // Make sure there are exactly 2 parameters left.
    if (otherArgs.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + otherArgs.size() + " instead of 2.");
        return printUsage();
    }
    FileInputFormat.setInputPaths(jobConf, otherArgs.get(0));
    FileOutputFormat.setOutputPath(jobConf, new Path(otherArgs.get(1)));

    if (sampler != null) {
        System.out.println("Sampling input to effect total-order sort...");
        jobConf.setPartitionerClass(TotalOrderPartitioner.class);
        // The partition file is placed inside the (qualified) first input directory.
        Path inputDir = FileInputFormat.getInputPaths(jobConf)[0];
        inputDir = inputDir.makeQualified(inputDir.getFileSystem(jobConf));
        Path partitionFile = new Path(inputDir, "_sortPartitioning");
        TotalOrderPartitioner.setPartitionFile(jobConf, partitionFile);
        InputSampler.<K, V>writePartitionFile(jobConf, sampler);
        // The URI fragment after '#' is "_sortPartitioning"; the URI is registered as a
        // cache file and symlink creation is enabled for the job.
        URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning");
        DistributedCache.addCacheFile(partitionUri, jobConf);
        DistributedCache.createSymlink(jobConf);
    }

    System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from "
            + FileInputFormat.getInputPaths(jobConf)[0] + " into " + FileOutputFormat.getOutputPath(jobConf)
            + " with " + num_reduces + " reduces.");
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    // jobResult is not declared in this method; presumably a field of the enclosing class.
    jobResult = JobClient.runJob(jobConf);
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    // Elapsed wall-clock time in whole seconds (integer division).
    System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return 0;
}

From source file:com.benchmark.mapred.terasort.TeraSort.java

License:Apache License

/**
 * Configures and runs the TeraSort job.
 *
 * @param args exactly three arguments: the input directory, the output
 *             directory, and the number of reduce tasks
 * @return {@code 0} once the job has run; {@code 2} if the number of
 *         arguments is not three
 * @throws Exception if job setup or execution fails (including a
 *                   {@code NumberFormatException} for a non-numeric reduce count)
 */
public int run(String[] args) throws Exception {
    LOG.info("starting");
    // Validate before touching args[]. Previously args[0] was read ahead of this
    // check and the check only printed the message, so a short argument list ended
    // in an ArrayIndexOutOfBoundsException instead of a clean failure.
    if (args.length != 3) {
        System.out.println("ERROR: Wrong number of parameters: " + args.length + " instead of 3.");
        return 2;
    }
    JobConf job = (JobConf) getConf();
    Path inputDir = new Path(args[0]);
    inputDir = inputDir.makeQualified(inputDir.getFileSystem(job));
    // The partition file lives in the qualified input directory; the URI fragment
    // after '#' is the partition file name.
    Path partitionFile = new Path(inputDir, TeraInputFormat.PARTITION_FILENAME);
    URI partitionUri = new URI(partitionFile.toString() + "#" + TeraInputFormat.PARTITION_FILENAME);
    TeraInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setJobName("TeraSort");
    job.setJarByClass(TeraSort.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormat(TeraInputFormat.class);
    job.setOutputFormat(TeraOutputFormat.class);
    job.setPartitionerClass(TotalOrderPartitioner.class);
    job.setNumReduceTasks(Integer.parseInt(args[2]));
    TeraInputFormat.writePartitionFile(job, partitionFile);
    DistributedCache.addCacheFile(partitionUri, job);
    DistributedCache.createSymlink(job);
    job.setInt("dfs.replication", 1);
    TeraOutputFormat.setFinalSync(job, true);
    Date startIteration = new Date();
    JobClient.runJob(job);
    Date endIteration = new Date();
    // Elapsed wall-clock time in whole seconds (integer division).
    System.out.println(
            "The iteration took " + (endIteration.getTime() - startIteration.getTime()) / 1000 + " seconds.");
    LOG.info("done");
    return 0;
}

From source file:com.bianfeng.bfas.hive.io.RealtimeInputFormat2.java

License:Apache License

/**
 * Set the array of {@link Path}s as the list of inputs
 * for the map-reduce job./*from   w  ww .j  a  v a2  s.  c o m*/
 * 
 * @param conf Configuration of the job. 
 * @param inputPaths the {@link Path}s of the input directories/files 
 * for the map-reduce job.
 */
public static void setInputPaths(JobConf conf, Path... inputPaths) {
    Path path = new Path(conf.getWorkingDirectory(), inputPaths[0]);
    StringBuffer str = new StringBuffer(StringUtils.escapeString(path.toString()));
    for (int i = 1; i < inputPaths.length; i++) {
        str.append(StringUtils.COMMA_STR);
        path = new Path(conf.getWorkingDirectory(), inputPaths[i]);
        str.append(StringUtils.escapeString(path.toString()));
    }
    conf.set("mapred.input.dir", str.toString());
}

From source file:com.bianfeng.bfas.hive.io.RealtimeInputFormat2.java

License:Apache License

/**
 * Add a {@link Path} to the list of inputs for the map-reduce job.
 * /*from ww  w  . j a v  a  2  s .c om*/
 * @param conf The configuration of the job 
 * @param path {@link Path} to be added to the list of inputs for 
 *            the map-reduce job.
 */
public static void addInputPath(JobConf conf, Path path) {
    path = new Path(conf.getWorkingDirectory(), path);
    String dirStr = StringUtils.escapeString(path.toString());
    String dirs = conf.get("mapred.input.dir");
    conf.set("mapred.input.dir", dirs == null ? dirStr : dirs + StringUtils.COMMA_STR + dirStr);
}

From source file:com.bigjob.Client.java

License:Apache License

/**
 * Places a resource under {@code <home>/<appName>/<appId>/<fileDstPath>} on the
 * given file system and registers it in {@code localResources}.
 *
 * @param fs file system that receives the resource
 * @param fileSrcPath local file to copy, or {@code null} to write
 *        {@code resources} as the file content instead
 * @param fileDstPath destination path relative to the application directory;
 *        also used as the key in {@code localResources}
 * @param appId application id used in the destination path
 * @param localResources map that receives the new {@code LocalResource}
 * @param resources content written via {@code writeUTF} when
 *        {@code fileSrcPath} is {@code null}
 * @throws IOException if writing, copying, or reading the file status fails
 */
private void addToLocalResources(FileSystem fs, String fileSrcPath, String fileDstPath, int appId,
        Map<String, LocalResource> localResources, String resources) throws IOException {
    final Path target = new Path(fs.getHomeDirectory(), appName + "/" + appId + "/" + fileDstPath);
    LOG.debug("HDFS Destination for Script: " + target.toString());

    if (fileSrcPath != null) {
        fs.copyFromLocalFile(new Path(fileSrcPath), target);
    } else {
        // No source file: the supplied string itself becomes the file content.
        FSDataOutputStream out = null;
        try {
            out = FileSystem.create(fs, target, new FsPermission((short) 0710));
            out.writeUTF(resources);
        } finally {
            // NOTE(review): closeQuietly also hides failures raised while closing the output stream.
            IOUtils.closeQuietly(out);
        }
    }

    // Length and modification time are read back from the file that was just created.
    final FileStatus status = fs.getFileStatus(target);
    final LocalResource resource = LocalResource.newInstance(
            ConverterUtils.getYarnUrlFromURI(target.toUri()),
            LocalResourceType.FILE,
            LocalResourceVisibility.APPLICATION,
            status.getLen(),
            status.getModificationTime());
    localResources.put(fileDstPath, resource);
}

From source file:com.bizosys.unstructured.StopwordAndSynonymAnalyzer.java

License:Apache License

/**
 * Reads the analyzer settings from the HSearch configuration and, unless
 * stopwords are already loaded, locates the stopword and synonym files and
 * hands their streams to {@code load(stopwordStream, synonumStream)}.
 *
 * <p>Each file is looked up first on the Hadoop {@link FileSystem} and, if no
 * stream was opened there, as a resource of the thread context class loader
 * that is then opened as a local {@link File}. A stream that could not be
 * opened either way is passed on as {@code null}.</p>
 *
 * @throws IOException if a file system or stream operation fails
 */
public void load() throws IOException {

    InputStream stopwordStream = null;
    InputStream synonumStream = null;

    // File names default to "synonyms.txt" / "stopwords.txt" when not configured.
    Configuration hsearchConf = HSearchConfig.getInstance().getConfiguration();
    String filenameSynonum = hsearchConf.get("synonyms.file.location", "synonyms.txt");
    String filenameStopword = hsearchConf.get("stopword.file.location", "stopwords.txt");

    // The four filter flags default to true and are refreshed on every call,
    // even when the early return below is taken.
    isLowerCaseEnabled = hsearchConf.getBoolean("lucene.analysis.lowercasefilter", true);
    isAccentFilterEnabled = hsearchConf.getBoolean("lucene.analysis.accentfilter", true);
    isSnoballStemEnabled = hsearchConf.getBoolean("lucene.analysis.snowballfilter", true);
    isStopFilterEnabled = hsearchConf.getBoolean("lucene.analysis.stopfilter", true);

    // Stopwords already present: skip locating and loading the files again.
    if (null != stopwords)
        return;

    // A fresh Hadoop configuration is used here, not hsearchConf.
    org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
    FileSystem fs = FileSystem.get(conf);

    if (null != fs) {

        /**
         * STOPWORD
         */
        Path stopPath = new Path(filenameStopword);
        if (fs.exists(stopPath)) {
            if (DEBUG_ENABLED)
                IdSearchLog.l.debug("Loading Stopword file from HDFS :" + stopPath.toString());
            stopwordStream = fs.open(stopPath);
        } else {
            IdSearchLog.l.fatal("Stopword file not available in HDFS :" + stopPath.toString());
        }

        /**
         * SYNONYM
         */

        Path synPath = new Path(filenameSynonum);
        if (fs.exists(synPath)) {
            synonumStream = fs.open(synPath);
            if (DEBUG_ENABLED)
                IdSearchLog.l.debug("Loading synonym file from HDFS :" + filenameSynonum.toString());
        } else {
            IdSearchLog.l.fatal("Synonym file not available in HDFS :" + filenameSynonum.toString());
            IdSearchLog.l.fatal("Working Directory :" + fs.getWorkingDirectory().getName());
        }
    }

    // The class loader is only fetched when at least one stream is still missing.
    // NOTE(review): getContextClassLoader() may return null, which would make the
    // getResource calls below throw NullPointerException — confirm this cannot happen here.
    ClassLoader classLoader = null;

    if (null == stopwordStream || null == synonumStream) {
        classLoader = Thread.currentThread().getContextClassLoader();
    }

    // Fallback for stopwords: resolve the name as a class-loader resource and open it as a local file.
    if (null == stopwordStream) {
        URL stopUrl = classLoader.getResource(filenameStopword);
        if (null != stopUrl) {
            String stopFile = stopUrl.getPath();
            if (null != stopFile) {
                File stopwordFile = new File(stopFile);
                if (stopwordFile.exists() && stopwordFile.canRead()) {
                    stopwordStream = new FileInputStream(stopwordFile);
                    if (DEBUG_ENABLED)
                        IdSearchLog.l
                                .debug("Loading Stopword file from Local :" + stopwordFile.getAbsolutePath());
                } else {
                    IdSearchLog.l.fatal("Stopword file not available at :" + stopwordFile.getAbsolutePath());
                    // NOTE(review): fs is dereferenced here without the null check used above, and the
                    // message says "Working Directory" while the value is the home directory — confirm intent.
                    IdSearchLog.l.fatal("Working Directory :" + fs.getHomeDirectory().getName());
                }
            } else {
                if (DEBUG_ENABLED)
                    IdSearchLog.l.debug("Ignoring Stopwords > " + filenameStopword);
            }
        }
    }

    // Fallback for synonyms: same lookup as for stopwords, but a missing file is
    // only reported at debug level.
    if (null == synonumStream) {
        URL synUrl = classLoader.getResource(filenameSynonum);
        if (null != synUrl) {
            String synFileName = synUrl.getPath();
            if (null != synFileName) {
                File synFile = new File(synFileName);
                if (synFile.exists() && synFile.canRead()) {
                    synonumStream = new FileInputStream(synFile);
                    if (DEBUG_ENABLED)
                        IdSearchLog.l.debug("Loading Synonum file from Local :" + synFile.getAbsolutePath());
                } else {
                    if (DEBUG_ENABLED)
                        IdSearchLog.l.debug("Synonum file not available at :" + synFile.getAbsolutePath());
                }
            } else {
                if (DEBUG_ENABLED)
                    IdSearchLog.l.debug("Ignoring Synonyms > " + filenameSynonum);
            }
        }
    }

    // Either stream may still be null here. The streams are not closed in this method;
    // presumably the two-argument load takes ownership of them — TODO confirm.
    load(stopwordStream, synonumStream);
}