List of usage examples for org.apache.hadoop.fs.Path.toString()
@Override
public String toString()
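Path.toString() returns the path as a string; a fully qualified path keeps its scheme and authority, and parent/child construction normalizes separators. A minimal, self-contained sketch of the behavior (the paths below are hypothetical):

import org.apache.hadoop.fs.Path;

public class PathToStringDemo {
    public static void main(String[] args) {
        // A relative path prints as written.
        System.out.println(new Path("data/input").toString());        // data/input
        // A fully qualified path keeps its scheme and authority.
        System.out.println(new Path("hdfs://namenode:8020/user/hadoop/input").toString());
        // Parent/child construction normalizes separators before printing.
        System.out.println(new Path(new Path("/tmp"), "out").toString()); // /tmp/out
    }
}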
From source file: cn.edu.xmu.dm.mapreduce.Sort.java
License: Apache License
/**
 * The main driver for the sort program. Invoke this method to submit the
 * map/reduce job.
 *
 * @throws IOException When there are communication problems with the job tracker.
 */
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "Sorter");
    job.setJarByClass(Sort.class);

    JobConf jobConf = new JobConf(getConf(), Sort.class);
    jobConf.setJobName("sorter");

    jobConf.setMapperClass(IdentityMapper.class);
    jobConf.setReducerClass(IdentityReducer.class);

    JobClient client = new JobClient(jobConf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String sort_reduces = jobConf.get("test.sort.reduces_per_host");
    if (sort_reduces != null) {
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces);
    }
    Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class;
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
    Class<? extends Writable> outputValueClass = BytesWritable.class;
    List<String> otherArgs = new ArrayList<String>();
    InputSampler.Sampler<K, V> sampler = null;
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                jobConf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                num_reduces = Integer.parseInt(args[++i]);
            } else if ("-inFormat".equals(args[i])) {
                inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
            } else if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else if ("-outKey".equals(args[i])) {
                outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
            } else if ("-outValue".equals(args[i])) {
                outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
            } else if ("-totalOrder".equals(args[i])) {
                double pcnt = Double.parseDouble(args[++i]);
                int numSamples = Integer.parseInt(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (0 >= maxSplits)
                    maxSplits = Integer.MAX_VALUE;
                sampler = new InputSampler.RandomSampler<K, V>(pcnt, numSamples, maxSplits);
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }

    // Set user-supplied (possibly default) job configs.
    jobConf.setNumReduceTasks(num_reduces);
    jobConf.setInputFormat(inputFormatClass);
    jobConf.setOutputFormat(outputFormatClass);
    jobConf.setOutputKeyClass(outputKeyClass);
    jobConf.setOutputValueClass(outputValueClass);

    // Make sure there are exactly 2 parameters left.
    if (otherArgs.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + otherArgs.size() + " instead of 2.");
        return printUsage();
    }
    FileInputFormat.setInputPaths(jobConf, otherArgs.get(0));
    FileOutputFormat.setOutputPath(jobConf, new Path(otherArgs.get(1)));
    if (sampler != null) {
        System.out.println("Sampling input to effect total-order sort...");
        jobConf.setPartitionerClass(TotalOrderPartitioner.class);
        Path inputDir = FileInputFormat.getInputPaths(jobConf)[0];
        inputDir = inputDir.makeQualified(inputDir.getFileSystem(jobConf));
        Path partitionFile = new Path(inputDir, "_sortPartitioning");
        TotalOrderPartitioner.setPartitionFile(jobConf, partitionFile);
        InputSampler.<K, V>writePartitionFile(jobConf, sampler);
        // Path.toString() yields the string form used to build the cache URI.
        URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning");
        DistributedCache.addCacheFile(partitionUri, jobConf);
        DistributedCache.createSymlink(jobConf);
    }

    System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from "
            + FileInputFormat.getInputPaths(jobConf)[0] + " into " + FileOutputFormat.getOutputPath(jobConf)
            + " with " + num_reduces + " reduces.");
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    jobResult = JobClient.runJob(jobConf);
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return 0;
}
From source file: cn.jpush.hdfs.mr.example.BaileyBorweinPlouffe.java
License: Apache License
/** Run a map/reduce job to compute Pi. */
private static void compute(int startDigit, int nDigits, int nMaps, String workingDir, Configuration conf,
        PrintStream out) throws IOException {
    final String name = startDigit + "_" + nDigits;

    // Set up the working directory.
    out.println("Working Directory = " + workingDir);
    out.println();
    // Resolve the FileSystem from a path under the working directory
    // (rather than FileSystem.get(conf)) so the path's own scheme is honored.
    // final FileSystem fs = FileSystem.get(conf);
    final FileSystem fs = new Path(workingDir, "part-r-00000").getFileSystem(conf);
    final Path dir = fs.makeQualified(new Path(workingDir));
    if (fs.exists(dir)) {
        throw new IOException("Working directory " + dir + " already exists. Please remove it first.");
    } else if (!fs.mkdirs(dir)) {
        throw new IOException("Cannot create working directory " + dir);
    }

    out.println("Start Digit      = " + startDigit);
    out.println("Number of Digits = " + nDigits);
    out.println("Number of Maps   = " + nMaps);

    // Set up a job.
    final Job job = createJob(name, conf);
    final Path hexfile = new Path(dir, "pi_" + name + ".hex");
    FileOutputFormat.setOutputPath(job, new Path(dir, "out"));

    // Set up custom properties; Path.toString() serializes the paths into the job configuration.
    job.getConfiguration().set(WORKING_DIR_PROPERTY, dir.toString());
    job.getConfiguration().set(HEX_FILE_PROPERTY, hexfile.toString());
    job.getConfiguration().setInt(DIGIT_START_PROPERTY, startDigit);
    job.getConfiguration().setInt(DIGIT_SIZE_PROPERTY, nDigits);
    job.getConfiguration().setInt(DIGIT_PARTS_PROPERTY, nMaps);

    // Start the map/reduce job.
    out.println("\nStarting Job ...");
    final long startTime = System.currentTimeMillis();
    try {
        if (!job.waitForCompletion(true)) {
            out.println("Job failed.");
            System.exit(1);
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        out.println("Duration is " + duration + " seconds.");
    }
    out.println("Output file: " + hexfile);
}
From source file: cn.lhfei.hadoop.ch03.ListStatus.java
License: Apache License
public static void main(String[] args) {
    String uri = args[0];
    Configuration conf = new Configuration();
    FileSystem fs = null;
    try {
        fs = FileSystem.get(URI.create(uri), conf);

        Path[] paths = new Path[args.length];
        for (int i = 0; i < paths.length; i++) {
            paths[i] = new Path(args[i]);
        }

        FileStatus[] status = fs.listStatus(paths);
        Path[] listPath = FileUtil.stat2Paths(status);
        for (Path p : listPath) {
            log.info(p.toString());
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file: cn.lhfei.hadoop.ch03.RegexExcludePathFilter.java
License: Apache License
@Override
public boolean accept(Path path) {
    return !path.toString().matches(regex);
}
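A filter like this is typically passed to a listing or globbing call. Below is a minimal sketch of such wiring; the one-argument constructor storing the regex is an assumption, since the excerpt only shows the accept method:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class FilterDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Assumed constructor: RegexExcludePathFilter(String regex) stores the pattern.
        PathFilter filter = new RegexExcludePathFilter("^.*/_logs$");
        // listStatus applies the filter to each candidate path; accept() sees path.toString().
        FileStatus[] kept = fs.listStatus(new Path("/data/input"), filter);
        for (FileStatus s : kept) {
            System.out.println(s.getPath().toString());
        }
    }
}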
From source file: cn.spark.Case.MyMultipleOutputFormat.java
License: Apache License
/**
 * Generate the output file name based on a given name and the input file name.
 * If the map input file does not exist (i.e. this is not for a map-only job),
 * the given name is returned unchanged. If the config value for
 * "num.of.trailing.legs.to.use" is not set, or is set to 0 or a negative value,
 * the given name is returned unchanged. Otherwise, return a file name
 * consisting of the N trailing legs of the input file name, where N is the
 * config value for "num.of.trailing.legs.to.use".
 *
 * @param job  the job config
 * @param name the output file name
 * @return the output file name based on a given name and the input file name.
 */
protected String getInputFileBasedOutputFileName(JobConf job, String name) {
    String infilepath = job.get("map.input.file");
    if (infilepath == null) {
        // If the map input file does not exist, return the given name.
        return name;
    }
    int numOfTrailingLegsToUse = job.getInt("mapred.outputformat.numOfTrailingLegs", 0);
    if (numOfTrailingLegsToUse <= 0) {
        return name;
    }
    Path infile = new Path(infilepath);
    Path parent = infile.getParent();
    String midName = infile.getName();
    Path outPath = new Path(midName);
    for (int i = 1; i < numOfTrailingLegsToUse; i++) {
        if (parent == null)
            break;
        midName = parent.getName();
        if (midName.length() == 0)
            break;
        parent = parent.getParent();
        outPath = new Path(midName, outPath);
    }
    return outPath.toString();
}
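To make the trailing-legs logic concrete, here is a standalone trace with hypothetical values (an input file of /data/2015/01/15/log.txt and a trailing-legs count of 3), mirroring the loop above:

import org.apache.hadoop.fs.Path;

public class TrailingLegsDemo {
    public static void main(String[] args) {
        Path infile = new Path("/data/2015/01/15/log.txt");
        Path parent = infile.getParent();          // /data/2015/01/15
        Path outPath = new Path(infile.getName()); // log.txt
        for (int i = 1; i < 3; i++) {              // numOfTrailingLegs = 3
            outPath = new Path(parent.getName(), outPath);
            parent = parent.getParent();
        }
        // Path.toString() renders the assembled relative path.
        System.out.println(outPath.toString());    // 01/15/log.txt
    }
}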
From source file: cn.uc.hadoop.mapreduce.lib.input.FilePathLineRecordReader.java
License: Apache License
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    // ADD by qiujw: the key is the file path as a string.
    key = new Text(file.toString());

    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.getCodec(file);

    // Open the file and seek to the start of the split.
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);
    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            if (null == this.recordDelimiterBytes) {
                in = new LineReader(cIn, job);
            } else {
                in = new LineReader(cIn, job, this.recordDelimiterBytes);
            }
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            if (null == this.recordDelimiterBytes) {
                in = new LineReader(codec.createInputStream(fileIn, decompressor), job);
            } else {
                in = new LineReader(codec.createInputStream(fileIn, decompressor), job,
                        this.recordDelimiterBytes);
            }
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        if (null == this.recordDelimiterBytes) {
            in = new LineReader(fileIn, job);
        } else {
            in = new LineReader(fileIn, job, this.recordDelimiterBytes);
        }
        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away the first record
    // because we always (except for the last split) read one extra line in
    // the next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}
From source file: co.cask.cdap.data2.increment.hbase10.IncrementSummingScannerTest.java
License: Apache License
static HRegion createRegion(Configuration hConf, CConfiguration cConf, TableId tableId, HColumnDescriptor cfd)
        throws Exception {
    HBaseTableUtil tableUtil = new HBaseTableUtilFactory(cConf).get();
    HTableDescriptorBuilder htd = tableUtil.buildHTableDescriptor(tableId);
    cfd.setMaxVersions(Integer.MAX_VALUE);
    cfd.setKeepDeletedCells(true);
    htd.addFamily(cfd);
    htd.addCoprocessor(IncrementHandler.class.getName());

    HTableDescriptor desc = htd.build();
    String tableName = desc.getNameAsString();
    Path tablePath = new Path("/tmp/" + tableName);
    Path hlogPath = new Path("/tmp/hlog-" + tableName);
    FileSystem fs = FileSystem.get(hConf);
    assertTrue(fs.mkdirs(tablePath));
    // WALFactory takes the log directory as a string, hence hlogPath.toString().
    WALFactory walFactory = new WALFactory(hConf, null, hlogPath.toString());
    WAL hLog = walFactory.getWAL(new byte[] { 1 });
    HRegionInfo regionInfo = new HRegionInfo(desc.getTableName());
    HRegionFileSystem regionFS = HRegionFileSystem.createRegionOnFileSystem(hConf, fs, tablePath, regionInfo);
    return new HRegion(regionFS, hLog, hConf, desc, new LocalRegionServerServices(hConf,
            ServerName.valueOf(InetAddress.getLocalHost().getHostName(), 0, System.currentTimeMillis())));
}
From source file: co.cask.cdap.template.etl.common.BatchFileFilter.java
License: Apache License
@Override
public boolean accept(Path path) {
    String filePathName = path.toString();
    // The path filter will first check the directory if a directory is given.
    if (filePathName.equals(pathName) || filePathName.equals(pathName + "/")) {
        return true;
    }

    // Filter by file name using the regex from the configuration.
    if (!useTimeFilter) {
        Matcher matcher = regex.matcher(filePathName);
        return matcher.matches();
    }

    // Use the hourly time filter.
    if (lastRead.equals("-1")) {
        String currentTime = sdf.format(prevHour);
        return filePathName.contains(currentTime);
    }

    // Use the stateful time filter.
    Date fileDate;
    String filename = path.getName();
    try {
        fileDate = sdf.parse(filename.substring(0, DATE_LENGTH));
    } catch (Exception pe) {
        // Try to parse the CloudFront format.
        try {
            int startIndex = filename.indexOf(".") + 1;
            fileDate = sdf.parse(filename.substring(startIndex, startIndex + DATE_LENGTH));
        } catch (Exception e) {
            LOG.warn("Couldn't parse file: " + filename);
            return false;
        }
    }
    return isWithinRange(fileDate);
}
From source file: co.cask.hydrator.action.ftp.FTPCopyAction.java
License: Apache License
@Override
public void run(ActionContext context) throws Exception {
    Path destination = new Path(config.getDestDirectory());

    FileSystem fileSystem = FileSystem.get(new Configuration());
    destination = fileSystem.makeQualified(destination);
    if (!fileSystem.exists(destination)) {
        fileSystem.mkdirs(destination);
    }

    FTPClient ftp;
    if ("ftp".equals(config.getProtocol().toLowerCase())) {
        ftp = new FTPClient();
    } else {
        ftp = new FTPSClient();
    }
    ftp.setControlKeepAliveTimeout(5);
    // UNIX-type server
    FTPClientConfig ftpConfig = new FTPClientConfig();
    // Set additional parameters required for the FTP connection here,
    // for example: ftpConfig.setServerTimeZoneId("Pacific/Pitcairn")
    ftp.configure(ftpConfig);
    try {
        ftp.connect(config.getHost(), config.getPort());
        ftp.enterLocalPassiveMode();
        String replyString = ftp.getReplyString();
        LOG.info("Connected to server {} and port {} with reply from connect as {}.", config.getHost(),
                config.getPort(), replyString);

        // Check the reply code for actual success.
        int replyCode = ftp.getReplyCode();
        if (!FTPReply.isPositiveCompletion(replyCode)) {
            ftp.disconnect();
            throw new RuntimeException(String.format("FTP server refused connection with code %s and reply %s.",
                    replyCode, replyString));
        }

        if (!ftp.login(config.getUserName(), config.getPassword())) {
            LOG.error("login command reply code {}, {}", ftp.getReplyCode(), ftp.getReplyString());
            ftp.logout();
            throw new RuntimeException(String.format("Login to the FTP server %s and port %s failed. "
                    + "Please check user name and password.", config.getHost(), config.getPort()));
        }

        FTPFile[] ftpFiles = ftp.listFiles(config.getSrcDirectory());
        LOG.info("listFiles command reply code: {}, {}.", ftp.getReplyCode(), ftp.getReplyString());
        // Check the reply code for the listFiles call. If it is
        // "522 Data connections must be encrypted", the data channel also needs to be encrypted.
        if (ftp.getReplyCode() == 522 && "sftp".equalsIgnoreCase(config.getProtocol())) {
            // Encrypt the data channel and call listFiles again.
            ((FTPSClient) ftp).execPROT("P");
            LOG.info("Attempting command listFiles on encrypted data channel.");
            ftpFiles = ftp.listFiles(config.getSrcDirectory());
        }
        for (FTPFile file : ftpFiles) {
            String source = config.getSrcDirectory() + "/" + file.getName();
            LOG.info("Current file {}, source {}", file.getName(), source);
            if (config.getExtractZipFiles() && file.getName().endsWith(".zip")) {
                copyZip(ftp, source, fileSystem, destination);
            } else {
                Path destinationPath = fileSystem.makeQualified(new Path(destination, file.getName()));
                LOG.debug("Downloading {} to {}", file.getName(), destinationPath.toString());
                try (OutputStream output = fileSystem.create(destinationPath)) {
                    InputStream is = ftp.retrieveFileStream(source);
                    ByteStreams.copy(is, output);
                }
            }
            if (!ftp.completePendingCommand()) {
                LOG.error("Error completing command.");
            }
        }
        ftp.logout();
    } finally {
        if (ftp.isConnected()) {
            try {
                ftp.disconnect();
            } catch (Throwable e) {
                LOG.error("Failure to disconnect the ftp connection.", e);
            }
        }
    }
}
From source file: co.cask.hydrator.action.ftp.FTPCopyAction.java
License: Apache License
private void copyZip(FTPClient ftp, String source, FileSystem fs, Path destination) throws IOException {
    InputStream is = ftp.retrieveFileStream(source);
    try (ZipInputStream zis = new ZipInputStream(new BufferedInputStream(is))) {
        ZipEntry entry;
        while ((entry = zis.getNextEntry()) != null) {
            LOG.debug("Extracting {}", entry);
            Path destinationPath = fs.makeQualified(new Path(destination, entry.getName()));
            try (OutputStream os = fs.create(destinationPath)) {
                LOG.debug("Downloading {} to {}", entry.getName(), destinationPath.toString());
                ByteStreams.copy(zis, os);
            }
        }
    }
}