Example usage for org.apache.hadoop.fs Path getFileSystem

List of usage examples for org.apache.hadoop.fs Path getFileSystem

Introduction

On this page you can find example usage for org.apache.hadoop.fs.Path.getFileSystem.

Prototype

public FileSystem getFileSystem(Configuration conf) throws IOException 

Document

Return the FileSystem that owns this Path.
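
Below is a minimal, self-contained sketch of the call, shown before the project-specific examples under Usage. The class name, the HDFS URI, and the default Configuration are illustrative assumptions only; they are not taken from any of the projects listed below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileSystemExample {
    public static void main(String[] args) throws IOException {
        // Load core-site.xml / hdfs-site.xml from the classpath, if present.
        Configuration conf = new Configuration();

        // Hypothetical path; its scheme and authority determine which FileSystem implementation is returned.
        Path p = new Path("hdfs://namenode:8020/tmp/example.txt");

        // Resolve the FileSystem that owns this Path, then use it for I/O on that path.
        FileSystem fs = p.getFileSystem(conf);
        if (fs.exists(p)) {
            System.out.println(p + " has length " + fs.getFileStatus(p).getLen());
        }
    }
}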

Usage

From source file:com.kadwa.hadoop.DistExec.java

License:Open Source License

private static List<Path> fetchFileList(Configuration conf, Path srcList) throws IOException {
    List<Path> result = new ArrayList<Path>();
    FileSystem fs = srcList.getFileSystem(conf);
    BufferedReader input = null;
    try {
        input = new BufferedReader(new InputStreamReader(fs.open(srcList)));
        String line = input.readLine();
        while (line != null) {
            result.add(new Path(line));
            line = input.readLine();
        }
    } finally {
        checkAndClose(input);
    }
    return result;
}

From source file:com.kadwa.hadoop.DistExec.java

License:Open Source License

/**
 * Sanity check for srcPath
 */
private static void checkSrcPath(JobConf jobConf, List<Path> srcPaths) throws IOException {
    List<IOException> rslt = new ArrayList<IOException>();

    Path[] ps = new Path[srcPaths.size()];
    ps = srcPaths.toArray(ps);
    TokenCache.obtainTokensForNamenodes(jobConf.getCredentials(), ps, jobConf);

    for (Path p : srcPaths) {
        FileSystem fs = p.getFileSystem(jobConf);
        if (!fs.exists(p)) {
            rslt.add(new IOException("Input source " + p + " does not exist."));
        }
    }
    if (!rslt.isEmpty()) {
        throw new InvalidInputException(rslt);
    }
}

From source file:com.kadwa.hadoop.DistExec.java

License:Open Source License

/**
 * Fully delete dir
 */
static void fullyDelete(String dir, Configuration conf) throws IOException {
    if (dir != null) {
        Path tmp = new Path(dir);
        tmp.getFileSystem(conf).delete(tmp, true);
    }
}

From source file:com.kadwa.hadoop.DistExec.java

License:Open Source License

/**
 * Initialize ExecFilesMapper specific job-configuration.
 *
 * @param conf    : The dfs/mapred configuration.
 * @param jobConf : The handle to the jobConf object to be initialized.
 * @param args    Arguments
 * @return true if it is necessary to launch a job.
 */
private static boolean setup(Configuration conf, JobConf jobConf, final Arguments args) throws IOException {
    jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString());
    jobConf.set(EXEC_CMD_LABEL, args.execCmd);

    //set boolean values
    jobConf.setBoolean(Options.REDIRECT_ERROR_TO_OUT.propertyname,
            args.flags.contains(Options.REDIRECT_ERROR_TO_OUT));

    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobConf);
    Path stagingArea;
    try {
        stagingArea = JobSubmissionFiles.getStagingDir(jClient, conf);
    } catch (InterruptedException e) {
        throw new IOException(e);
    }

    Path jobDirectory = new Path(stagingArea + NAME + "_" + randomId);
    FsPermission mapredSysPerms = new FsPermission(JobSubmissionFiles.JOB_DIR_PERMISSION);
    FileSystem.mkdirs(FileSystem.get(jobDirectory.toUri(), conf), jobDirectory, mapredSysPerms);
    jobConf.set(JOB_DIR_LABEL, jobDirectory.toString());

    FileSystem dstfs = args.dst.getFileSystem(conf);

    // get tokens for all the required FileSystems..
    TokenCache.obtainTokensForNamenodes(jobConf.getCredentials(), new Path[] { args.dst }, conf);

    boolean dstExists = dstfs.exists(args.dst);
    boolean dstIsDir = false;
    if (dstExists) {
        dstIsDir = dstfs.getFileStatus(args.dst).isDir();
    }

    // default logPath
    Path logPath = args.log;
    if (logPath == null) {
        String filename = "_" + NAME + "_logs_" + randomId;
        if (!dstExists || !dstIsDir) {
            Path parent = args.dst.getParent();
            if (!dstfs.exists(parent)) {
                dstfs.mkdirs(parent);
            }
            logPath = new Path(parent, filename);
        } else {
            logPath = new Path(args.dst, filename);
        }
    }
    FileOutputFormat.setOutputPath(jobConf, logPath);

    // create src list, dst list
    FileSystem jobfs = jobDirectory.getFileSystem(jobConf);

    Path srcfilelist = new Path(jobDirectory, "_" + NAME + "_src_files");
    jobConf.set(SRC_LIST_LABEL, srcfilelist.toString());
    SequenceFile.Writer src_writer = SequenceFile.createWriter(jobfs, jobConf, srcfilelist, LongWritable.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    Path dstfilelist = new Path(jobDirectory, "_" + NAME + "_dst_files");
    SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobfs, jobConf, dstfilelist, Text.class,
            Text.class, SequenceFile.CompressionType.NONE);

    Path dstdirlist = new Path(jobDirectory, "_" + NAME + "_dst_dirs");
    jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString());
    SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobfs, jobConf, dstdirlist, Text.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    // handle the case where the destination directory doesn't exist
    // and we've only a single src directory.
    final boolean special = (args.srcs.size() == 1 && !dstExists);
    int srcCount = 0, cnsyncf = 0, dirsyn = 0;
    long fileCount = 0L, byteCount = 0L, cbsyncs = 0L;
    try {
        for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) {
            final Path src = srcItr.next();
            FileSystem srcfs = src.getFileSystem(conf);
            FileStatus srcfilestat = srcfs.getFileStatus(src);
            Path root = special && srcfilestat.isDir() ? src : src.getParent();
            if (srcfilestat.isDir()) {
                ++srcCount;
            }

            Stack<FileStatus> pathstack = new Stack<FileStatus>();
            for (pathstack.push(srcfilestat); !pathstack.empty();) {
                FileStatus cur = pathstack.pop();
                FileStatus[] children = srcfs.listStatus(cur.getPath());
                for (int i = 0; i < children.length; i++) {
                    boolean skipfile = false;
                    final FileStatus child = children[i];
                    final String dst = makeRelative(root, child.getPath());
                    ++srcCount;

                    if (child.isDir()) {
                        pathstack.push(child);
                    } else {

                        if (!skipfile) {
                            ++fileCount;
                            byteCount += child.getLen();

                            if (LOG.isTraceEnabled()) {
                                LOG.trace("adding file " + child.getPath());
                            }

                            ++cnsyncf;
                            cbsyncs += child.getLen();
                            if (cnsyncf > SYNC_FILE_MAX || cbsyncs > BYTES_PER_MAP) {
                                src_writer.sync();
                                dst_writer.sync();
                                cnsyncf = 0;
                                cbsyncs = 0L;
                            }
                        }
                    }

                    if (!skipfile) {
                        src_writer.append(new LongWritable(child.isDir() ? 0 : child.getLen()),
                                new FilePair(child, dst));
                    }

                    dst_writer.append(new Text(dst), new Text(child.getPath().toString()));
                }

                if (cur.isDir()) {
                    String dst = makeRelative(root, cur.getPath());
                    dir_writer.append(new Text(dst), new FilePair(cur, dst));
                    if (++dirsyn > SYNC_FILE_MAX) {
                        dirsyn = 0;
                        dir_writer.sync();
                    }
                }
            }
        }
    } finally {
        checkAndClose(src_writer);
        checkAndClose(dst_writer);
        checkAndClose(dir_writer);
    }

    FileStatus dststatus = null;
    try {
        dststatus = dstfs.getFileStatus(args.dst);
    } catch (FileNotFoundException fnfe) {
        LOG.info(args.dst + " does not exist.");
    }

    // create dest path dir if copying > 1 file
    if (dststatus == null) {
        if (srcCount > 1 && !dstfs.mkdirs(args.dst)) {
            throw new IOException("Failed to create" + args.dst);
        }
    }

    final Path sorted = new Path(jobDirectory, "_" + NAME + "_sorted");
    checkDuplication(jobfs, dstfilelist, sorted, conf);

    Path tmpDir = new Path(
            (dstExists && !dstIsDir) || (!dstExists && srcCount == 1) ? args.dst.getParent() : args.dst,
            "_" + NAME + "_tmp_" + randomId);
    jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString());
    LOG.info("sourcePathsCount=" + srcCount);
    LOG.info("filesToExecCount=" + fileCount);
    LOG.info("bytesToExecCount=" + StringUtils.humanReadableInt(byteCount));
    jobConf.setInt(SRC_COUNT_LABEL, srcCount);
    jobConf.setLong(TOTAL_SIZE_LABEL, byteCount);
    setMapCount(fileCount, jobConf);
    return fileCount > 0;
}

From source file:com.knewton.mapreduce.io.SSTableInputFormat.java

License:Apache License

/**
 * Expands all directories passed as input and keeps only valid data tables.
 *
 * @return A list of all the data tables found under the input directories.
 */
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    Configuration conf = job.getConfiguration();
    List<FileStatus> files = super.listStatus(job);
    DataTablePathFilter dataTableFilter = getDataTableFilter(conf);
    files = cleanUpBackupDir(files);
    for (int i = 0; i < files.size(); i++) {
        FileStatus file = files.get(i);
        Path p = file.getPath();
        // Expand if directory
        if (file.isDirectory() && p != null) {
            LOG.info("Expanding {}", p);
            FileSystem fs = p.getFileSystem(conf);
            FileStatus[] children = fs.listStatus(p);
            List<FileStatus> listChildren = Lists.newArrayList(children);
            listChildren = cleanUpBackupDir(listChildren);
            files.addAll(i + 1, listChildren);
        }
        if (!dataTableFilter.accept(file.getPath())) {
            LOG.info("Removing {}", file.getPath());
            files.remove(i);
            i--;
        }
    }
    return files;
}

From source file:com.knewton.mrtool.io.JsonRecordReader.java

License:Apache License

/**
 * Get the line reader to be used for the file. A <code>LineReader</code> can read a file line
 * by line. This separate method helps with testing too.
 *
 * @param fileSplit
 * @param conf
 * @return
 * @throws IOException
 */
protected LineReader initLineReader(FileSplit fileSplit, Configuration conf) throws IOException {
    final Path file = fileSplit.getPath();
    final CompressionCodec codec = compressionCodecs.getCodec(file);
    FileSystem fs = file.getFileSystem(conf);
    FSDataInputStream fileIn = fs.open(fileSplit.getPath());
    seekableIn = fileIn;
    boolean skipFirstLine = false;
    LineReader lineReader;
    if (codec != null) {
        lineReader = new LineReader(codec.createInputStream(fileIn), conf);
    } else {
        // if the start is not the beginning of the file then skip the first line to get the
        // next complete json record. The previous json record will be read by the record reader
        // that got assigned the previous InputSplit.
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        lineReader = new LineReader(fileIn, conf);
    }
    if (skipFirstLine) {
        start += lineReader.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    return lineReader;
}

From source file:com.kylinolap.job.hadoop.cube.RowKeyDistributionCheckerMapper.java

License:Apache License

@SuppressWarnings("deprecation")
public byte[][] getSplits(Configuration conf, Path path) {
    List<byte[]> rowkeyList = new ArrayList<byte[]>();
    SequenceFile.Reader reader = null;
    try {
        reader = new SequenceFile.Reader(path.getFileSystem(conf), path, conf);
        Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
        while (reader.next(key, value)) {
            byte[] tmp = ((Text) key).copyBytes();
            if (!rowkeyList.contains(tmp)) {
                rowkeyList.add(tmp);
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        IOUtils.closeStream(reader);
    }

    byte[][] retValue = rowkeyList.toArray(new byte[rowkeyList.size()][]);

    return retValue;
}

From source file:com.kylinolap.job.hadoop.hbase.CreateHTableJob.java

License:Apache License

@SuppressWarnings("deprecation")
public byte[][] getSplits(Configuration conf, Path path) throws Exception {
    FileSystem fs = path.getFileSystem(conf);
    if (!fs.exists(path)) {
        System.err.println("Path " + path + " not found, no region split, HTable will be one region");
        return null;
    }

    List<byte[]> rowkeyList = new ArrayList<byte[]>();
    SequenceFile.Reader reader = null;
    try {
        reader = new SequenceFile.Reader(fs, path, conf);
        Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
        while (reader.next(key, value)) {
            rowkeyList.add(((Text) key).copyBytes());
        }
    } catch (Exception e) {
        e.printStackTrace();
        throw e;
    } finally {
        IOUtils.closeStream(reader);
    }

    System.out.println((rowkeyList.size() + 1) + " regions");
    System.out.println(rowkeyList.size() + " splits");
    for (byte[] split : rowkeyList) {
        System.out.println(StringUtils.byteToHexString(split));
    }

    byte[][] retValue = rowkeyList.toArray(new byte[rowkeyList.size()][]);
    return retValue.length == 0 ? null : retValue;
}

From source file:com.lightboxtechnologies.spectrum.HDFSArchiver.java

License:Apache License

public static int runPipeline(String src, String dst) throws IOException {
    final Configuration conf = new Configuration();
    final FileSystem fs = FileSystem.get(conf);

    final Path rpath = new Path(src);
    final Path zpath = new Path(dst);

    if (!fs.exists(rpath)) {
        throw new IOException("Source path does not exist.");
    }

    OutputStream out = null;
    try {
        out = zpath.getFileSystem(conf).create(zpath);
        zip(fs, rpath, out);
        out.close();
    } finally {
        IOUtils.closeQuietly(out);
    }

    return 0;
}

From source file:com.lightboxtechnologies.spectrum.SequenceFileExport.java

License:Apache License

public static void main(String[] args) throws Exception {
    final Configuration conf = new Configuration();

    final String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    String imageID;
    String outpath;
    String friendlyname;
    final Set<String> exts = new HashSet<String>();

    if ("-f".equals(otherArgs[0])) {
        // five arguments are expected: -f <extensions file> <imageID> <friendly name> <outpath>
        if (otherArgs.length != 5) {
            die();
        }

        // load extensions from file
        final Path extpath = new Path(otherArgs[1]);

        InputStream in = null;
        try {
            in = extpath.getFileSystem(conf).open(extpath);

            Reader r = null;
            try {
                r = new InputStreamReader(in);

                BufferedReader br = null;
                try {
                    br = new BufferedReader(r);

                    String line;
                    while ((line = br.readLine()) != null) {
                        exts.add(line.trim().toLowerCase());
                    }

                    br.close();
                } finally {
                    IOUtils.closeQuietly(br);
                }

                r.close();
            } finally {
                IOUtils.closeQuietly(r);
            }

            in.close();
        } finally {
            IOUtils.closeQuietly(in);
        }

        imageID = otherArgs[2];
        friendlyname = otherArgs[3];
        outpath = otherArgs[4];
    } else {
        if (otherArgs.length < 3) {
            die();
        }

        // read extensions from trailing args
        imageID = otherArgs[0];
        friendlyname = otherArgs[1];
        outpath = otherArgs[2];

        // lowercase the trailing file extensions (they begin after outpath, at index 3)
        for (int i = 3; i < otherArgs.length; ++i) {
            exts.add(otherArgs[i].toLowerCase());
        }
    }

    conf.setStrings("extensions", exts.toArray(new String[exts.size()]));

    final Job job = SKJobFactory.createJobFromConf(imageID, friendlyname, "SequenceFileExport", conf);
    job.setJarByClass(SequenceFileExport.class);
    job.setMapperClass(SequenceFileExportMapper.class);
    job.setNumReduceTasks(0);

    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(MapWritable.class);

    job.setInputFormatClass(FsEntryHBaseInputFormat.class);
    FsEntryHBaseInputFormat.setupJob(job, imageID);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    FileOutputFormat.setOutputPath(job, new Path(outpath));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}