Example usage for org.apache.hadoop.fs FileStatus getPath

List of usage examples for org.apache.hadoop.fs FileStatus getPath

Introduction

On this page you can find example usages of org.apache.hadoop.fs FileStatus getPath.

Prototype

public Path getPath() 

Source Link

Usage

From source file:com.cloudera.seismic.segy.SegyUnloader.java

License:Open Source License

/**
 * Exports sequence files from the configured file system into one local file.
 *
 * <p>Requires the {@code input} and {@code output} options. Every entry
 * listed under the input path whose name does not start with {@code "_"} is
 * handed to {@code write(...)} together with the shared output stream.
 *
 * <p>If a required option is missing or the input path does not exist, a
 * message is printed and the JVM exits with status 1.
 *
 * @param args command-line arguments
 * @return 0 when every listed entry has been written
 * @throws Exception if parsing, listing or writing fails
 */
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption("input", true, "SU sequence files to export from Hadoop");
    options.addOption("output", true, "The local SU file to write");

    // Parse the commandline and check for required arguments.
    CommandLine cmdLine = new PosixParser().parse(options, args, false);
    if (!cmdLine.hasOption("input") || !cmdLine.hasOption("output")) {
        System.out.println("Missing required input/output arguments");
        new HelpFormatter().printHelp("SegyUnloader", options);
        System.exit(1);
    }

    Configuration conf = getConf();
    FileSystem hdfs = FileSystem.get(conf);
    Path inputPath = new Path(cmdLine.getOptionValue("input"));
    if (!hdfs.exists(inputPath)) {
        System.out.println("Input path does not exist");
        System.exit(1);
    }

    // Skip entries whose name starts with an underscore.
    PathFilter pf = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return !path.getName().startsWith("_");
        }
    };

    DataOutputStream os = new DataOutputStream(new FileOutputStream(cmdLine.getOptionValue("output")));
    try {
        for (FileStatus fs : hdfs.listStatus(inputPath, pf)) {
            write(fs.getPath(), os, conf);
        }
    } finally {
        // Release the local file handle even when listing or writing fails.
        os.close();
    }

    return 0;
}

From source file:com.cloudera.sqoop.io.TestSplittableBufferedWriter.java

License:Apache License

/**
 * Creates the test write directory on the local file system and removes
 * every file found directly inside it.
 *
 * <p>The test fails if the directory contains a subdirectory, if an entry
 * cannot be deleted, or if the directory does not exist afterwards.
 *
 * @throws IOException if a file system operation fails
 */
private void ensureEmptyWriteDir() throws IOException {
    FileSystem localFs = FileSystem.getLocal(getConf());
    Path writeDir = getWritePath();

    localFs.mkdirs(writeDir);

    for (FileStatus entry : localFs.listStatus(writeDir)) {
        if (entry.isDir()) {
            fail("setUp(): Write directory " + writeDir + " contains subdirectories");
        }

        Path residual = entry.getPath();
        LOG.debug("setUp(): Removing " + residual);
        // Non-recursive delete: only plain files are expected here.
        boolean removed = localFs.delete(residual, false);
        if (!removed) {
            fail("setUp(): Could not delete residual file " + residual);
        }
    }

    boolean dirPresent = localFs.exists(writeDir);
    if (!dirPresent) {
        fail("setUp: Could not create " + writeDir);
    }
}

From source file:com.cloudera.sqoop.TestAppendUtils.java

License:Apache License

/**
 * Lists the data files directly under {@code path}.
 *
 * <p>An entry counts as a data file when it is not a directory and its name
 * matches {@code part.*-NNNNN.*}, where {@code NNNNN} is five digits.
 *
 * @param fs the file system to query
 * @param path the directory to list
 * @return FileStatus for data files only; empty when the listing is null
 * @throws IOException if the listing fails
 */
private FileStatus[] listFiles(FileSystem fs, Path path) throws IOException {
    FileStatus[] fileStatuses = fs.listStatus(path);
    if (fileStatuses == null) {
        // Guard against a null listing instead of failing with a NullPointerException.
        return new FileStatus[0];
    }

    ArrayList<FileStatus> files = new ArrayList<FileStatus>();
    Pattern patt = Pattern.compile("part.*-([0-9][0-9][0-9][0-9][0-9]).*");
    for (FileStatus fstat : fileStatuses) {
        if (fstat.isDir()) {
            continue;
        }
        Matcher mat = patt.matcher(fstat.getPath().getName());
        if (mat.matches()) {
            files.add(fstat);
        }
    }
    return files.toArray(new FileStatus[files.size()]);
}

From source file:com.cloudera.sqoop.TestMerge.java

License:Apache License

/**
 * Checks whether any {@code part-} file in a warehouse subdirectory contains
 * a line accepted by {@code checkFileForLine} for the given prefix.
 *
 * <p>The test fails if the directory listing is null or empty.
 *
 * @param prefix the line prefix to look for
 * @param dirName the directory name, resolved under LOCAL_WAREHOUSE_DIR
 * @return true as soon as one file reports a match; false otherwise
 * @throws Exception if the file system cannot be read
 */
protected boolean recordStartsWith(String prefix, String dirName) throws Exception {
    Path targetPath = new Path(new Path(LOCAL_WAREHOUSE_DIR), dirName);

    FileSystem localFs = FileSystem.getLocal(new Configuration());
    FileStatus[] candidates = localFs.listStatus(targetPath);

    if (candidates == null || candidates.length == 0) {
        fail("Got no import files!");
    }

    for (FileStatus candidate : candidates) {
        Path file = candidate.getPath();
        // Only part- files are inspected; stop at the first hit.
        if (file.getName().startsWith("part-") && checkFileForLine(localFs, file, prefix)) {
            return true;
        }
    }

    return false;
}

From source file:com.cloudera.sqoop.TestMultiMaps.java

License:Apache License

/**
 * Collects the paths of the output files found in the table directory.
 *
 * <p>When the test is not running on a physical cluster, the configuration
 * is pointed at the local file system before the file system is obtained.
 *
 * @return a list of Path objects for each data file
 * @throws IOException if the file system cannot be obtained or listed
 */
protected List<Path> getDataFilePaths() throws IOException {
    Configuration conf = new Configuration();
    boolean onCluster = BaseSqoopTestCase.isOnPhysicalCluster();
    if (!onCluster) {
        conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
    }

    FileSystem fileSystem = FileSystem.get(conf);
    FileStatus[] outputFiles =
            fileSystem.listStatus(getTablePath(), new Utils.OutputFileUtils.OutputFilesFilter());

    List<Path> result = new ArrayList<Path>();
    for (FileStatus outputFile : outputFiles) {
        result.add(outputFile.getPath());
    }
    return result;
}

From source file:com.cloudera.sqoop.testutil.ImportJobTestCase.java

License:Apache License

/**
 * Do a MapReduce-based import of the table and verify that the results
 * were imported as expected. (tests readFields(ResultSet) and toString())
 *
 * <p>Steps, in order: remove the table directory, run the import tool and
 * assert a zero return code, re-parse the arguments to obtain the options
 * used to locate the generated jar, push that jar onto the class loader
 * stack, then read the first value of every {@code part-} / {@code data-}
 * file in the table directory and compare it with the expected value.
 * The previous class loader is restored before returning.
 *
 * @param expectedVal the value we injected into the table.
 * @param importCols the columns to import. If null, all columns are used.
 */
protected void verifyImport(String expectedVal, String[] importCols) {

    // paths to where our output file will wind up.
    Path tableDirPath = getTablePath();

    removeTableDir();

    Configuration conf = getConf();
    SqoopOptions opts = getSqoopOptions(conf);

    // run the tool through the normal entry-point.
    int ret;
    try {
        Sqoop importer = new Sqoop(new ImportTool(), conf, opts);
        ret = Sqoop.runSqoop(importer, getArgv(true, importCols, conf));
    } catch (Exception e) {
        LOG.error("Got exception running Sqoop: " + e.toString());
        throw new RuntimeException(e);
    }

    // expect a successful return.
    assertEquals("Failure during job", 0, ret);

    // Build a fresh options object and parse the arguments into it; a parse
    // failure fails the test.
    opts = getSqoopOptions(conf);
    try {
        ImportTool importTool = new ImportTool();
        opts = importTool.parseArguments(getArgv(false, importCols, conf), conf, opts, true);
    } catch (Exception e) {
        fail(e.toString());
    }

    // Locate the jar for these options and make it loadable while the
    // output files are read back.
    CompilationManager compileMgr = new CompilationManager(opts);
    String jarFileName = compileMgr.getJarFilename();
    ClassLoader prevClassLoader = null;
    try {
        prevClassLoader = ClassLoaderStack.addJarFile(jarFileName, getTableName());

        // Now open and check all part-files in the table path until we find
        // a non-empty one that we can verify contains the value.
        if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
            conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
        }
        FileSystem fs = FileSystem.get(conf);
        FileStatus[] stats = fs.listStatus(tableDirPath);

        if (stats == null || stats.length == 0) {
            fail("Error: no files in " + tableDirPath);
        }

        // Set once any file yields a first value; checked after the loop.
        boolean foundRecord = false;
        for (FileStatus stat : stats) {
            if (!stat.getPath().getName().startsWith("part-")
                    && !stat.getPath().getName().startsWith("data-")) {
                // This isn't a data file. Ignore it.
                continue;
            }

            try {
                Object readValue = SeqFileReader.getFirstValue(stat.getPath().toString());
                LOG.info("Read back from sequencefile: " + readValue);
                foundRecord = true;
                // Add trailing '\n' to expected value since SqoopRecord.toString()
                // encodes the record delim.
                if (null == expectedVal) {
                    assertEquals("Error validating result from SeqFile", "null\n", readValue.toString());
                } else {
                    assertEquals("Error validating result from SeqFile", expectedVal + "\n",
                            readValue.toString());
                }
            } catch (EOFException eoe) {
                // EOF in a file isn't necessarily a problem. We may have some
                // empty sequence files, which will throw this. Just continue
                // in the loop.
            }
        }

        if (!foundRecord) {
            fail("Couldn't read any records from SequenceFiles");
        }
    } catch (IOException ioe) {
        fail("IOException: " + ioe.toString());
    } finally {
        // Restore the class loader that was current before the jar was added.
        if (null != prevClassLoader) {
            ClassLoaderStack.setCurrentClassLoader(prevClassLoader);
        }
    }
}

From source file:com.cloudera.sqoop.util.AppendUtils.java

License:Apache License

/**
 * Computes the partition number to use for the next appended data file.
 *
 * <p>Non-directory entries of {@code targetDir} whose name matches
 * {@code part.*-NNNNN.*} are examined. The result is one more than the
 * highest five-digit number found, or 0 when nothing matches.
 *
 * @param fs the file system holding the target directory
 * @param targetDir the directory to scan
 * @return the next free partition number
 * @throws IOException if the directory cannot be listed
 */
private int getNextPartition(FileSystem fs, Path targetDir) throws IOException {
    FileStatus[] candidates = fs.listStatus(targetDir);
    if (candidates == null || candidates.length == 0) {
        return 0;
    }

    Pattern partFile = Pattern.compile("part.*-([0-9][0-9][0-9][0-9][0-9]).*");
    int next = 0;
    for (FileStatus candidate : candidates) {
        if (candidate.isDir()) {
            continue;
        }
        Matcher m = partFile.matcher(candidate.getPath().getName());
        if (!m.matches()) {
            continue;
        }
        // Keep the running result one past the largest number seen so far.
        next = Math.max(next, Integer.parseInt(m.group(1)) + 1);
    }

    if (next > 0) {
        LOG.info("Using found partition " + next);
    }
    return next;
}

From source file:com.cloudera.sqoop.util.AppendUtils.java

License:Apache License

/**
 * Move files from source to target using a specified starting partition.
 *
 * <p>Files whose name matches {@code part.*-NNNNN.*} are renamed into
 * {@code targetDir} with consecutive partition numbers starting at
 * {@code partitionStart}; other files are left in place. Directories are
 * moved under their own name, with a numeric suffix appended while a path
 * of that name already exists in the target.
 *
 * @param fs the file system holding both directories
 * @param sourceDir the directory to move entries out of
 * @param targetDir the directory to move entries into
 * @param partitionStart the partition number given to the first data file
 * @throws IOException if the source cannot be listed or a rename reports failure
 */
private void moveFiles(FileSystem fs, Path sourceDir, Path targetDir, int partitionStart) throws IOException {

    NumberFormat numpart = NumberFormat.getInstance();
    numpart.setMinimumIntegerDigits(PARTITION_DIGITS);
    numpart.setGroupingUsed(false);
    Pattern patt = Pattern.compile("part.*-([0-9][0-9][0-9][0-9][0-9]).*");
    FileStatus[] tempFiles = fs.listStatus(sourceDir);

    if (null == tempFiles) {
        // If we've already checked that the dir exists, and now it can't be
        // listed, this is a genuine error (permissions, fs integrity, or other).
        throw new IOException("Could not list files from " + sourceDir);
    }

    // Move and rename files & directories from temporary to target-dir thus
    // appending file's next partition
    for (FileStatus fileStat : tempFiles) {
        Path source = fileStat.getPath();
        Path destination;
        if (!fileStat.isDir()) {
            // Move imported data files
            String filename = source.getName();
            Matcher mat = patt.matcher(filename);
            if (!mat.matches()) {
                // Not a data file; it stays in the source directory.
                continue;
            }
            String name = getFilename(filename);
            String fileToMove = name.concat(numpart.format(partitionStart++));
            String extension = getFileExtension(filename);
            if (extension != null) {
                fileToMove = fileToMove.concat(extension);
            }
            LOG.debug("Filename: " + filename + " repartitioned to: " + fileToMove);
            destination = new Path(targetDir, fileToMove);
        } else {
            // Move directories (_logs & any other)
            String dirName = source.getName();
            destination = new Path(targetDir, dirName);
            int dirNumber = 0;
            while (fs.exists(destination)) {
                destination = new Path(targetDir, dirName.concat("-").concat(numpart.format(dirNumber++)));
            }
            LOG.debug("Directory: " + dirName + " renamed to: " + destination.getName());
        }

        // rename() signals failure through its return value; ignoring it
        // would silently drop the entry from the append.
        if (!fs.rename(source, destination)) {
            throw new IOException("Could not move " + source + " to " + destination);
        }
    }
}

From source file:com.clustertest2.clustertest2.vectorization.DocTokenizer.java

/**
 * Queues every entry of {@code dir} for tokenizing and blocks until the
 * outstanding-work counter reads zero.
 *
 * <p>The counter is reset, then incremented once per queued path. Any
 * exception raised while queuing is reported by printing its class only.
 * The method then polls the counter once per second; the code that
 * decrements it is not visible here (presumably the tokenizer workers).
 *
 * @param dir the directory whose entries are queued
 * @throws InterruptedException if the waiting thread is interrupted
 */
public void tokenizeDirectory(Path dir) throws InterruptedException {
    try {
        numThreads.set(0);
        for (FileStatus entry : ClusterFileService.FS.listStatus(dir)) {
            System.out.println("adding work");
            Path document = entry.getPath();
            tokenizer.addWork(document);
            numThreads.incrementAndGet();
        }
    } catch (Exception queueFailure) {
        System.out.println(queueFailure.getClass());
    } finally {
        // Poll until all queued work has been accounted for.
        while (0 != numThreads.get()) {
            Thread.sleep(1000);
        }
        System.out.println("Done tokenizing");
    }
}

From source file:com.collective.celos.ci.testing.fixtures.create.OutputFixDirFromHdfsCreator.java

License:Apache License

/**
 * Builds a fixture object for {@code path}, recursing into directories.
 *
 * <p>A non-directory becomes a {@code FixFile} wrapping the stream opened
 * on that path. A directory becomes a {@code FixDir} whose map is keyed by
 * each child's name.
 *
 * @param path the file or directory to read
 * @param context supplies the file system to read from
 * @return the fixture object describing {@code path}
 * @throws Exception if the file system cannot be queried or opened
 */
private FixFsObject read(Path path, CelosCiContext context) throws Exception {
    FileStatus fileStatus = context.getFileSystem().getFileStatus(path);
    if (!fileStatus.isDirectory()) {
        return new FixFile(context.getFileSystem().open(fileStatus.getPath()));
    }

    Map<String, FixFsObject> content = Maps.newHashMap();
    for (FileStatus childStatus : context.getFileSystem().listStatus(fileStatus.getPath())) {
        Path childPath = childStatus.getPath();
        FixFsObject child = read(childPath, context);
        content.put(childPath.getName(), child);
    }
    return new FixDir(content);
}