List of usage examples for org.apache.hadoop.fs.FileStatus.getPath()
public Path getPath()
From source file:com.cloudera.seismic.segy.SegyUnloader.java
License:Open Source License
@Override public int run(String[] args) throws Exception { Options options = new Options(); options.addOption("input", true, "SU sequence files to export from Hadoop"); options.addOption("output", true, "The local SU file to write"); // Parse the commandline and check for required arguments. CommandLine cmdLine = new PosixParser().parse(options, args, false); if (!cmdLine.hasOption("input") || !cmdLine.hasOption("output")) { System.out.println("Mising required input/output arguments"); new HelpFormatter().printHelp("SegyUnloader", options); System.exit(1);//from w w w . java2 s . c o m } Configuration conf = getConf(); FileSystem hdfs = FileSystem.get(conf); Path inputPath = new Path(cmdLine.getOptionValue("input")); if (!hdfs.exists(inputPath)) { System.out.println("Input path does not exist"); System.exit(1); } PathFilter pf = new PathFilter() { @Override public boolean accept(Path path) { return !path.getName().startsWith("_"); } }; DataOutputStream os = new DataOutputStream(new FileOutputStream(cmdLine.getOptionValue("output"))); for (FileStatus fs : hdfs.listStatus(inputPath, pf)) { write(fs.getPath(), os, conf); } os.close(); return 0; }
From source file:com.cloudera.sqoop.io.TestSplittableBufferedWriter.java
License:Apache License
/**
 * Creates the directory the test files are written to and clears out any
 * files already present in it.
 *
 * <p>The test fails if the directory contains a subdirectory, if a leftover
 * file cannot be deleted, or if the directory does not exist afterwards.
 */
private void ensureEmptyWriteDir() throws IOException {
  FileSystem localFs = FileSystem.getLocal(getConf());
  Path writeDir = getWritePath();
  localFs.mkdirs(writeDir);

  FileStatus[] leftovers = localFs.listStatus(writeDir);
  for (int i = 0; i < leftovers.length; i++) {
    FileStatus entry = leftovers[i];
    if (entry.isDir()) {
      fail("setUp(): Write directory " + writeDir + " contains subdirectories");
    }

    LOG.debug("setUp(): Removing " + entry.getPath());
    boolean deleted = localFs.delete(entry.getPath(), false);
    if (!deleted) {
      fail("setUp(): Could not delete residual file " + entry.getPath());
    }
  }

  boolean dirPresent = localFs.exists(writeDir);
  if (!dirPresent) {
    fail("setUp: Could not create " + writeDir);
  }
}
From source file:com.cloudera.sqoop.TestAppendUtils.java
License:Apache License
/**
 * Lists the data files directly under {@code path}.
 *
 * <p>An entry counts as a data file when it is not a directory and its whole
 * name matches {@code part.*-NNNNN.*}, where {@code NNNNN} is five digits.
 *
 * @param fs the file system to query
 * @param path the directory to list
 * @return FileStatus for data files only.
 * @throws IOException if the directory cannot be listed
 */
private FileStatus[] listFiles(FileSystem fs, Path path) throws IOException {
  FileStatus[] fileStatuses = fs.listStatus(path);
  // Typed list: avoids the raw type and the unchecked cast on the way out.
  ArrayList<FileStatus> files = new ArrayList<FileStatus>();
  Pattern patt = Pattern.compile("part.*-([0-9][0-9][0-9][0-9][0-9]).*");
  for (FileStatus fstat : fileStatuses) {
    if (fstat.isDir()) {
      continue;
    }
    Matcher mat = patt.matcher(fstat.getPath().getName());
    if (mat.matches()) {
      files.add(fstat);
    }
  }
  return files.toArray(new FileStatus[files.size()]);
}
From source file:com.cloudera.sqoop.TestMerge.java
License:Apache License
/** * Return true if there's a file in 'dirName' with a line that starts with * 'prefix'.//from w w w. java2 s .c om */ protected boolean recordStartsWith(String prefix, String dirName) throws Exception { Path warehousePath = new Path(LOCAL_WAREHOUSE_DIR); Path targetPath = new Path(warehousePath, dirName); FileSystem fs = FileSystem.getLocal(new Configuration()); FileStatus[] files = fs.listStatus(targetPath); if (null == files || files.length == 0) { fail("Got no import files!"); } for (FileStatus stat : files) { Path p = stat.getPath(); if (p.getName().startsWith("part-")) { if (checkFileForLine(fs, p, prefix)) { // We found the line. Nothing further to do. return true; } } } return false; }
From source file:com.cloudera.sqoop.TestMultiMaps.java
License:Apache License
/** @return a list of Path objects for each data file */ protected List<Path> getDataFilePaths() throws IOException { List<Path> paths = new ArrayList<Path>(); Configuration conf = new Configuration(); if (!BaseSqoopTestCase.isOnPhysicalCluster()) { conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS); }//from ww w . j a v a 2 s.c o m FileSystem fs = FileSystem.get(conf); FileStatus[] stats = fs.listStatus(getTablePath(), new Utils.OutputFileUtils.OutputFilesFilter()); for (FileStatus stat : stats) { paths.add(stat.getPath()); } return paths; }
From source file:com.cloudera.sqoop.testutil.ImportJobTestCase.java
License:Apache License
/** * Do a MapReduce-based import of the table and verify that the results * were imported as expected. (tests readFields(ResultSet) and toString()) * @param expectedVal the value we injected into the table. * @param importCols the columns to import. If null, all columns are used. *//* w ww. j av a 2 s . c o m*/ protected void verifyImport(String expectedVal, String[] importCols) { // paths to where our output file will wind up. Path tableDirPath = getTablePath(); removeTableDir(); Configuration conf = getConf(); SqoopOptions opts = getSqoopOptions(conf); // run the tool through the normal entry-point. int ret; try { Sqoop importer = new Sqoop(new ImportTool(), conf, opts); ret = Sqoop.runSqoop(importer, getArgv(true, importCols, conf)); } catch (Exception e) { LOG.error("Got exception running Sqoop: " + e.toString()); throw new RuntimeException(e); } // expect a successful return. assertEquals("Failure during job", 0, ret); opts = getSqoopOptions(conf); try { ImportTool importTool = new ImportTool(); opts = importTool.parseArguments(getArgv(false, importCols, conf), conf, opts, true); } catch (Exception e) { fail(e.toString()); } CompilationManager compileMgr = new CompilationManager(opts); String jarFileName = compileMgr.getJarFilename(); ClassLoader prevClassLoader = null; try { prevClassLoader = ClassLoaderStack.addJarFile(jarFileName, getTableName()); // Now open and check all part-files in the table path until we find // a non-empty one that we can verify contains the value. if (!BaseSqoopTestCase.isOnPhysicalCluster()) { conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS); } FileSystem fs = FileSystem.get(conf); FileStatus[] stats = fs.listStatus(tableDirPath); if (stats == null || stats.length == 0) { fail("Error: no files in " + tableDirPath); } boolean foundRecord = false; for (FileStatus stat : stats) { if (!stat.getPath().getName().startsWith("part-") && !stat.getPath().getName().startsWith("data-")) { // This isn't a data file. Ignore it. 
continue; } try { Object readValue = SeqFileReader.getFirstValue(stat.getPath().toString()); LOG.info("Read back from sequencefile: " + readValue); foundRecord = true; // Add trailing '\n' to expected value since SqoopRecord.toString() // encodes the record delim. if (null == expectedVal) { assertEquals("Error validating result from SeqFile", "null\n", readValue.toString()); } else { assertEquals("Error validating result from SeqFile", expectedVal + "\n", readValue.toString()); } } catch (EOFException eoe) { // EOF in a file isn't necessarily a problem. We may have some // empty sequence files, which will throw this. Just continue // in the loop. } } if (!foundRecord) { fail("Couldn't read any records from SequenceFiles"); } } catch (IOException ioe) { fail("IOException: " + ioe.toString()); } finally { if (null != prevClassLoader) { ClassLoaderStack.setCurrentClassLoader(prevClassLoader); } } }
From source file:com.cloudera.sqoop.util.AppendUtils.java
License:Apache License
/**
 * Determines the partition number at which appended data files should
 * start: one past the highest five-digit partition number found among the
 * non-directory entries of targetDir whose names match part.*-NNNNN.*, or 0
 * when no such entry exists.
 */
private int getNextPartition(FileSystem fs, Path targetDir) throws IOException {
  FileStatus[] existingFiles = fs.listStatus(targetDir);
  if (existingFiles == null || existingFiles.length == 0) {
    return 0;
  }

  Pattern partNamePattern = Pattern.compile("part.*-([0-9][0-9][0-9][0-9][0-9]).*");
  int next = 0;
  for (FileStatus candidate : existingFiles) {
    if (candidate.isDir()) {
      continue;
    }
    Matcher m = partNamePattern.matcher(candidate.getPath().getName());
    if (!m.matches()) {
      continue;
    }
    // Keep the running result one past the largest partition seen so far.
    next = Math.max(next, Integer.parseInt(m.group(1)) + 1);
  }

  if (next > 0) {
    LOG.info("Using found partition " + next);
  }
  return next;
}
From source file:com.cloudera.sqoop.util.AppendUtils.java
License:Apache License
/** * Move files from source to target using a specified starting partition. *//*from w ww. jav a 2 s . c o m*/ private void moveFiles(FileSystem fs, Path sourceDir, Path targetDir, int partitionStart) throws IOException { NumberFormat numpart = NumberFormat.getInstance(); numpart.setMinimumIntegerDigits(PARTITION_DIGITS); numpart.setGroupingUsed(false); Pattern patt = Pattern.compile("part.*-([0-9][0-9][0-9][0-9][0-9]).*"); FileStatus[] tempFiles = fs.listStatus(sourceDir); if (null == tempFiles) { // If we've already checked that the dir exists, and now it can't be // listed, this is a genuine error (permissions, fs integrity, or other). throw new IOException("Could not list files from " + sourceDir); } // Move and rename files & directories from temporary to target-dir thus // appending file's next partition for (FileStatus fileStat : tempFiles) { if (!fileStat.isDir()) { // Move imported data files String filename = fileStat.getPath().getName(); Matcher mat = patt.matcher(filename); if (mat.matches()) { String name = getFilename(filename); String fileToMove = name.concat(numpart.format(partitionStart++)); String extension = getFileExtension(filename); if (extension != null) { fileToMove = fileToMove.concat(extension); } LOG.debug("Filename: " + filename + " repartitioned to: " + fileToMove); fs.rename(fileStat.getPath(), new Path(targetDir, fileToMove)); } } else { // Move directories (_logs & any other) String dirName = fileStat.getPath().getName(); Path path = new Path(targetDir, dirName); int dirNumber = 0; while (fs.exists(path)) { path = new Path(targetDir, dirName.concat("-").concat(numpart.format(dirNumber++))); } LOG.debug("Directory: " + dirName + " renamed to: " + path.getName()); fs.rename(fileStat.getPath(), path); } } }
From source file:com.clustertest2.clustertest2.vectorization.DocTokenizer.java
public void tokenizeDirectory(Path dir) throws InterruptedException { try {//from www . ja v a 2 s .com numThreads.set(0); for (FileStatus f : ClusterFileService.FS.listStatus(dir)) { System.out.println("adding work"); tokenizer.addWork(f.getPath()); numThreads.incrementAndGet(); } } catch (Exception e) { System.out.println(e.getClass()); } finally { while (numThreads.get() != 0) { Thread.sleep(1000); } System.out.println("Done tokenizing"); } }
From source file:com.collective.celos.ci.testing.fixtures.create.OutputFixDirFromHdfsCreator.java
License:Apache License
private FixFsObject read(Path path, CelosCiContext context) throws Exception { FileStatus fileStatus = context.getFileSystem().getFileStatus(path); if (fileStatus.isDirectory()) { Map<String, FixFsObject> content = Maps.newHashMap(); FileStatus[] statuses = context.getFileSystem().listStatus(fileStatus.getPath()); for (int i = 0; i < statuses.length; i++) { FileStatus childStatus = statuses[i]; FixFsObject fixObject = read(childStatus.getPath(), context); content.put(childStatus.getPath().getName(), fixObject); }//from w w w.jav a 2s . com return new FixDir(content); } else { return new FixFile(context.getFileSystem().open(fileStatus.getPath())); } }