List of usage examples for org.apache.hadoop.fs FileSystem isFile
@Deprecated public boolean isFile(Path f) throws IOException
From source file:com.ibm.bi.dml.runtime.io.WriterTextCSV.java
License:Open Source License
/** * Method to merge multiple CSV part files on HDFS into a single CSV file on HDFS. * The part files are created by CSV_WRITE MR job. * /*from w ww .ja v a 2 s .com*/ * This method is invoked from CP-write instruction. * * @param srcFileName * @param destFileName * @param csvprop * @param rlen * @param clen * @throws IOException */ public void mergeCSVPartFiles(String srcFileName, String destFileName, CSVFileFormatProperties csvprop, long rlen, long clen) throws IOException { Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf()); Path srcFilePath = new Path(srcFileName); Path mergedFilePath = new Path(destFileName); FileSystem hdfs = FileSystem.get(conf); if (hdfs.exists(mergedFilePath)) { hdfs.delete(mergedFilePath, true); } OutputStream out = hdfs.create(mergedFilePath, true); // write out the header, if needed if (csvprop.hasHeader()) { StringBuilder sb = new StringBuilder(); for (int i = 0; i < clen; i++) { sb.append("C" + (i + 1)); if (i < clen - 1) sb.append(csvprop.getDelim()); } sb.append('\n'); out.write(sb.toString().getBytes()); sb.setLength(0); } // if the source is a directory if (hdfs.isDirectory(srcFilePath)) { try { FileStatus[] contents = hdfs.listStatus(srcFilePath); Path[] partPaths = new Path[contents.length]; int numPartFiles = 0; for (int i = 0; i < contents.length; i++) { if (!contents[i].isDirectory()) { partPaths[i] = contents[i].getPath(); numPartFiles++; } } Arrays.sort(partPaths); for (int i = 0; i < numPartFiles; i++) { InputStream in = hdfs.open(partPaths[i]); try { IOUtils.copyBytes(in, out, conf, false); if (i < numPartFiles - 1) out.write('\n'); } finally { IOUtilFunctions.closeSilently(in); } } } finally { IOUtilFunctions.closeSilently(out); } } else if (hdfs.isFile(srcFilePath)) { InputStream in = null; try { in = hdfs.open(srcFilePath); IOUtils.copyBytes(in, out, conf, true); } finally { IOUtilFunctions.closeSilently(in); IOUtilFunctions.closeSilently(out); } } else { throw new 
IOException(srcFilePath.toString() + ": No such file or directory"); } }
From source file:com.ibm.bi.dml.runtime.io.WriterTextCSV.java
License:Open Source License
/**
 * Adds a generated header line ({@code C1<delim>C2...}, {@code clen} columns)
 * to CSV data on HDFS.
 *
 * <p>If the writer's properties ({@code _props}) have no header, this method
 * returns immediately without touching either path. If the source is a
 * directory, the header is prepended to the first part file (in sorted order)
 * and the directory is then renamed to the destination. If the source is a
 * plain file, the destination is created with the header followed by the
 * source contents.
 *
 * @param srcFileName  source file, or directory of part files
 * @param destFileName destination path
 * @param rlen         number of rows (not used by this method)
 * @param clen         number of columns, used to generate the header line
 * @throws IOException if the source is neither a file nor a directory, if a
 *                     source directory contains no part files, or if any HDFS
 *                     operation fails
 */
@SuppressWarnings("unchecked")
public void addHeaderToCSV(String srcFileName, String destFileName, long rlen, long clen) throws IOException {
    Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());

    Path srcFilePath = new Path(srcFileName);
    Path destFilePath = new Path(destFileName);
    FileSystem hdfs = FileSystem.get(conf);

    if (!_props.hasHeader()) {
        /*
         * TODO: Intended behavior is to simply move srcFile to destFile, but the
         * move is currently disabled, so this branch is a no-op.
         *
         * Background: a plain hdfs.rename() does not seem to create missing
         * parent directories. For destFilePath = /user/biadmin/csv/temp/out/file.csv
         * where only /user/biadmin/csv/ exists, the roundabout sequence would be:
         * delete dest, createNewFile(dest) to force creation of .../temp/out/,
         * delete dest again (keeping the directories), then rename src to dest.
         */
        return;
    }

    // construct the header line
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < clen; i++) {
        sb.append("C" + (i + 1));
        if (i < clen - 1) {
            sb.append(_props.getDelim());
        }
    }
    sb.append('\n');
    byte[] header = sb.toString().getBytes();

    if (hdfs.isDirectory(srcFilePath)) {
        // compute sorted order among part files
        ArrayList<Path> files = new ArrayList<Path>();
        for (FileStatus stat : hdfs.listStatus(srcFilePath, CSVReblockMR.hiddenFileFilter)) {
            files.add(stat.getPath());
        }
        if (files.isEmpty()) {
            // Fail with a descriptive checked exception instead of the
            // IndexOutOfBoundsException that files.get(0) would raise.
            throw new IOException(srcFilePath.toString() + ": No part files found in directory");
        }
        Collections.sort(files);

        // first part file path
        Path firstpart = files.get(0);

        // create a temp file, and add header and contents of first part
        Path tmp = new Path(firstpart.toString() + ".tmp");
        OutputStream out = hdfs.create(tmp, true);
        InputStream in = null;
        try {
            // header write is inside the try so 'out' is released if it fails
            out.write(header);
            in = hdfs.open(firstpart);
            IOUtils.copyBytes(in, out, conf, true);
        } finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }

        // rename tmp to firstpart
        hdfs.delete(firstpart, true);
        hdfs.rename(tmp, firstpart);

        // rename srcfile to destFile
        hdfs.delete(destFilePath, true);
        hdfs.createNewFile(destFilePath); // force the creation of directory structure
        hdfs.delete(destFilePath, true); // delete the file, but preserve the directory structure
        hdfs.rename(srcFilePath, destFilePath); // move the data
    } else if (hdfs.isFile(srcFilePath)) {
        // create destination file
        OutputStream out = hdfs.create(destFilePath, true);
        InputStream in = null;
        try {
            // write header, then copy the data from srcFile
            out.write(header);
            in = hdfs.open(srcFilePath);
            IOUtils.copyBytes(in, out, conf, true);
        } finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }
    } else {
        throw new IOException(srcFilePath.toString() + ": No such file or directory");
    }
}
From source file:com.ibm.jaql.io.hadoop.FileOutputConfigurator.java
License:Apache License
public void setSequential(JobConf conf) throws Exception { registerSerializers(conf);//from w w w.j a va2s .c o m // For an expression, the location is the final file name Path outPath = new Path(location); FileSystem fs = outPath.getFileSystem(conf); outPath = outPath.makeQualified(fs); if (fs.exists(outPath)) { // TODO: Jaql currently has overwrite semantics; add flag to control this if (fs.isFile(outPath)) { fs.delete(outPath, false); } else { // Look for a map-reduce output directory FileStatus[] nonMR = fs.listStatus(outPath, new PathFilter() { boolean onlyOne = true; public boolean accept(Path path) { String name = path.getName(); if (name.matches("([.][.]?)|([.]part-[0-9]+.crc)|(part-[0-9]+)")) { return false; } if (onlyOne) { onlyOne = false; return true; } return false; } }); if (nonMR.length > 0) { throw new IOException( "directory exists and is not a map-reduce output directory: " + nonMR[0].getPath()); } fs.delete(outPath, true); } } // In sequential mode, we will write directly to the output file // and bypass the _temporary directory and rename of the standard // FileOutputCommitter by using our own DirectFileOutputCommitter. FileOutputFormat.setOutputPath(conf, outPath.getParent()); conf.setClass("mapred.output.committer.class", DirectFileOutputCommiter.class, OutputCommitter.class); }
From source file:com.ibm.jaql.lang.expr.system.RUtil.java
License:Apache License
/** * Function that puts a local file into HDFS. * @param localPath/* w w w .jav a 2s .c o m*/ * @param hdfsPath * @return */ public static boolean saveToHDFS(String localPath, String hdfsPath) { try { Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(conf); int bufferSize = 4 * 1024; byte[] buffer = new byte[bufferSize]; InputStream input = new BufferedInputStream(new FileInputStream(localPath), bufferSize); Path outputPath = new Path(hdfsPath); if (fs.exists(outputPath)) { if (!fs.isFile(outputPath)) { throw new IOException("Output path is a directory that already exists."); } LOG.info("Output path" + outputPath + " already exists. Overwriting it."); } FSDataOutputStream output = fs.create(outputPath, true); int numBytesRead; while ((numBytesRead = input.read(buffer)) > 0) { output.write(buffer, 0, numBytesRead); } input.close(); output.close(); return true; } catch (IOException e) { LOG.info("Error in writing file to HDFS.", e); return false; } }
From source file:com.idvp.platform.hdfs.HDFSDataStream.java
License:Apache License
protected void doOpen(Configuration conf, Path dstPath, FileSystem hdfs) throws IOException { boolean appending = false; if (conf.getBoolean("hdfs.append.support", false) == true && hdfs.isFile(dstPath)) { outStream = hdfs.append(dstPath); appending = true;/*from ww w. j a va2 s . c o m*/ } else { outStream = hdfs.create(dstPath); } serializer = new BodyTextEventSerializer.Builder().build(outStream); if (appending && !serializer.supportsReopen()) { outStream.close(); serializer = null; throw new IOException("serializer (" + "TEXT" + ") does not support append"); } // must call superclass to check for replication issues registerCurrentStream(outStream, hdfs, dstPath); if (appending) { serializer.afterReopen(); } else { serializer.afterCreate(); } }
From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyMapper.java
License:Apache License
@Test public void testRun() { try {/*w ww .j a v a 2 s .co m*/ deleteState(); createSourceData(); FileSystem fs = cluster.getFileSystem(); CopyMapper copyMapper = new CopyMapper(); StatusReporter reporter = new StubStatusReporter(); InMemoryWriter writer = new InMemoryWriter(); Mapper<Text, FileStatus, NullWritable, Text>.Context context = getMapperContext(copyMapper, reporter, writer); copyMapper.setup(context); for (Path path : pathList) { copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)), fs.getFileStatus(path), context); } // Check that the maps worked. for (Path path : pathList) { final Path targetPath = new Path(path.toString().replaceAll(SOURCE_PATH, TARGET_PATH)); Assert.assertTrue(fs.exists(targetPath)); Assert.assertTrue(fs.isFile(targetPath) == fs.isFile(path)); Assert.assertEquals(fs.getFileStatus(path).getReplication(), fs.getFileStatus(targetPath).getReplication()); Assert.assertEquals(fs.getFileStatus(path).getBlockSize(), fs.getFileStatus(targetPath).getBlockSize()); Assert.assertTrue( !fs.isFile(targetPath) || fs.getFileChecksum(targetPath).equals(fs.getFileChecksum(path))); } Assert.assertEquals(pathList.size(), reporter.getCounter(CopyMapper.Counter.PATHS_COPIED).getValue()); // Here file is compressed file. 
So, we should compare the file length // with the number of bytes read long totalSize = 0; for (Path path : pathList) { totalSize += fs.getFileStatus(path).getLen(); } Assert.assertEquals(totalSize, reporter.getCounter(CopyMapper.Counter.BYTES_COPIED).getValue()); long totalCounterValue = 0; for (Text value : writer.values()) { String tmp[] = value.toString().split(ConduitConstants.AUDIT_COUNTER_NAME_DELIMITER); Assert.assertEquals(4, tmp.length); Long numOfMsgs = Long.parseLong(tmp[3]); totalCounterValue += numOfMsgs; } Assert.assertEquals(nFiles * NUMBER_OF_MESSAGES_PER_FILE, totalCounterValue); testCopyingExistingFiles(fs, copyMapper, context); } catch (Exception e) { LOG.error("Unexpected exception: ", e); Assert.assertTrue(false); } }
From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyMapper.java
License:Apache License
/**
 * Sets the given owner and group, and rwx permission for user, group and
 * other, on every regular file in {@code pathList}.
 *
 * @param user  new owner
 * @param group new group
 * @throws IOException if a file-system call fails
 */
private static void changeUserGroup(String user, String group) throws IOException {
    FileSystem fileSys = cluster.getFileSystem();
    FsPermission openToAll = new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL);
    for (Path entry : pathList) {
        if (!fileSys.isFile(entry)) {
            continue;
        }
        fileSys.setOwner(entry, user, group);
        fileSys.setPermission(entry, openToAll);
    }
}
From source file:com.inmobi.conduit.distcp.tools.SimpleCopyListing.java
License:Apache License
@Override protected void validatePaths(DistCpOptions options) throws IOException, InvalidInputException { if (options.isSkipPathValidation()) { LOG.debug("Skipping Path Validation in disctp"); return;//from w w w . ja v a 2 s . com } Path targetPath = options.getTargetPath(); FileSystem targetFS = targetPath.getFileSystem(getConf()); boolean targetIsFile = targetFS.isFile(targetPath); //If target is a file, then source has to be single file if (targetIsFile) { if (options.getSourcePaths().size() > 1) { throw new InvalidInputException("Multiple source being copied to a file: " + targetPath); } Path srcPath = options.getSourcePaths().get(0); FileSystem sourceFS = srcPath.getFileSystem(getConf()); if (!sourceFS.isFile(srcPath)) { throw new InvalidInputException( "Cannot copy " + srcPath + ", which is not a file to " + targetPath); } } for (Path path : options.getSourcePaths()) { FileSystem fs = path.getFileSystem(getConf()); if (!fs.exists(path)) { throw new InvalidInputException(path + " doesn't exist"); } } /* This is requires to allow map tasks to access each of the source clusters. This would retrieve the delegation token for each unique file system and add them to job's private credential store */ Credentials credentials = getCredentials(); if (credentials != null) { Path[] inputPaths = options.getSourcePaths().toArray(new Path[1]); TokenCache.obtainTokensForNamenodes(credentials, inputPaths, getConf()); } }
From source file:com.inmobi.conduit.distcp.tools.SimpleCopyListing.java
License:Apache License
/**
 * Chooses the source root for a given source entry: either the entry's own
 * path or its parent.
 *
 * <p>A single non-directory source yields its own path when the target is a
 * file or does not exist, and its parent otherwise. In all other cases the
 * entry's own path is returned only when it is a directory and either it is
 * the single source with a non-existent target, or sync-folder or overwrite
 * is requested; otherwise the parent is returned.
 *
 * @param sourceStatus status of the source entry
 * @param options      copy options
 * @return the path to use as source root
 * @throws IOException if a file-system call fails
 */
private Path computeSourceRootPath(FileStatus sourceStatus, DistCpOptions options) throws IOException {
    Path target = options.getTargetPath();
    FileSystem targetFS = target.getFileSystem(getConf());

    boolean singleSource = options.getSourcePaths().size() == 1;

    if (singleSource && !sourceStatus.isDir()) {
        // Solitary file being copied.
        boolean copyOntoTarget = targetFS.isFile(target) || !targetFS.exists(target);
        if (copyOntoTarget) {
            return sourceStatus.getPath();
        }
        return sourceStatus.getPath().getParent();
    }

    boolean specialHandling = (singleSource && !targetFS.exists(target))
            || options.shouldSyncFolder()
            || options.shouldOverwrite();

    if (specialHandling && sourceStatus.isDir()) {
        return sourceStatus.getPath();
    }
    return sourceStatus.getPath().getParent();
}
From source file:com.inmobi.conduit.distcp.tools.TestDistCp.java
License:Apache License
/**
 * Asserts that, for every path in {@code pathList}, the corresponding target
 * path exists and has the same file/non-file kind as the source.
 *
 * @throws Exception if a file-system call fails
 */
private static void verifyResults() throws Exception {
    for (Path entry : pathList) {
        FileSystem fileSys = cluster.getFileSystem();

        Path qualifiedSource = entry.makeQualified(fileSys);
        String targetLocation = qualifiedSource.toString().replaceAll(SOURCE_PATH, TARGET_PATH);
        Path expectedTarget = new Path(targetLocation);

        Assert.assertTrue(fileSys.exists(expectedTarget));
        Assert.assertEquals(fileSys.isFile(qualifiedSource), fileSys.isFile(expectedTarget));
    }
}