List of usage examples for org.apache.hadoop.io.IOUtils.closeStream
public static void closeStream(java.io.Closeable stream)
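closeStream closes the given stream and swallows any IOException thrown by close(); it is also null-safe, which is why the examples below call it from finally blocks without a null check. A minimal sketch of that canonical pattern (the class name and path are placeholders, not taken from the sources below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class CloseStreamSketch {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        FSDataInputStream in = null;
        try {
            in = fs.open(new Path("/tmp/example.txt")); // placeholder path
            IOUtils.copyBytes(in, System.out, 4096, false);
        } finally {
            // closeStream is null-safe and ignores any IOException from close(),
            // so it is safe to call here even if fs.open() threw.
            IOUtils.closeStream(in);
        }
    }
}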
From source file:com.github.dongjinleekr.hadoop.examples.DistributedCacheExample.java
License:Apache License
public static void printCachePath(Configuration conf) throws IOException, URISyntaxException {
    FileSystem fs = FileSystem.get(conf);
    URI[] archives = DistributedCache.getCacheArchives(conf);
    for (URI archive : archives) {
        HarFileSystem hfs = new HarFileSystem();
        String cacheUri = String.format("har://hdfs-%s:%d%s", fs.getUri().getHost(), fs.getUri().getPort(),
                archive.toString());
        System.out.println(cacheUri);
        hfs.initialize(new URI(cacheUri), conf);
        FileStatus root = hfs.listStatus(new Path("."))[0];
        FileStatus[] children = hfs.listStatus(root.getPath());
        for (FileStatus child : children) {
            System.out.println(child.getPath());
        }
        IOUtils.closeStream(hfs);
    }
}
From source file:com.hortonworks.pso.data.generator.mapreduce.DataGenMapper.java
License:Apache License
protected void setup(Context context) {
    // Get the config location from the job conf.
    String config = context.getConfiguration().get("json.cfg");
    System.out.println("Config File: " + config);

    // Read the config from the path.
    FileSystem FS1 = null;
    FSDataInputStream fsdis = null;
    try {
        FS1 = FileSystem.get(context.getConfiguration());
        Path path = new Path(config);
        fsdis = FS1.open(path);
        ObjectMapper mapper = new ObjectMapper();
        JsonNode root = mapper.readValue(fsdis, JsonNode.class);
        recordGenerator = new RecordGenerator(root);
    } catch (IOException e) {
        e.printStackTrace();
        throw new RuntimeException(e);
    } finally {
        IOUtils.closeStream(fsdis);
    }
}
From source file:com.hortonworks.pso.data.generator.mapreduce.KafkaDataGenMapper.java
License:Apache License
protected void setup(Context context) {
    // Get the config location from the job conf.
    String config = context.getConfiguration().get(CONFIG_FILE);
    System.out.println("Config File: " + config);

    // Read the config from the path.
    FileSystem FS1 = null;
    FSDataInputStream fsdis = null;
    try {
        FS1 = FileSystem.get(context.getConfiguration());
        Path path = new Path(config);
        fsdis = FS1.open(path);
        ObjectMapper mapper = new ObjectMapper();
        JsonNode root = mapper.readValue(fsdis, JsonNode.class);
        recordGenerator = new RecordGenerator(root);
    } catch (IOException e) {
        e.printStackTrace();
        throw new RuntimeException(e);
    } finally {
        IOUtils.closeStream(fsdis);
    }
}
From source file:com.ibm.stocator.fs.swift2d.systemtests.TestSwiftFileSystemExtendedContract.java
License:Apache License
@Test(timeout = SwiftTestConstants.SWIFT_TEST_TIMEOUT)
public void testWriteReadFile() throws Exception {
    final Path f = new Path(getBaseURI() + "/test/test");
    final FSDataOutputStream fsDataOutputStream = sFileSystem.create(f);
    final String message = "Test string";
    fsDataOutputStream.write(message.getBytes());
    fsDataOutputStream.close();
    assertExists("created file", f);
    FSDataInputStream open = null;
    try {
        open = sFileSystem.open(f);
        final byte[] bytes = new byte[512];
        final int read = open.read(bytes);
        final byte[] buffer = new byte[read];
        System.arraycopy(bytes, 0, buffer, 0, read);
        assertEquals(message, new String(buffer));
    } finally {
        sFileSystem.delete(f, false);
        IOUtils.closeStream(open);
    }
}
From source file:com.impala2go.seq.HadoopClient.java
License:Apache License
/**
 * Read a Hadoop sequence file and stream its values into {@code out}.
 *
 * @param seqPath full path of the sequence file to read (excluding fs protocol)
 * @param fs      Hadoop file system address
 * @param out     destination stream for the values
 *
 * @throws IOException
 * @throws IllegalAccessException related to reflection
 * @throws InstantiationException related to reflection
 */
@SuppressWarnings("rawtypes")
public static void readSequenceFile(String seqPath, String fs, OutputStream out)
        throws IOException, InstantiationException, IllegalAccessException {
    conf.set(DEFAULT_FS, fs);
    Path path = new Path(seqPath);
    // Create the reader:
    SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(path));
    try {
        // Instantiate the key and value classes recorded in the file header:
        WritableComparable key = (WritableComparable) reader.getKeyClass().newInstance();
        Writable value = (Writable) reader.getValueClass().newInstance();
        // Read and stream out the values only:
        while (reader.next(key, value)) {
            out.write(value.toString().getBytes());
            out.flush();
        }
    } finally {
        // Close the reader even if reading fails partway through.
        IOUtils.closeStream(reader);
    }
}
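Because closeStream accepts any java.io.Closeable, the same cleanup works on the writer side as well. A minimal sketch of the writer-side counterpart (not from the source above; the class name, path, and key/value types are illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class SequenceFileWriteSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        SequenceFile.Writer writer = null;
        try {
            writer = SequenceFile.createWriter(conf,
                    SequenceFile.Writer.file(new Path("/tmp/example.seq")), // placeholder path
                    SequenceFile.Writer.keyClass(Text.class),
                    SequenceFile.Writer.valueClass(IntWritable.class));
            writer.append(new Text("key"), new IntWritable(1));
        } finally {
            // SequenceFile.Writer implements Closeable, so closeStream applies here too.
            IOUtils.closeStream(writer);
        }
    }
}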
From source file:com.inmobi.conduit.CompressedFileReaderTest.java
License:Apache License
private void uncompress(String fileName) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
    CompressionCodec codec = codecFactory.getCodec(new Path(fileName));
    if (codec == null) {
        System.out.println("can't find codec");
        System.exit(1);
    }
    LOG.info("Using compression codec [" + codec.toString() + "]");
    CompressionInputStream is = codec.createInputStream(fs.open(new Path(fileName)));
    OutputStream out = null;
    try {
        String outputURI = CompressionCodecFactory.removeSuffix(fileName, codec.getDefaultExtension());
        out = fs.create(new Path(outputURI + "-uncompressed"));
        org.apache.hadoop.io.IOUtils.copyBytes(is, out, conf);
    } finally {
        org.apache.hadoop.io.IOUtils.closeStream(out);
        IOUtils.closeStream(is);
    }
}
From source file:com.inmobi.conduit.distcp.tools.CopyListing.java
License:Apache License
/**
 * Validate the final resulting path listing to see if there are any duplicate entries.
 *
 * @param pathToListFile - path listing built by doBuildListing
 * @throws IOException - any issues while checking for duplicates
 * @throws DuplicateFileException - if there are duplicates
 */
protected void checkForDuplicates(Path pathToListFile) throws DuplicateFileException, IOException {
    Configuration config = getConf();
    FileSystem fs = pathToListFile.getFileSystem(config);
    Path sortedList = DistCpUtils.sortListing(fs, config, pathToListFile);
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, sortedList, config);
    try {
        Text lastKey = new Text("*"); // source relative path can never hold *
        FileStatus lastFileStatus = new FileStatus();
        Text currentKey = new Text();
        while (reader.next(currentKey)) {
            if (currentKey.equals(lastKey)) {
                FileStatus currentFileStatus = new FileStatus();
                reader.getCurrentValue(currentFileStatus);
                throw new DuplicateFileException("File " + lastFileStatus.getPath() + " and "
                        + currentFileStatus.getPath() + " would cause duplicates. Aborting");
            }
            reader.getCurrentValue(lastFileStatus);
            lastKey.set(currentKey);
        }
    } finally {
        IOUtils.closeStream(reader);
    }
}
From source file:com.inmobi.conduit.distcp.tools.FileBasedCopyListing.java
License:Apache License
protected static List<Path> fetchFileList(Path sourceListing, Configuration conf) throws IOException {
    List<Path> result = new ArrayList<Path>();
    FileSystem fs = sourceListing.getFileSystem(conf);
    BufferedReader input = null;
    try {
        input = new BufferedReader(new InputStreamReader(fs.open(sourceListing)));
        String line = input.readLine();
        while (line != null) {
            result.add(new Path(line));
            line = input.readLine();
        }
    } finally {
        IOUtils.closeStream(input);
    }
    return result;
}
From source file:com.inmobi.conduit.distcp.tools.mapred.CopyCommitter.java
License:Apache License
private void preserveFileAttributes(Configuration conf) throws IOException {
    String attrSymbols = conf.get(DistCpConstants.CONF_LABEL_PRESERVE_STATUS);
    LOG.info("About to preserve attributes: " + attrSymbols);

    EnumSet<FileAttribute> attributes = DistCpUtils.unpackAttributes(attrSymbols);

    Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
    FileSystem clusterFS = sourceListing.getFileSystem(conf);
    SequenceFile.Reader sourceReader = new SequenceFile.Reader(clusterFS, sourceListing, conf);
    long totalLen = clusterFS.getFileStatus(sourceListing).getLen();

    Path targetRoot = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));

    long preservedEntries = 0;
    try {
        FileStatus srcFileStatus = new FileStatus();
        Text srcRelPath = new Text();
        while (sourceReader.next(srcRelPath, srcFileStatus)) {
            if (!srcFileStatus.isDir())
                continue;
            Path targetFile = new Path(targetRoot.toString() + "/" + srcRelPath);
            // Skip the root folder; preserve its status only after the atomic commit
            // is complete. If it is changed any earlier, the atomic commit may fail.
            if (targetRoot.equals(targetFile))
                continue;
            FileSystem targetFS = targetFile.getFileSystem(conf);
            DistCpUtils.preserve(targetFS, targetFile, srcFileStatus, attributes);
            preservedEntries++; // count entries so the final log line is accurate
            HadoopCompat.progress(taskAttemptContext);
            HadoopCompat.setStatus(taskAttemptContext, "Preserving status on directory entries. ["
                    + sourceReader.getPosition() * 100 / totalLen + "%]");
        }
    } finally {
        IOUtils.closeStream(sourceReader);
    }
    LOG.info("Preserved status on " + preservedEntries + " dir entries on target");
}
From source file:com.inmobi.conduit.distcp.tools.mapred.CopyCommitter.java
License:Apache License
private void deleteMissing(Configuration conf) throws IOException {
    LOG.info("-delete option is enabled. About to remove entries from "
            + "target that are missing in source");

    Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
    FileSystem clusterFS = sourceListing.getFileSystem(conf);
    Path sortedSourceListing = DistCpUtils.sortListing(clusterFS, conf, sourceListing);

    Path targetListing = new Path(sourceListing.getParent(), "targetListing.seq");
    CopyListing target = new GlobbedCopyListing(conf, null);

    List<Path> targets = new ArrayList<Path>(1);
    Path targetFinalPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
    targets.add(targetFinalPath);
    DistCpOptions options = new DistCpOptions(targets, new Path("/NONE"));

    target.buildListing(targetListing, options);
    Path sortedTargetListing = DistCpUtils.sortListing(clusterFS, conf, targetListing);
    long totalLen = clusterFS.getFileStatus(sortedTargetListing).getLen();

    SequenceFile.Reader sourceReader = new SequenceFile.Reader(clusterFS, sortedSourceListing, conf);
    SequenceFile.Reader targetReader = new SequenceFile.Reader(clusterFS, sortedTargetListing, conf);

    long deletedEntries = 0;
    try {
        FileStatus srcFileStatus = new FileStatus();
        Text srcRelPath = new Text();
        FileStatus trgtFileStatus = new FileStatus();
        Text trgtRelPath = new Text();

        FileSystem targetFS = targetFinalPath.getFileSystem(conf);
        boolean srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
        while (targetReader.next(trgtRelPath, trgtFileStatus)) {
            while (srcAvailable && trgtRelPath.compareTo(srcRelPath) > 0) {
                srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
            }
            if (srcAvailable && trgtRelPath.equals(srcRelPath))
                continue;
            boolean result = (!targetFS.exists(trgtFileStatus.getPath())
                    || targetFS.delete(trgtFileStatus.getPath(), true));
            if (result) {
                LOG.info("Deleted " + trgtFileStatus.getPath() + " - Missing at source");
                deletedEntries++;
            } else {
                throw new IOException("Unable to delete " + trgtFileStatus.getPath());
            }
            HadoopCompat.progress(taskAttemptContext);
            HadoopCompat.setStatus(taskAttemptContext, "Deleting missing files from target. ["
                    + targetReader.getPosition() * 100 / totalLen + "%]");
        }
    } finally {
        IOUtils.closeStream(sourceReader);
        IOUtils.closeStream(targetReader);
    }
    LOG.info("Deleted " + deletedEntries + " from target: " + targets.get(0));
}