List of usage examples for org.apache.hadoop.fs FileSystem create
public FSDataOutputStream create(Path f, short replication) throws IOException
From source file:com.jbw.taroutputformat.TarOutputFormat.java
/**
 * Builds the record writer for a task attempt, backed by a freshly created
 * {@code .tar} work file.
 *
 * @param tac task attempt context; supplies the configuration and is used to
 *            resolve the default work file
 * @return a {@link TarOutputWriter} wrapping the stream of the new work file
 * @throws IOException if the work file's file system cannot be obtained or
 *                     the file cannot be created
 * @throws InterruptedException declared by the overridden method
 */
@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext tac) throws IOException, InterruptedException {
    final Configuration configuration = tac.getConfiguration();
    final Path workFile = getDefaultWorkFile(tac, ".tar");
    final FileSystem fileSystem = workFile.getFileSystem(configuration);
    // Second argument is the overwrite flag: false, so an existing file is not replaced.
    final OutputStream tarStream = fileSystem.create(workFile, false);
    return new TarOutputWriter<>(tarStream);
}
From source file:com.jeffy.hdfs.HDFSWriteFile.java
License:Apache License
/** * ??hdfs//from www.j a v a2 s .com * * @param args */ public static void main(String[] args) { if (args.length < 2) { System.err.println("Please input two parameter!"); System.out.println("Parameter: localfile hdfsfile"); System.exit(1); } String localPath = args[0]; String hdfsPath = args[1]; //?? Configuration config = new Configuration(); //?? try (InputStream in = new BufferedInputStream(new FileInputStream(localPath))) { FileSystem fs = FileSystem.get(URI.create(hdfsPath), config); try (FSDataOutputStream out = fs.create(new Path(hdfsPath), new Progressable() { @Override public void progress() { System.out.println("."); } })) { //??OutputStream,Hadooporg.apache.commons.io.IOUtils IOUtils.copy(in, out); System.out.println("File copy finished."); } } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } }
From source file:com.knewton.mapreduce.SSTableRecordReader.java
License:Apache License
/** * Copies a remote path to the local filesystem, while updating hadoop that we're making * progress. Doesn't support directories. *///from w w w . ja v a 2 s .c om @VisibleForTesting void copyToLocalFile(FileSystem remoteFS, FileSystem localFS, Path remote, Path local) throws IOException { // don't support transferring from remote directories FileStatus remoteStat = remoteFS.getFileStatus(remote); Preconditions.checkArgument(!remoteStat.isDirectory(), String.format("Path %s is directory!", remote)); // if local is a dir, copy to inside that dir, like 'cp /path/file /tmp/' would do if (localFS.exists(local)) { FileStatus localStat = localFS.getFileStatus(local); if (localStat.isDirectory()) { local = new Path(local, remote.getName()); } } long remoteFileSize = remoteStat.getLen(); // do actual copy InputStream in = null; OutputStream out = null; try { long startTime = System.currentTimeMillis(); long lastLogTime = 0; long bytesCopied = 0; in = remoteFS.open(remote); out = localFS.create(local, true); int buffSize = this.conf.getInt(CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_KEY, CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_DEFAULT); byte[] buf = new byte[buffSize]; int bytesRead = in.read(buf); while (bytesRead >= 0) { long now = System.currentTimeMillis(); // log transfer rate once per min, starting 1 min after transfer began if (now - lastLogTime > 60000L && now - startTime > 60000L) { double elapsedSec = (now - startTime) / 1000D; double bytesPerSec = bytesCopied / elapsedSec; LOG.info("Transferred {} of {} bytes at {} bytes per second", bytesCopied, remoteFileSize, bytesPerSec); lastLogTime = now; } this.ctx.progress(); out.write(buf, 0, bytesRead); bytesCopied += bytesRead; bytesRead = in.read(buf); } // try to close these outside of finally so we receive exception on failure out.close(); out = null; in.close(); in = null; } finally { // make sure everything's closed IOUtils.closeStream(out); IOUtils.closeStream(in); } }
From source file:com.knewton.mapreduce.SSTableRecordReaderTest.java
License:Apache License
/**
 * Verifies that tables are copied to the local file system: the remote file
 * status is queried, the single test byte is written to the local stream, and
 * the reader reports two components afterwards.
 */
@Test
public void testCopyTablesToLocal() throws Exception {
    final TaskAttemptContext attemptContext = getTaskAttemptContext(true, true, true);
    ssTableColumnRecordReader.initialize(inputSplit, attemptContext);
    // Run the real copy logic on the reader under test.
    doCallRealMethod().when(ssTableColumnRecordReader).copyTablesToLocal(any(FileSystem.class),
            any(FileSystem.class), any(Path.class), any(TaskAttemptContext.class));

    // Remote side: serves a one-byte file.
    final byte[] payload = { 0xA };
    final FileSystem remoteFileSystem = mock(FileSystem.class);
    final FSDataInputStream remoteInput = new FSDataInputStream(new MemoryDataInputStream(payload));
    when(remoteFileSystem.open(any(Path.class))).thenReturn(remoteInput);

    // Local side: captures whatever gets written.
    final FileSystem localFileSystem = mock(FileSystem.class);
    final FSDataOutputStream localOutput = mock(FSDataOutputStream.class);
    when(localFileSystem.create(any(Path.class), anyBoolean())).thenReturn(localOutput);

    final Path tablePath = inputSplit.getPath();

    // Every remote path is reported as a 10-byte regular file.
    final FileStatus remoteStatus = mock(FileStatus.class);
    when(remoteStatus.getLen()).thenReturn(10L);
    when(remoteStatus.isDirectory()).thenReturn(false);
    when(remoteFileSystem.getFileStatus(any(Path.class))).thenReturn(remoteStatus);

    ssTableColumnRecordReader.copyTablesToLocal(remoteFileSystem, localFileSystem, tablePath, attemptContext);

    verify(remoteFileSystem).getFileStatus(tablePath);
    ssTableColumnRecordReader.close();
    verify(localOutput).write(any(byte[].class), eq(0), eq(payload.length));
    assertEquals(2, ssTableColumnRecordReader.getComponentSize());
}
From source file:com.knewton.mapreduce.SSTableRecordReaderTest.java
License:Apache License
/**
 * Verifies that tables are copied to the local file system when the remote
 * side also reports a compression info component: the reader is expected to
 * end up with three components.
 */
@Test
public void testCopyTablesToLocalWithCompressionInfo() throws Exception {
    final TaskAttemptContext attemptContext = getTaskAttemptContext(true, true, true);
    ssTableColumnRecordReader.initialize(inputSplit, attemptContext);
    // Run the real copy logic on the reader under test.
    doCallRealMethod().when(ssTableColumnRecordReader).copyTablesToLocal(any(FileSystem.class),
            any(FileSystem.class), any(Path.class), any(TaskAttemptContext.class));

    // Remote side: serves a one-byte file.
    final byte[] payload = { 0xA };
    final FileSystem remoteFileSystem = mock(FileSystem.class);
    final FSDataInputStream remoteInput = new FSDataInputStream(new MemoryDataInputStream(payload));
    when(remoteFileSystem.open(any(Path.class))).thenReturn(remoteInput);

    // Local side: captures whatever gets written.
    final FileSystem localFileSystem = mock(FileSystem.class);
    final FSDataOutputStream localOutput = mock(FSDataOutputStream.class);
    when(localFileSystem.create(any(Path.class), anyBoolean())).thenReturn(localOutput);

    final Path tablePath = inputSplit.getPath();

    // Every remote path is reported as a 10-byte regular file.
    final FileStatus remoteStatus = mock(FileStatus.class);
    when(remoteStatus.getLen()).thenReturn(10L);
    when(remoteStatus.isDirectory()).thenReturn(false);
    when(remoteFileSystem.getFileStatus(any(Path.class))).thenReturn(remoteStatus);

    // Make the compression info file appear to exist remotely.
    final String compressionInfoName = ssTableColumnRecordReader.getDescriptor()
            .filenameFor(Component.COMPRESSION_INFO);
    when(remoteFileSystem.exists(new Path(compressionInfoName))).thenReturn(true);

    ssTableColumnRecordReader.copyTablesToLocal(remoteFileSystem, localFileSystem, tablePath, attemptContext);

    verify(remoteFileSystem).getFileStatus(tablePath);
    ssTableColumnRecordReader.close();
    verify(localOutput).write(any(byte[].class), eq(0), eq(payload.length));
    assertEquals(3, ssTableColumnRecordReader.getComponentSize());
}
From source file:com.lightboxtechnologies.ingest.Uploader.java
License:Apache License
/**
 * Streams standard input to the destination path given as the only argument
 * and prints the hex-encoded digest of the bytes that were read.
 *
 * <p>The digest comes from {@code FsEntryUtils.getHashInstance("MD5")}. The
 * destination is created on the file system returned by
 * {@code FileSystem.get(getConf())} and overwrites any existing file.
 *
 * @param args exactly one element: the destination path
 * @return 0 after a successful upload, 2 when the argument count is wrong
 * @throws IOException if creating, writing or closing the destination fails
 */
public int run(String[] args) throws IOException {
    if (args.length != 1) {
        System.err.println("Usage: Uploader <dest path>");
        System.err.println("Writes data to HDFS path from stdin");
        return 2;
    }

    final String dst = args[0];
    final Configuration conf = getConf();

    final MessageDigest hasher = FsEntryUtils.getHashInstance("MD5");
    final DigestInputStream hashedIn = new DigestInputStream(System.in, hasher);

    final FileSystem fs = FileSystem.get(conf);
    final Path path = new Path(dst);

    // Close the output stream deterministically: without it the stream leaks and
    // a failure while finishing the file would go unnoticed.
    try (FSDataOutputStream outFile = fs.create(path, true)) {
        IOUtils.copyLarge(hashedIn, outFile, new byte[1024 * 1024]);
    }

    // Printed only after the destination was closed without error.
    System.out.println(Hex.encodeHexString(hasher.digest()));
    return 0;
}
From source file:com.lightboxtechnologies.spectrum.ExtractMapper.java
License:Apache License
/**
 * Extracts a large extent to {@code outPath}, then moves the result to a path
 * named after its hash under {@code /texaspete/ev}.
 *
 * <p>Steps: increments the {@code FileTypes.BIG} counter, creates
 * {@code outPath} (overwriting), lets {@code hashAndExtract} fill the record
 * and write the data, then renames the file to
 * {@code /texaspete/ev/<hashFolder(hash)>/<hash>} where {@code hash} is the
 * hex form of the record's {@code "md5"} entry.
 *
 * @param file    input stream handed to {@code hashAndExtract}
 * @param fs      file system used for create, mkdirs, exists and rename
 * @param outPath temporary destination the data is first written to
 * @param map     passed through to {@code hashAndExtract}
 * @param context supplies the counters that are updated
 * @return the record, with {@code "Content"} set to a {@code FileProxy} for
 *         the hash-named path
 * @throws IOException if writing or closing the output, or a file system
 *                     operation, fails
 */
protected Map<String, Object> process_extent_large(FSDataInputStream file, FileSystem fs, Path outPath,
        Map<String, ?> map, Context context) throws IOException {
    context.getCounter(FileTypes.BIG).increment(1);

    final Map<String, Object> rec = new HashMap<String, Object>();

    OutputStream fout = null;
    try {
        fout = fs.create(outPath, true);
        hashAndExtract(rec, fout, file, map, context);
        // Close on the success path so a failed close is reported instead of
        // being swallowed; otherwise the file could be renamed as if complete.
        fout.close();
        fout = null;
    } finally {
        // Best-effort cleanup when an exception is already propagating.
        IOUtils.closeQuietly(fout);
    }

    // NOTE(review): assumes hashAndExtract stored a byte[] under "md5" — confirm.
    final String hash = new String(Hex.encodeHex((byte[]) rec.get("md5")));
    final Path subDir = new Path("/texaspete/ev", hashFolder(hash)), hashPath = new Path(subDir, hash);
    fs.mkdirs(subDir);

    if (fs.exists(hashPath)) {
        // Same content already stored; outPath is left where it is.
        context.getCounter(FileTypes.BIG_DUPES).increment(1);
    } else if (!fs.rename(outPath, hashPath)) {
        LOG.warn("Could not rename " + outPath + " to " + hashPath);
        context.getCounter(FileTypes.PROBLEMS).increment(1);
    }

    final StreamProxy content = new FileProxy(hashPath.toString());
    rec.put("Content", content);
    return rec;
}
From source file:com.lightboxtechnologies.spectrum.Uploader.java
License:Apache License
public static void main(String[] args) throws Exception { final Configuration conf = new Configuration(); final String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 1) { System.err.println("Usage: Uploader <dest path>"); System.err.println("Writes data to HDFS path from stdin"); System.exit(2);// ww w . j a v a 2s .c o m } MessageDigest hasher = FsEntryUtils.getHashInstance("MD5"); DigestInputStream hashedIn = new DigestInputStream(System.in, hasher); FileSystem fs = FileSystem.get(conf); Path path = new Path(otherArgs[0]); FSDataOutputStream outFile = fs.create(path, true); IOUtils.copyLarge(hashedIn, outFile, new byte[1024 * 1024]); System.out.println(new String(Hex.encodeHex(hasher.digest()))); }
From source file:com.linkedin.cubert.io.rubix.RubixOutputFormat.java
License:Open Source License
/**
 * Builds the record writer for a task attempt, backed by a freshly created
 * work file with the Rubix extension.
 *
 * <p>When output compression is enabled, the configured codec (falling back to
 * {@code DefaultCodec}) is instantiated, its default extension is appended to
 * the file name, and the codec is handed to the writer. Otherwise the writer
 * receives a {@code null} codec.
 *
 * @param context task attempt context; supplies the configuration, the
 *                compression settings and the output key/value classes
 * @return a {@link RubixRecordWriter} writing to the new work file
 * @throws IOException if the work file cannot be resolved or created
 * @throws InterruptedException declared by the overridden method
 */
@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
    final Configuration configuration = context.getConfiguration();

    String suffix = RubixConstants.RUBIX_EXTENSION;
    CompressionCodec compressionCodec = null;
    if (getCompressOutput(context)) {
        final Class<?> codecType = getOutputCompressorClass(context, DefaultCodec.class);
        compressionCodec = (CompressionCodec) ReflectionUtils.newInstance(codecType, configuration);
        suffix = suffix + compressionCodec.getDefaultExtension();
    }

    final Path workFile = getDefaultWorkFile(context, suffix);
    // Second argument is the overwrite flag: false, so an existing file is not replaced.
    final FSDataOutputStream rubixStream = workFile.getFileSystem(configuration).create(workFile, false);
    return new RubixRecordWriter<K, V>(conf(configuration), rubixStream, context.getOutputKeyClass(),
            context.getOutputValueClass(), compressionCodec);
}

/** Returns the given configuration unchanged; keeps the constructor call readable. */
private static Configuration conf(Configuration configuration) {
    return configuration;
}
From source file:com.m6d.filecrush.crush.TextOutputFormat.java
License:Apache License
public RecordWriter<K, V> getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress) throws IOException { boolean isCompressed = getCompressOutput(job); String keyValueSeparator = job.get("mapreduce.output.textoutputformat.separator", ""); if (!isCompressed) { Path file = FileOutputFormat.getTaskOutputPath(job, name); FileSystem fs = file.getFileSystem(job); FSDataOutputStream fileOut = fs.create(file, progress); return new LineRecordWriter<K, V>(fileOut, keyValueSeparator); } else {/* w w w.jav a 2 s . co m*/ Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class); // create the named codec CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job); // build the filename including the extension Path file = FileOutputFormat.getTaskOutputPath(job, name + codec.getDefaultExtension()); FileSystem fs = file.getFileSystem(job); FSDataOutputStream fileOut = fs.create(file, progress); return new LineRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)), keyValueSeparator); } }