Example usage for org.apache.hadoop.fs FileSystem create

List of usage examples for org.apache.hadoop.fs FileSystem create

Introduction

This page collects usage examples for the create method of org.apache.hadoop.fs.FileSystem.

Prototype

public FSDataOutputStream create(Path f, short replication) throws IOException 

Document

Create an FSDataOutputStream at the indicated Path.
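
FileSystem.create is heavily overloaded. Besides the (Path, short replication) form shown above, the examples on this page use the (Path, boolean overwrite) and (Path, Progressable) variants. The following is a minimal sketch of the prototype overload; the path, replication factor, and payload are illustrative, and a filesystem reachable through the default Configuration is assumed.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemCreateExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Request a replication factor of 2 for the new file; the path is illustrative.
        try (FSDataOutputStream out = fs.create(new Path("/tmp/create-example.txt"), (short) 2)) {
            out.writeUTF("hello");
        }
    }
}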

Usage

From source file:com.jbw.taroutputformat.TarOutputFormat.java

@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext tac) throws IOException, InterruptedException {
    Configuration conf = tac.getConfiguration();
    String extension = ".tar";
    Path file = getDefaultWorkFile(tac, extension);
    FileSystem fs = file.getFileSystem(conf);
    OutputStream fileOut = fs.create(file, false);
    return new TarOutputWriter<>(fileOut);
}
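
The two-argument call fs.create(file, false) resolves to the (Path, boolean overwrite) overload, so it throws an IOException if the work file already exists instead of silently clobbering another attempt's output. Wiring this format into a job might look like the following sketch (class and path names are illustrative, and TarOutputFormat is assumed to extend FileOutputFormat, which is what makes getDefaultWorkFile available above):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class TarJobSetup {
    public static Job configure(Configuration conf) throws IOException {
        Job job = Job.getInstance(conf);
        // Emit one .tar work file per task via the format above.
        job.setOutputFormatClass(TarOutputFormat.class);
        FileOutputFormat.setOutputPath(job, new Path("/out/tars"));
        return job;
    }
}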

From source file:com.jeffy.hdfs.HDFSWriteFile.java

License:Apache License

/**
 * Writes a local file to HDFS.
 *
 * @param args args[0] = local source path, args[1] = HDFS destination path
 */
public static void main(String[] args) {
    if (args.length < 2) {
        System.err.println("Please input two parameter!");
        System.out.println("Parameter: localfile hdfsfile");
        System.exit(1);
    }

    String localPath = args[0];
    String hdfsPath = args[1];
    // Create the Hadoop configuration
    Configuration config = new Configuration();
    // Open the local source file with a buffered input stream
    try (InputStream in = new BufferedInputStream(new FileInputStream(localPath))) {
        FileSystem fs = FileSystem.get(URI.create(hdfsPath), config);
        try (FSDataOutputStream out = fs.create(new Path(hdfsPath), new Progressable() {
            @Override
            public void progress() {
                System.out.println(".");
            }
        })) {
            // Copy the input stream to the HDFS OutputStream using Apache Commons IO's IOUtils
            IOUtils.copy(in, out);
            System.out.println("File copy finished.");
        }
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
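
The anonymous Progressable is what makes this the create(Path, Progressable) overload: the client calls progress() periodically while data is being written, so the console prints a dot for each callback. Note that IOUtils.copy here is Apache Commons IO; Hadoop's own org.apache.hadoop.io.IOUtils.copyBytes would serve the same purpose.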

From source file:com.knewton.mapreduce.SSTableRecordReader.java

License:Apache License

/**
 * Copies a remote path to the local filesystem, while updating hadoop that we're making
 * progress. Doesn't support directories.
 */
@VisibleForTesting
void copyToLocalFile(FileSystem remoteFS, FileSystem localFS, Path remote, Path local) throws IOException {
    // don't support transferring from remote directories
    FileStatus remoteStat = remoteFS.getFileStatus(remote);
    Preconditions.checkArgument(!remoteStat.isDirectory(), String.format("Path %s is a directory!", remote));
    // if local is a dir, copy to inside that dir, like 'cp /path/file /tmp/' would do
    if (localFS.exists(local)) {
        FileStatus localStat = localFS.getFileStatus(local);
        if (localStat.isDirectory()) {
            local = new Path(local, remote.getName());
        }
    }
    long remoteFileSize = remoteStat.getLen();
    // do actual copy
    InputStream in = null;
    OutputStream out = null;
    try {
        long startTime = System.currentTimeMillis();
        long lastLogTime = 0;
        long bytesCopied = 0;
        in = remoteFS.open(remote);
        out = localFS.create(local, true);
        int buffSize = this.conf.getInt(CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_KEY,
                CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_DEFAULT);
        byte[] buf = new byte[buffSize];
        int bytesRead = in.read(buf);
        while (bytesRead >= 0) {
            long now = System.currentTimeMillis();
            // log transfer rate once per min, starting 1 min after transfer began
            if (now - lastLogTime > 60000L && now - startTime > 60000L) {
                double elapsedSec = (now - startTime) / 1000D;
                double bytesPerSec = bytesCopied / elapsedSec;
                LOG.info("Transferred {} of {} bytes at {} bytes per second", bytesCopied, remoteFileSize,
                        bytesPerSec);
                lastLogTime = now;
            }
            this.ctx.progress();
            out.write(buf, 0, bytesRead);
            bytesCopied += bytesRead;
            bytesRead = in.read(buf);
        }
        // try to close these outside of finally so we receive exception on failure
        out.close();
        out = null;
        in.close();
        in = null;
    } finally {
        // make sure everything's closed
        IOUtils.closeStream(out);
        IOUtils.closeStream(in);
    }
}
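
Two details of this copy loop are easy to miss: ctx.progress() is invoked on every iteration so the task is not killed for inactivity during a long transfer, and the streams are closed explicitly before the finally block so that a failure in close() surfaces as an exception rather than being swallowed by IOUtils.closeStream.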

From source file:com.knewton.mapreduce.SSTableRecordReaderTest.java

License:Apache License

/**
 * Tests to see if tables can be correctly copied locally
 */
@Test
public void testCopyTablesToLocal() throws Exception {
    TaskAttemptContext context = getTaskAttemptContext(true, true, true);
    ssTableColumnRecordReader.initialize(inputSplit, context);

    doCallRealMethod().when(ssTableColumnRecordReader).copyTablesToLocal(any(FileSystem.class),
            any(FileSystem.class), any(Path.class), any(TaskAttemptContext.class));

    FileSystem remoteFS = mock(FileSystem.class);
    FileSystem localFS = mock(FileSystem.class);

    byte[] data = new byte[] { 0xA };
    FSDataInputStream fsIn = new FSDataInputStream(new MemoryDataInputStream(data));
    FSDataOutputStream fsOut = mock(FSDataOutputStream.class);

    when(remoteFS.open(any(Path.class))).thenReturn(fsIn);
    when(localFS.create(any(Path.class), anyBoolean())).thenReturn(fsOut);

    Path dataTablePath = inputSplit.getPath();
    FileStatus fileStatus = mock(FileStatus.class);
    when(fileStatus.getLen()).thenReturn(10L);
    when(fileStatus.isDirectory()).thenReturn(false);
    when(remoteFS.getFileStatus(any(Path.class))).thenReturn(fileStatus);

    ssTableColumnRecordReader.copyTablesToLocal(remoteFS, localFS, dataTablePath, context);
    verify(remoteFS).getFileStatus(dataTablePath);
    ssTableColumnRecordReader.close();
    verify(fsOut).write(any(byte[].class), eq(0), eq(data.length));
    assertEquals(2, ssTableColumnRecordReader.getComponentSize());
}
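
The stub when(localFS.create(any(Path.class), anyBoolean())) matches the create(Path, boolean) overload used by the copy code above, so the copied bytes land in the mocked FSDataOutputStream and the final verify can assert that exactly data.length bytes were written starting at offset 0.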

From source file:com.knewton.mapreduce.SSTableRecordReaderTest.java

License:Apache License

/**
 * Tests to see if tables can be correctly copied locally including the compression info table
 */
@Test
public void testCopyTablesToLocalWithCompressionInfo() throws Exception {
    TaskAttemptContext context = getTaskAttemptContext(true, true, true);
    ssTableColumnRecordReader.initialize(inputSplit, context);

    doCallRealMethod().when(ssTableColumnRecordReader).copyTablesToLocal(any(FileSystem.class),
            any(FileSystem.class), any(Path.class), any(TaskAttemptContext.class));

    FileSystem remoteFS = mock(FileSystem.class);
    FileSystem localFS = mock(FileSystem.class);

    byte[] data = new byte[] { 0xA };
    FSDataInputStream fsIn = new FSDataInputStream(new MemoryDataInputStream(data));
    FSDataOutputStream fsOut = mock(FSDataOutputStream.class);

    when(remoteFS.open(any(Path.class))).thenReturn(fsIn);
    when(localFS.create(any(Path.class), anyBoolean())).thenReturn(fsOut);

    Path dataTablePath = inputSplit.getPath();
    FileStatus fileStatus = mock(FileStatus.class);
    when(fileStatus.getLen()).thenReturn(10L);
    when(fileStatus.isDirectory()).thenReturn(false);
    when(remoteFS.getFileStatus(any(Path.class))).thenReturn(fileStatus);

    String str = ssTableColumnRecordReader.getDescriptor().filenameFor(Component.COMPRESSION_INFO);
    when(remoteFS.exists(new Path(str))).thenReturn(true);

    ssTableColumnRecordReader.copyTablesToLocal(remoteFS, localFS, dataTablePath, context);
    verify(remoteFS).getFileStatus(dataTablePath);
    ssTableColumnRecordReader.close();
    verify(fsOut).write(any(byte[].class), eq(0), eq(data.length));
    assertEquals(3, ssTableColumnRecordReader.getComponentSize());
}
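
The only difference from the previous test is the stubbed remoteFS.exists(...) call for the COMPRESSION_INFO component, which causes one extra table file to be copied; accordingly, the expected component count rises from 2 to 3.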

From source file:com.lightboxtechnologies.ingest.Uploader.java

License:Apache License

public int run(String[] args) throws IOException {
    if (args.length != 1) {
        System.err.println("Usage: Uploader <dest path>");
        System.err.println("Writes data to HDFS path from stdin");
        return 2;
    }

    final String dst = args[0];

    final Configuration conf = getConf();

    final MessageDigest hasher = FsEntryUtils.getHashInstance("MD5");
    final DigestInputStream hashedIn = new DigestInputStream(System.in, hasher);

    final FileSystem fs = FileSystem.get(conf);
    final Path path = new Path(dst);
    final FSDataOutputStream outFile = fs.create(path, true);

    IOUtils.copyLarge(hashedIn, outFile, new byte[1024 * 1024]);
    outFile.close(); // close to flush the last block and complete the HDFS file
    System.out.println(Hex.encodeHexString(hasher.digest()));
    return 0;
}
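
Here fs.create(path, true) is the overwriting variant, so an existing destination file is replaced. Because stdin is wrapped in a DigestInputStream, the MD5 accumulates as Commons IO's copyLarge streams through the 1 MB buffer, and the hex digest is printed once the copy completes.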

From source file:com.lightboxtechnologies.spectrum.ExtractMapper.java

License:Apache License

protected Map<String, Object> process_extent_large(FSDataInputStream file, FileSystem fs, Path outPath,
        Map<String, ?> map, Context context) throws IOException {
    context.getCounter(FileTypes.BIG).increment(1);

    final Map<String, Object> rec = new HashMap<String, Object>();

    OutputStream fout = null;
    try {
        fout = fs.create(outPath, true);
        hashAndExtract(rec, fout, file, map, context);
    } finally {
        IOUtils.closeQuietly(fout);
    }

    final String hash = new String(Hex.encodeHex((byte[]) rec.get("md5")));
    final Path subDir = new Path("/texaspete/ev", hashFolder(hash)), hashPath = new Path(subDir, hash);
    fs.mkdirs(subDir);

    if (fs.exists(hashPath)) {
        context.getCounter(FileTypes.BIG_DUPES).increment(1);
    } else if (!fs.rename(outPath, hashPath)) {
        LOG.warn("Could not rename " + outPath + " to " + hashPath);
        context.getCounter(FileTypes.PROBLEMS).increment(1);
    }
    final StreamProxy content = new FileProxy(hashPath.toString());
    rec.put("Content", content);
    return rec;
}
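
The extent is first written to a scratch outPath with overwrite enabled, then renamed into a content-addressed location under /texaspete/ev derived from the MD5 of its contents. Duplicate hashes are only counted, and a failed rename is logged and counted as a problem rather than rethrown.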

From source file:com.lightboxtechnologies.spectrum.Uploader.java

License:Apache License

public static void main(String[] args) throws Exception {
    final Configuration conf = new Configuration();
    final String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    if (otherArgs.length != 1) {
        System.err.println("Usage: Uploader <dest path>");
        System.err.println("Writes data to HDFS path from stdin");
        System.exit(2);
    }

    MessageDigest hasher = FsEntryUtils.getHashInstance("MD5");
    DigestInputStream hashedIn = new DigestInputStream(System.in, hasher);

    FileSystem fs = FileSystem.get(conf);
    Path path = new Path(otherArgs[0]);
    FSDataOutputStream outFile = fs.create(path, true);

    IOUtils.copyLarge(hashedIn, outFile, new byte[1024 * 1024]);
    outFile.close(); // close to flush the last block and complete the HDFS file

    System.out.println(new String(Hex.encodeHex(hasher.digest())));
}
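
This is essentially the same logic as the com.lightboxtechnologies.ingest.Uploader shown earlier, but written as a standalone main that pulls generic Hadoop options through GenericOptionsParser rather than exposing a Tool-style run method.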

From source file:com.linkedin.cubert.io.rubix.RubixOutputFormat.java

License:Open Source License

@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    String extension = RubixConstants.RUBIX_EXTENSION;

    CompressionCodec codec = null;
    boolean isCompressed = getCompressOutput(context);

    if (isCompressed) {
        Class<?> codecClass = getOutputCompressorClass(context, DefaultCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension += codec.getDefaultExtension();
    }

    Path file = getDefaultWorkFile(context, extension);
    FileSystem fs = file.getFileSystem(conf);

    FSDataOutputStream fileOut = fs.create(file, false);
    return new RubixRecordWriter<K, V>(conf, fileOut, context.getOutputKeyClass(),
            context.getOutputValueClass(), codec);
}
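
As in the tar example, fs.create(file, false) refuses to overwrite an existing work file. When compression is enabled, only the filename extension and the codec handed to RubixRecordWriter change; the stream from fs.create is passed through uncompressed, so wrapping it in the codec is presumably left to the record writer.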

From source file:com.m6d.filecrush.crush.TextOutputFormat.java

License:Apache License

public RecordWriter<K, V> getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress)
        throws IOException {
    boolean isCompressed = getCompressOutput(job);
    String keyValueSeparator = job.get("mapreduce.output.textoutputformat.separator", "");
    if (!isCompressed) {
        Path file = FileOutputFormat.getTaskOutputPath(job, name);
        FileSystem fs = file.getFileSystem(job);
        FSDataOutputStream fileOut = fs.create(file, progress);
        return new LineRecordWriter<K, V>(fileOut, keyValueSeparator);
    } else {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        // create the named codec
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job);
        // build the filename including the extension
        Path file = FileOutputFormat.getTaskOutputPath(job, name + codec.getDefaultExtension());
        FileSystem fs = file.getFileSystem(job);
        FSDataOutputStream fileOut = fs.create(file, progress);
        return new LineRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),
                keyValueSeparator);
    }
}
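
This is the older org.apache.hadoop.mapred API, so getRecordWriter receives a Progressable directly and forwards it to the create(Path, Progressable) overload, letting the filesystem report write progress on the task's behalf. In the compressed branch, the raw stream is wrapped by codec.createOutputStream before being handed to the LineRecordWriter.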