Example usage for org.apache.hadoop.fs.FileSystem#create

Introduction

This page collects example usages of the create method of org.apache.hadoop.fs.FileSystem.

Prototype

public FSDataOutputStream create(Path f, short replication) throws IOException

Source Link

Document

Create an FSDataOutputStream at the indicated Path.

Usage

From source file:com.asakusafw.runtime.stage.temporary.TemporaryStorage.java

License:Apache License

/**
 * Opens a temporary {@link ModelOutput} for the specified path.
 * @param <V> data type/*from  w  w w .  j a v  a  2s.  c o m*/
 * @param conf configuration
 * @param dataType data type
 * @param path target path
 * @param compressionCodec compression codec, or null if not compressed
 * @return the opened {@link ModelOutput}
 * @throws IOException if failed to open output
 * @throws IllegalArgumentException if some parameters were {@code null}
 */
@SuppressWarnings("unchecked")
public static <V> ModelOutput<V> openOutput(Configuration conf, Class<V> dataType, Path path,
        CompressionCodec compressionCodec) throws IOException {
    if (conf == null) {
        throw new IllegalArgumentException("conf must not be null"); //$NON-NLS-1$
    }
    if (dataType == null) {
        throw new IllegalArgumentException("dataType must not be null"); //$NON-NLS-1$
    }
    if (path == null) {
        throw new IllegalArgumentException("path must not be null"); //$NON-NLS-1$
    }
    FileSystem fs = path.getFileSystem(conf);
    if (LOG.isDebugEnabled()) {
        LOG.debug(MessageFormat.format("Opening temporary output: {0} (fs={1})", //$NON-NLS-1$
                path, fs.getUri()));
    }
    if (Writable.class.isAssignableFrom(dataType)) {
        return (ModelOutput<V>) new TemporaryFileOutput<>(fs.create(path, true), dataType.getName(),
                OUTPUT_INIT_BUFFER_SIZE, OUTPUT_PAGE_SIZE);
    }
    SequenceFile.Writer out = newWriter(conf, fs, dataType, path, compressionCodec);
    return new SequenceFileModelOutput<>(out);
}

From source file:com.asakusafw.runtime.util.cache.HadoopFileCacheRepository.java

License:Apache License

/**
 * Replaces the cached copy of a library file together with its checksum file.
 *
 * @param file the source file to cache
 * @param checksum the checksum value written into the checksum file
 * @param cachePath the location of the cached copy
 * @param cacheChecksumPath the location of the checksum file for the cached copy
 * @throws IOException if deleting, creating or copying any of the files fails
 */
private void updateCache(Path file, long checksum, Path cachePath, Path cacheChecksumPath) throws IOException {
    if (LOG.isInfoEnabled()) {
        String message = MessageFormat.format("updating library cache: {0} -> {1}", file, cachePath);
        LOG.info(message);
    }

    final FileSystem originFs = file.getFileSystem(configuration);
    final FileSystem storeFs = cachePath.getFileSystem(configuration);

    // Drop the previous entry: the checksum file goes first, then the cached file.
    delete(storeFs, cacheChecksumPath);
    delete(storeFs, cachePath);

    // Write the new checksum, then copy the source while the checksum stream is still open.
    try (FSDataOutputStream checksumStream = storeFs.create(cacheChecksumPath, false)) {
        checksumStream.writeLong(checksum);
        syncFile(originFs, file, storeFs, cachePath);
    }
}

From source file:com.asakusafw.runtime.util.cache.HadoopFileCacheRepository.java

License:Apache License

/**
 * Copies the contents of {@code sourceFile} into a newly created {@code targetFile}.
 * The target is created without overwrite, and it is created before the source is opened.
 *
 * @param sourceFs the file system holding the source file
 * @param sourceFile the file to read
 * @param targetFs the file system receiving the copy
 * @param targetFile the file to create
 * @throws IOException if the target cannot be created or the copy fails
 */
private void syncFile(FileSystem sourceFs, Path sourceFile, FileSystem targetFs, Path targetFile)
        throws IOException {
    final byte[] chunk = byteBuffers.get();
    try (FSDataOutputStream sink = targetFs.create(targetFile, false);
            FSDataInputStream origin = sourceFs.open(sourceFile)) {
        // A negative count from read() marks the end of the source stream.
        for (int count = origin.read(chunk); count >= 0; count = origin.read(chunk)) {
            sink.write(chunk, 0, count);
        }
    }
}

From source file:com.asiainfo.srd.HioBench.java

License:Apache License

/**
 * Writes {@code options.nBytesInFile} bytes of expected test data to {@code options.filePath},
 * creating the file with a replication factor of 1.
 *
 * @param fs the file system on which the file is created
 * @throws IOException if the file cannot be created or written
 */
static void writeFile(FileSystem fs) throws IOException {
    final byte[] arr = new byte[65536];
    // try-with-resources keeps a write failure as the primary exception; a failure in close()
    // is attached as suppressed instead of replacing it, as the former finally block did.
    try (FSDataOutputStream fos = fs.create(options.filePath, (short) 1)) {
        long off = 0;
        while (off < options.nBytesInFile) {
            fillArrayWithExpected(arr, off, arr.length);
            // The last chunk may be shorter than the buffer.
            int lim = (int) Math.min(options.nBytesInFile - off, arr.length);
            fos.write(arr, 0, lim);
            off += lim;
        }
    }
}

From source file:com.awcoleman.StandaloneJava.AvroCombinerByBlock.java

License:Apache License

public AvroCombinerByBlock(String inDirStr, String outDirStr, String handleExisting) throws IOException {

    //handle both an output directory and an output filename (ending with .avro)
    String outputFilename = DEFAULTOUTPUTFILENAME;
    if (outDirStr.endsWith(".avro")) {
        isOutputNameSpecifiedAndAFile = true;
        //String[] outputParts = outDirStr.split(":?\\\\");
        String[] outputParts = outDirStr.split("/");

        outputFilename = outputParts[outputParts.length - 1];

        //remove outputFilename from outDirStr to get new outDirStr which is just directory (and trailing /)
        outDirStr = outDirStr.replaceAll(Pattern.quote(outputFilename), "");
        outDirStr = outDirStr.substring(0, outDirStr.length() - (outDirStr.endsWith("/") ? 1 : 0));
    }//  w w  w.ja  v a 2s  .  co m

    //Get block size - not needed
    //long hdfsBlockSize = getBlockSize();
    //System.out.println("HDFS FS block size: "+hdfsBlockSize);

    //Get list of input files
    ArrayList<FileStatus> inputFileList = new ArrayList<FileStatus>();

    Configuration conf = new Configuration();
    conf.addResource(new Path("/etc/hadoop/conf/core-site.xml"));
    conf.set("dfs.replication", "1"); //see http://stackoverflow.com/questions/24548699/how-to-append-to-an-hdfs-file-on-an-extremely-small-cluster-3-nodes-or-less

    FileSystem hdfs = null;
    try {
        hdfs = FileSystem.get(conf);
    } catch (java.io.IOException ioe) {
        System.out.println("Error opening HDFS filesystem. Exiting. Error message: " + ioe.getMessage());
        System.exit(1);
    }
    if (hdfs.getStatus() == null) {
        System.out.println("Unable to contact HDFS filesystem. Exiting.");
        System.exit(1);
    }

    //Check if input and output dirs exist
    Path inDir = new Path(inDirStr);
    Path outDir = new Path(outDirStr);
    if (!(hdfs.exists(inDir) || hdfs.isDirectory(inDir))) {
        System.out.println("Input directory ( " + inDirStr + " ) not found or is not directory. Exiting.");
        System.exit(1);
    }

    if (!(hdfs.exists(outDir) || hdfs.isDirectory(outDir))) {
        if (hdfs.exists(outDir)) { //outDir exists and is a symlink or file, must die
            System.out.println("Requested output directory name ( " + outDirStr
                    + " ) exists but is not a directory. Exiting.");
            System.exit(1);
        } else {
            hdfs.mkdirs(outDir);
        }
    }

    RemoteIterator<LocatedFileStatus> fileStatusListIterator = hdfs.listFiles(inDir, true);
    while (fileStatusListIterator.hasNext()) {
        LocatedFileStatus fileStatus = fileStatusListIterator.next();

        if (fileStatus.isFile() && !fileStatus.getPath().getName().equals("_SUCCESS")) {
            inputFileList.add((FileStatus) fileStatus);
        }
    }

    if (inputFileList.size() <= 1 && !isOutputNameSpecifiedAndAFile) { //If an output file is specified assume we just want a rename.
        System.out.println("Only one or zero files found in input directory ( " + inDirStr + " ). Exiting.");
        System.exit(1);
    }

    //Get Schema and Compression Codec from seed file since we need it for the writer
    Path firstFile = inputFileList.get(0).getPath();
    FsInput fsin = new FsInput(firstFile, conf);
    DataFileReader<Object> dfrFirstFile = new DataFileReader<Object>(fsin, new GenericDatumReader<Object>());
    Schema fileSchema = dfrFirstFile.getSchema();
    String compCodecName = dfrFirstFile.getMetaString("avro.codec");
    //compCodecName should be null, deflate, snappy, or bzip2
    if (compCodecName == null) {
        compCodecName = "deflate"; //set to deflate even though original is no compression
    }
    dfrFirstFile.close();

    //Create Empty HDFS file in output dir
    String seedFileStr = outDirStr + "/" + outputFilename;
    Path seedFile = new Path(seedFileStr);
    FSDataOutputStream hdfsdos = null;
    try {
        hdfsdos = hdfs.create(seedFile, false);
    } catch (org.apache.hadoop.fs.FileAlreadyExistsException faee) {
        if (handleExisting.equals("overwrite")) {
            hdfs.delete(seedFile, false);
            hdfsdos = hdfs.create(seedFile, false);
        } else if (handleExisting.equals("append")) {
            hdfsdos = hdfs.append(seedFile);
        } else {
            System.out
                    .println("File " + seedFileStr + " exists and will not overwrite. handleExisting is set to "
                            + handleExisting + ". Exiting.");
            System.exit(1);
        }
    }
    if (hdfsdos == null) {
        System.out.println("Unable to create or write to output file ( " + seedFileStr
                + " ). handleExisting is set to " + handleExisting + ". Exiting.");
        System.exit(1);
    }

    //Append other files
    GenericDatumWriter gdw = new GenericDatumWriter(fileSchema);
    DataFileWriter dfwBase = new DataFileWriter(gdw);
    //Set compression to that found in the first file
    dfwBase.setCodec(CodecFactory.fromString(compCodecName));

    DataFileWriter dfw = dfwBase.create(fileSchema, hdfsdos);
    for (FileStatus thisFileStatus : inputFileList) {

        //_SUCCESS files are 0 bytes
        if (thisFileStatus.getLen() == 0) {
            continue;
        }

        FsInput fsin1 = new FsInput(thisFileStatus.getPath(), conf);
        DataFileReader dfr = new DataFileReader<Object>(fsin1, new GenericDatumReader<Object>());

        dfw.appendAllFrom(dfr, false);

        dfr.close();
    }

    dfw.close();
    dfwBase.close();

}

From source file:com.awcoleman.StandaloneJava.AvroCombinerByRecord.java

License:Apache License

public AvroCombinerByRecord(String inDirStr, String outDirStr) throws IOException {

    //Get list of input files
    ArrayList<FileStatus> inputFileList = new ArrayList<FileStatus>();

    Configuration conf = new Configuration();
    conf.addResource(new Path("/etc/hadoop/conf/core-site.xml"));

    FileSystem hdfs = FileSystem.get(conf);

    //Check if input and output dirs exist
    Path inDir = new Path(inDirStr);
    Path outDir = new Path(outDirStr);
    if (!(hdfs.exists(inDir) || hdfs.isDirectory(inDir))) {
        System.out.println("Input directory ( " + inDirStr + " ) not found or is not directory. Exiting.");
        System.exit(1);//from  w w w .  j  av a  2  s .  c  om
    }

    if (!(hdfs.exists(outDir) || hdfs.isDirectory(outDir))) {
        if (hdfs.exists(outDir)) { //outDir exists and is a symlink or file, must die
            System.out.println("Requested output directory name ( " + outDirStr
                    + " ) exists but is not a directory. Exiting.");
            System.exit(1);
        } else {
            hdfs.mkdirs(outDir);
        }
    }

    RemoteIterator<LocatedFileStatus> fileStatusListIterator = hdfs.listFiles(inDir, true);
    while (fileStatusListIterator.hasNext()) {
        LocatedFileStatus fileStatus = fileStatusListIterator.next();

        if (fileStatus.isFile()) {
            inputFileList.add((FileStatus) fileStatus);
        }
    }

    if (inputFileList.size() <= 1) {
        System.out.println("Only one or zero files found in input directory ( " + inDirStr + " ). Exiting.");
        System.exit(1);
    }

    //Get Schema and Compression Codec from seed file since we need it for the writer
    Path firstFile = inputFileList.get(0).getPath();
    FsInput fsin = new FsInput(firstFile, conf);
    DataFileReader<Object> dfrFirstFile = new DataFileReader<Object>(fsin, new GenericDatumReader<Object>());
    Schema fileSchema = dfrFirstFile.getSchema();
    String compCodecName = dfrFirstFile.getMetaString("avro.codec");
    dfrFirstFile.close();

    //Create Empty HDFS file in output dir
    Path seedFile = new Path(outDirStr + "/combinedByRecord.avro");
    FSDataOutputStream hdfsdos = hdfs.create(seedFile, false);

    //Append other files
    GenericDatumWriter gdw = new GenericDatumWriter(fileSchema);
    DataFileWriter dfwBase = new DataFileWriter(gdw);
    //Set compression to that found in the first file
    dfwBase.setCodec(CodecFactory.fromString(compCodecName));

    DataFileWriter dfw = dfwBase.create(fileSchema, hdfsdos);

    for (FileStatus thisFileStatus : inputFileList) {

        DataFileStream<Object> avroStream = null;
        FSDataInputStream inStream = hdfs.open(thisFileStatus.getPath());
        GenericDatumReader<Object> reader = new GenericDatumReader<Object>();
        avroStream = new DataFileStream<Object>(inStream, reader);

        long recordCounter = 0;
        while (avroStream.hasNext()) {
            dfw.append(avroStream.next());

            recordCounter++;
        }
        avroStream.close();
        inStream.close();

        System.out.println("Appended " + recordCounter + " records from " + thisFileStatus.getPath().getName()
                + " to " + seedFile.getName());
    }

    dfw.close();
    dfwBase.close();
}

From source file:com.benchmark.mapred.terasort.TeraOutputFormat.java

License:Apache License

/**
 * Creates a record writer that writes to the file {@code name} inside the job's work output
 * path.
 *
 * @param ignored not used; the file system is resolved from the work output path
 * @param job the job configuration
 * @param name the name of the output file
 * @param progress progress callback handed to the file creation call
 * @return a {@code TeraRecordWriter} over the newly created file
 * @throws IOException if the output file cannot be created
 */
public RecordWriter<Text, Text> getRecordWriter(FileSystem ignored, JobConf job, String name,
        Progressable progress) throws IOException {
    final Path workDir = getWorkOutputPath(job);
    final FileSystem workFs = workDir.getFileSystem(job);
    final Path target = new Path(workDir, name);
    return new TeraRecordWriter(workFs.create(target, progress), job);
}

From source file:com.bizosys.hsearch.console.ui.SearchServlet.java

License:Apache License

/**
 * Creates a project from the schema submitted in the {@code schema} request parameter.
 * <p>
 * The schema is written to {@code schemaFileLoc} as UTF-8, the project is set up through
 * {@code SetupServlet.create}, the project name is appended to {@code projects.txt} under
 * {@code webappsClassDirPath}, and the schema file is then copied to the Hadoop file system
 * under its plain file name, replacing any existing copy.
 * </p>
 *
 * @param req the request carrying the {@code schema} parameter
 * @param projectName the name of the project to create
 * @param webappsClassDirPath the directory that holds {@code projects.txt}
 * @param schemaFileLoc the local path the schema is written to
 * @throws FileNotFoundException if the local schema file cannot be opened for writing
 * @throws UnsupportedEncodingException if UTF-8 is not supported
 * @throws IOException if a local write fails or the copy to the Hadoop file system fails
 * @throws RuntimeException if the schema parameter is missing or blank
 */
public void actionCreate(HttpServletRequest req, String projectName, String webappsClassDirPath,
        String schemaFileLoc) throws FileNotFoundException, UnsupportedEncodingException, IOException {
    String schemaXmlContent = req.getParameter("schema");
    schemaXmlContent = (null == schemaXmlContent) ? StringUtils.Empty : schemaXmlContent.trim();
    if (schemaXmlContent.length() == 0) {
        throw new RuntimeException("Schema content is missing.");
    }

    File schemaFile = new File(schemaFileLoc);
    try (PrintWriter writer = new PrintWriter(schemaFile, "UTF-8")) {
        writer.write(schemaXmlContent);
        // PrintWriter never throws on write; checkError() flushes and reports a failure.
        if (writer.checkError()) {
            throw new IOException("Unable to write schema file " + schemaFile.getAbsolutePath());
        }
    }

    SetupServlet.create(projectName);

    File prjectFile = new File(webappsClassDirPath + "/projects.txt");
    // try-with-resources closes the writer even when a write fails; the former explicit
    // flush() in a finally block could throw and skip close().
    try (FileWriter fw = new FileWriter(prjectFile, true)) {
        fw.write(projectName);
        fw.write('\n');
    }

    if (schemaFile.exists()) {
        try {
            // FileSystem.get returns a shared, cached instance. It is deliberately not closed
            // here, because closing it would break every other user of the same instance.
            FileSystem fs = FileSystem.get(conf);
            Path schemaHdfsFilePath = new Path(schemaFile.getName());

            // Overwrite unconditionally; a separate exists() check is open to a race.
            try (FSDataOutputStream hdfsFile = fs.create(schemaHdfsFilePath, true)) {
                hdfsFile.write(FileReaderUtil.getBytes(schemaFile));
            }
        } catch (Exception ex) {
            throw new IOException(
                    "Unable to create @ hadoop Please check permission on dfs " + schemaFile.getName(), ex);
        }
    }
}

From source file:com.blackberry.logdriver.mapred.BinaryRecordWriter.java

License:Apache License

public BinaryRecordWriter(JobConf job) {
    String extension = job.get("output.file.extension", "");

    String taskid = job.get("mapred.task.id");
    try {/*w  w  w . j  a  v  a 2s . c o  m*/
        Path outputPath = BinaryOutputFormat.getTaskOutputPath(job, taskid + extension);

        FileSystem fs = FileSystem.get(job);
        LOG.info("Creating output path: {}", outputPath);
        out = fs.create(outputPath, true);
    } catch (IOException e) {
        LOG.error("Error creating output file.", e);
    }
}

From source file:com.blackberry.logdriver.mapreduce.BinaryRecordWriter.java

License:Apache License

/**
 * Create a writer for the given BinaryOutputFormat and TaskAttemptContext.
 * /*from   ww w .  j ava2s  .  c  o m*/
 * @param outputFormat
 * @param context
 */
public BinaryRecordWriter(BinaryOutputFormat outputFormat, TaskAttemptContext context) {
    Configuration conf = context.getConfiguration();
    String extension = conf.get("output.file.extension", "");

    try {
        Path outputPath = outputFormat.getDefaultWorkFile(context, extension);
        FileSystem fs = FileSystem.get(conf);
        LOG.info("Creating output path: {}", outputPath);
        out = fs.create(outputPath, true);
    } catch (IOException e) {
        LOG.error("Error creating output file.", e);
    }
}