Example usage for org.apache.hadoop.fs FileSystem create

Introduction

On this page you can find usage examples for org.apache.hadoop.fs.FileSystem#create.

Prototype

public FSDataOutputStream create(Path f, boolean overwrite) throws IOException

Document

Create an FSDataOutputStream at the indicated Path.

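Before the real-world examples below, here is a minimal, self-contained sketch of the call. The output path is hypothetical; the boolean-overwrite overload shown is the one used throughout this page.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemCreateExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Hypothetical location; point this at your cluster or local filesystem.
        Path path = new Path("/tmp/filesystem-create-example.txt");
        FileSystem fs = path.getFileSystem(conf);
        // true = overwrite any existing file at this path.
        FSDataOutputStream os = fs.create(path, true);
        try {
            os.writeBytes("hello, hdfs\n");
        } finally {
            os.close();
        }
    }
}
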
Usage

From source file:com.twitter.elephanttwin.indexing.AbstractBlockIndexingJob.java

License:Open Source License

/**
 * Create a FileIndexDescriptor recording which columns have been indexed,
 * and write it to the directory where index files are stored for the
 * input file.
 * @param inputFile the input file whose index is being described
 * @param fs the filesystem on which the index files live
 * @throws IOException
 */
protected void createIndexDescriptors(FileStatus inputFile, FileSystem fs) throws IOException {
    Path indexFilePath = new Path(getIndex() + inputFile.getPath().toUri().getRawPath());

    FileIndexDescriptor fid = new FileIndexDescriptor();
    fid.setSourcePath(inputFile.getPath().toString());
    fid.setDocType(getExpectedDocType());
    LOG.info("getting checksum from:" + inputFile.getPath());
    FileChecksum cksum = fs.getFileChecksum(inputFile.getPath());
    com.twitter.elephanttwin.gen.FileChecksum fidCksum = null;
    if (cksum != null)
        fidCksum = new com.twitter.elephanttwin.gen.FileChecksum(cksum.getAlgorithmName(),
                ByteBuffer.wrap(cksum.getBytes()), cksum.getLength());
    fid.setChecksum(fidCksum);
    fid.setIndexedFields(getIndexedFields());
    fid.setIndexType(getIndexType());
    fid.setIndexVersion(getIndexVersion());

    Path idxPath = new Path(indexFilePath + "/" + BlockIndexedFileInputFormat.INDEXMETAFILENAME);
    FSDataOutputStream os = fs.create(idxPath, true);
    @SuppressWarnings("unchecked")
    ThriftWritable<FileIndexDescriptor> writable = (ThriftWritable<FileIndexDescriptor>) ThriftWritable
            .newInstance(fid.getClass());
    writable.set(fid);
    writable.write(os);
    os.close();
}

From source file:com.twitter.elephanttwin.lucene.indexing.AbstractLuceneIndexingJob.java

License:Apache License

private void writeIndexDescriptors(ETwinIndexDescriptor descriptor) throws IOException {
    Configuration conf = getConf();

    FileSystem fs = (new Path(IndexConfig.index.get()).getFileSystem(conf));

    FileStatus[] fileStats = fs.globStatus(new Path(IndexConfig.index.get(), "*"));

    // We write one indexDescriptor per generated index segment.
    // Something to consider: right now it's a straight-up serialized Thrift object.
    // Would it be better to do the LzoBase64Line thing, so that we can apply our tools?
    // or extend the tools?
    for (int i = 0; i < fileStats.length; i++) {
        descriptor.setIndexPart(i);
        FileStatus stat = fileStats[i];
        Path idxPath = new Path(stat.getPath().getParent(), "_" + stat.getPath().getName() + ".indexmeta");
        FSDataOutputStream os = fs.create(idxPath, true);
        @SuppressWarnings("unchecked")
        ThriftWritable<ETwinIndexDescriptor> writable = (ThriftWritable<ETwinIndexDescriptor>) ThriftWritable
                .newInstance(descriptor.getClass());
        writable.set(descriptor);
        writable.write(os);
        os.close();
    }
}

From source file:com.twitter.pycascading.MetaScheme.java

License:Apache License

public void sink(FlowProcess flowProcess, SinkCall sinkCall) throws IOException {
    if (firstLine) {
        Path path = new Path(outputPath + "/" + headerFileName);
        FileSystem fs = path.getFileSystem(((HadoopFlowProcess) flowProcess).getJobConf());
        FSDataOutputStream fsdos = null;
        try {
            // createNewFile is atomic: it returns true only if this call
            // actually created the file, so the header is written just once.
            if (fs.createNewFile(path)) {
                fsdos = fs.create(path, true);
                boolean firstField = true;
                for (Comparable<?> field : sinkCall.getOutgoingEntry().getFields()) {
                    if (firstField)
                        firstField = false;
                    else
                        fsdos.writeBytes("\t");
                    fsdos.writeBytes(field.toString());
                }
                fsdos.writeBytes("\t");
            }
        } catch (IOException ignored) {
        } finally {
            if (null != fsdos) {
                fsdos.close();
            }
        }
        // TODO: moar
        path = new Path(outputPath + "/" + schemeFileName);
        ObjectOutputStream oos = null;
        try {
            if (fs.createNewFile(path)) {
                fsdos = fs.create(path, true);
                oos = new ObjectOutputStream(fsdos);
                oos.writeObject(scheme);
                oos.writeObject(sinkCall.getOutgoingEntry().getFields());
            }
        } catch (IOException ignored) {
        } finally {
            // Close the ObjectOutputStream first; closing it flushes its buffer
            // and closes the underlying FSDataOutputStream as well.
            if (null != oos) {
                oos.close();
            } else if (null != fsdos) {
                fsdos.close();
            }
        }
    }
    firstLine = false;

    if (typeFileToWrite) {
        Path path = new Path(outputPath + "/" + typeFileName);
        FileSystem fs = path.getFileSystem(((HadoopFlowProcess) flowProcess).getJobConf());
        TupleEntry tupleEntry = null;
        FSDataOutputStream fsdos = null;
        try {
            if (fs.createNewFile(path)) {
                fsdos = fs.create(path, true);
                tupleEntry = sinkCall.getOutgoingEntry();
                for (int i = 0; i < tupleEntry.size(); i++) {
                    String fieldName;
                    if (tupleEntry.getFields().size() < tupleEntry.size()) {
                        // We don't have names for the fields
                        fieldName = "";
                    } else {
                        fieldName = tupleEntry.getFields().get(i) + "\t";
                    }
                    Object object = tupleEntry.getObject(i);
                    Class<?> objectClass = (object == null ? Object.class : object.getClass());
                    fsdos.writeBytes(fieldName + objectClass.getName() + "\n");
                }
            }
        } catch (IOException ignored) {
        } finally {
            if (null != fsdos) {
                fsdos.close();
            }
        }
        typeFileToWrite = false;
    }
    scheme.sink(flowProcess, sinkCall);
}

From source file:com.uber.hoodie.common.HoodieTestDataGenerator.java

License:Apache License

public static void createCommitFile(String basePath, String commitTime, Configuration configuration)
        throws IOException {
    Path commitFile = new Path(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
            + HoodieTimeline.makeCommitFileName(commitTime));
    FileSystem fs = FSUtils.getFs(basePath, configuration);
    FSDataOutputStream os = fs.create(commitFile, true);
    HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata();
    try {
        // Write empty commit metadata as UTF-8 JSON
        os.write(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8));
    } finally {
        os.close();
    }
}

From source file:com.uber.hoodie.common.HoodieTestDataGenerator.java

License:Apache License

public static void createCompactionRequestedFile(String basePath, String commitTime,
        Configuration configuration) throws IOException {
    Path commitFile = new Path(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
            + HoodieTimeline.makeRequestedCompactionFileName(commitTime));
    FileSystem fs = FSUtils.getFs(basePath, configuration);
    FSDataOutputStream os = fs.create(commitFile, true);
    os.close();
}

From source file:com.uber.hoodie.common.HoodieTestDataGenerator.java

License:Apache License

public static void createCompactionAuxiliaryMetadata(String basePath, HoodieInstant instant,
        Configuration configuration) throws IOException {
    Path commitFile = new Path(
            basePath + "/" + HoodieTableMetaClient.AUXILIARYFOLDER_NAME + "/" + instant.getFileName());
    FileSystem fs = FSUtils.getFs(basePath, configuration);
    FSDataOutputStream os = fs.create(commitFile, true);
    HoodieCompactionPlan workload = new HoodieCompactionPlan();
    try {
        // Write an empty compaction plan as serialized Avro bytes
        os.write(AvroUtils.serializeCompactionPlan(workload).get());
    } finally {
        os.close();
    }
}

From source file:com.uber.hoodie.common.HoodieTestDataGenerator.java

License:Apache License

public static void createSavepointFile(String basePath, String commitTime, Configuration configuration)
        throws IOException {
    Path commitFile = new Path(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
            + HoodieTimeline.makeSavePointFileName(commitTime));
    FileSystem fs = FSUtils.getFs(basePath, configuration);
    FSDataOutputStream os = fs.create(commitFile, true);
    HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata();
    try {
        // Write empty commit metadata as UTF-8 JSON
        os.write(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8));
    } finally {
        os.close();
    }
}

From source file:com.uber.hoodie.common.model.HoodieTestUtils.java

License:Apache License

public static final void createInflightCleanFiles(String basePath, Configuration configuration,
        String... commitTimes) throws IOException {
    for (String commitTime : commitTimes) {
        Path commitFile = new Path((basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
                + HoodieTimeline.makeInflightCleanerFileName(commitTime)));
        FileSystem fs = FSUtils.getFs(basePath, configuration);
        FSDataOutputStream os = fs.create(commitFile, true);
        // Close the stream right away; the file's existence is what marks the
        // inflight clean, not its contents.
        os.close();
    }
}

From source file:com.uber.hoodie.common.model.HoodieTestUtils.java

License:Apache License

public static void createCleanFiles(String basePath, String commitTime, Configuration configuration)
        throws IOException {
    Path commitFile = new Path(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
            + HoodieTimeline.makeCleanerFileName(commitTime));
    FileSystem fs = FSUtils.getFs(basePath, configuration);
    FSDataOutputStream os = fs.create(commitFile, true);
    try {
        HoodieCleanStat cleanStats = new HoodieCleanStat(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS,
                DEFAULT_PARTITION_PATHS[rand.nextInt(DEFAULT_PARTITION_PATHS.length)], new ArrayList<>(),
                new ArrayList<>(), new ArrayList<>(), commitTime);
        // Create the clean metadata
        HoodieCleanMetadata cleanMetadata = AvroUtils.convertCleanMetadata(commitTime, Optional.of(0L),
                Arrays.asList(cleanStats));
        // Write empty clean metadata
        os.write(AvroUtils.serializeCleanMetadata(cleanMetadata).get());
    } finally {
        os.close();
    }
}

From source file:com.uber.hoodie.utilities.HoodieCompactionAdminTool.java

License:Apache License

private <T> void serializeOperationResult(FileSystem fs, T result) throws Exception {
    if ((cfg.outputPath != null) && (result != null)) {
        Path outputPath = new Path(cfg.outputPath);
        FSDataOutputStream fsout = fs.create(outputPath, true);
        ObjectOutputStream out = new ObjectOutputStream(fsout);
        out.writeObject(result);
        out.close();
        fsout.close();
    }
}