Example usage for org.apache.hadoop.fs FileSystem open

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileSystem open.

Prototype

public FSDataInputStream open(Path f) throws IOException 

Document

Opens an FSDataInputStream at the indicated Path.
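
A minimal standalone sketch of the common pattern shared by the examples below (the path is a placeholder, not taken from any of the listed projects): obtain the FileSystem from the Path and a Configuration, then call open to get an FSDataInputStream.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemOpenExample {
    public static void main(String[] args) throws IOException {
        // Placeholder path; point this at a file that actually exists on your cluster or local FS.
        Path path = new Path("/tmp/example.txt");
        Configuration conf = new Configuration();
        FileSystem fs = path.getFileSystem(conf);

        // open(Path) returns an FSDataInputStream, a seekable InputStream.
        try (FSDataInputStream in = fs.open(path)) {
            byte[] buffer = new byte[4096];
            int n;
            while ((n = in.read(buffer)) != -1) {
                System.out.write(buffer, 0, n);
            }
        }
        System.out.flush();
    }
}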

Usage

From source file:com.maxpoint.cascading.avro.AvroScheme.java

License:Open Source License

private void retrieveSchema(FlowProcess<JobConf> flowProcess, Tap tap) {
    try {
        if (tap instanceof CompositeTap)
            tap = (Tap) ((CompositeTap) tap).getChildTaps().next();
        final String file = tap.getIdentifier();
        Path p = new Path(file);
        Configuration conf = new Configuration();
        final FileSystem fs = p.getFileSystem(conf);
        for (FileStatus status : fs.listStatus(p)) {
            p = status.getPath();
            // no need to open them all
            InputStream stream = new BufferedInputStream(fs.open(p));
            DataFileStream reader = new DataFileStream(stream, new ReflectDatumReader());
            dataSchema = reader.getSchema();
            retrieveSourceFields(tap);
            return;
        }
        throw new RuntimeException("no schema found in " + file);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

From source file:com.mcd.gdw.daas.mapreduce.ZipFileRecordReader.java

License:Apache License

/**
* Initialise and open the ZIP file from the FileSystem
*/
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {

    this.taskAttemptContext = taskAttemptContext;
    FileSplit split = (FileSplit) inputSplit;
    Configuration conf = taskAttemptContext.getConfiguration();
    path = split.getPath();
    FileSystem fs = path.getFileSystem(conf);
    zipfilename = path.getName();
    // Open the stream
    fsin = fs.open(path);
    zip = new ZipInputStream(fsin);

    skipFilesonSize = taskAttemptContext.getConfiguration().get("skipFilesonSize");

    if (taskAttemptContext.getConfiguration().get("MAX_FILE_SIZE") != null)
        MAX_FILE_SIZE = Long.parseLong(taskAttemptContext.getConfiguration().get("MAX_FILE_SIZE"));

}

From source file:com.mellanox.r4h.TestReadWhileWriting.java

License:Apache License

static void checkFile(Path p, int expectedsize, final Configuration conf)
        throws IOException, InterruptedException {
    //open the file with another user account
    final String username = UserGroupInformation.getCurrentUser().getShortUserName() + "_" + ++userCount;

    UserGroupInformation ugi = UserGroupInformation.createUserForTesting(username,
            new String[] { "supergroup" });

    final FileSystem fs = DFSTestUtil.getFileSystemAs(ugi, conf);

    final HdfsDataInputStream in = (HdfsDataInputStream) fs.open(p);

    //Check visible length
    Assert.assertTrue(in.getVisibleLength() >= expectedsize);

    //Able to read?
    for (int i = 0; i < expectedsize; i++) {
        Assert.assertEquals((byte) i, (byte) in.read());
    }

    in.close();
}

From source file:com.metamx.druid.indexer.DbUpdaterJob.java

License:Open Source License

@Override
public boolean run() {
    final Configuration conf = new Configuration();

    ImmutableList.Builder<DataSegment> publishedSegmentsBuilder = ImmutableList.builder();

    for (String propName : System.getProperties().stringPropertyNames()) {
        if (propName.startsWith("hadoop.")) {
            conf.set(propName.substring("hadoop.".length()), System.getProperty(propName));
        }
    }

    final Path descriptorInfoDir = config.makeDescriptorInfoDir();

    try {
        FileSystem fs = descriptorInfoDir.getFileSystem(conf);

        for (FileStatus status : fs.listStatus(descriptorInfoDir)) {
            final DataSegment segment = jsonMapper.readValue(fs.open(status.getPath()), DataSegment.class);

            dbi.withHandle(new HandleCallback<Void>() {
                @Override
                public Void withHandle(Handle handle) throws Exception {
                    handle.createStatement(String.format(
                            "INSERT INTO %s (id, dataSource, created_date, start, end, partitioned, version, used, payload) "
                                    + "VALUES (:id, :dataSource, :created_date, :start, :end, :partitioned, :version, :used, :payload)",
                            spec.getSegmentTable())).bind("id", segment.getIdentifier())
                            .bind("dataSource", segment.getDataSource())
                            .bind("created_date", new DateTime().toString())
                            .bind("start", segment.getInterval().getStart().toString())
                            .bind("end", segment.getInterval().getEnd().toString())
                            .bind("partitioned", segment.getShardSpec().getPartitionNum())
                            .bind("version", segment.getVersion()).bind("used", true)
                            .bind("payload", jsonMapper.writeValueAsString(segment)).execute();

                    return null;
                }
            });

            publishedSegmentsBuilder.add(segment);
            log.info("Published %s", segment.getIdentifier());
        }
    } catch (IOException e) {
        throw Throwables.propagate(e);
    }

    publishedSegments = publishedSegmentsBuilder.build();

    return true;
}

From source file:com.metamx.druid.indexer.HadoopDruidIndexerNode.java

License:Open Source License

@LifecycleStart
public void start() throws Exception {
    Preconditions.checkNotNull(argumentSpec, "argumentSpec");

    final HadoopDruidIndexerConfig config;
    if (argumentSpec.startsWith("{")) {
        config = HadoopDruidIndexerConfig.fromString(argumentSpec);
    } else if (argumentSpec.startsWith("s3://")) {
        final Path s3nPath = new Path(String.format("s3n://%s", argumentSpec.substring("s3://".length())));
        final FileSystem fs = s3nPath.getFileSystem(new Configuration());

        String configString = CharStreams.toString(new InputSupplier<InputStreamReader>() {
            @Override
            public InputStreamReader getInput() throws IOException {
                return new InputStreamReader(fs.open(s3nPath));
            }
        });

        config = HadoopDruidIndexerConfig.fromString(configString);
    } else {
        config = HadoopDruidIndexerConfig.fromFile(new File(argumentSpec));
    }

    if (intervalSpec != null) {
        final List<Interval> dataInterval = Lists.transform(Arrays.asList(intervalSpec.split(",")),
                new StringIntervalFunction());

        config.setIntervals(dataInterval);
    }

    new HadoopDruidIndexerJob(config).run();
}

From source file:com.metamx.druid.indexer.Utils.java

License:Open Source License

public static InputStream openInputStream(Path inputPath, final FileSystem fileSystem) throws IOException {
    return fileSystem.open(inputPath);
}

From source file:com.metamx.druid.indexer.Utils.java

License:Open Source License

public static Map<String, Object> getStats(JobContext job, Path statsPath) throws IOException {
    FileSystem fs = statsPath.getFileSystem(job.getConfiguration());

    return jsonMapper.readValue(fs.open(statsPath), new TypeReference<Map<String, Object>>() {
    });
}

From source file:com.metamx.milano.pig.MilanoLoadFunc.java

License:Apache License

/**
 * This builds a Pig ResourceSchema from the input file(s). This relies on the existence of TypeMetadata.
 * This is the method by which we pass the schema types and names directly to pig without having to specify them directly.
 *
 * @param location As passed to relativeToAbsolutePath
 * @param job      The job.
 *
 * @return Returns a ResourceSchema representing the incoming file(s) or null if TypeMetadata does not exist.
 *
 * @throws IOException Not thrown directly, but thrown from getMessageSchema where it indicates an unsupported type.
 */
@Override
public ResourceSchema getSchema(String location, Job job) throws IOException {
    Configuration conf = job.getConfiguration();
    Properties props = ConfigurationUtil.toProperties(conf);

    // HACK: Here we open the file directly to read the TypeMetadata.
    // HACK: There may be a better more direct way to do this, but it works for now.
    Path path = new Path(location);
    FileSystem fileSystem = path.getFileSystem(conf);

    FileStatus fileStatus = fileSystem.getFileStatus(path);
    if (fileStatus.isDir()) {
        log.debug("Path is a directory.");
        path = getFilePath(path, fileSystem);
        if (path == null) {
            return null;
        }
    } else if (!fileSystem.exists(path)) {
        return null;
    }

    MilanoProtoFile.Reader reader = MilanoProtoFile.createReader(fileSystem.open(path));
    typeMetadata = reader.getMetadata();
    reader.close();

    if (typeMetadata == null) {
        return null;
    }
    descriptor = MilanoTool.with(typeMetadata).getDescriptor();

    return new ResourceSchema(getMessageSchema(descriptor));
}

From source file:com.ml.ira.algos.LogisticModelParameters.java

License:Apache License

public static LogisticModelParameters loadFrom(Path path) throws IOException {
    FileSystem ofs = path.getFileSystem(new Configuration());
    if (!ofs.exists(path)) {
        throw new IOException(path.toString() + " does not exist.");
    }
    LogisticModelParameters result = new LogisticModelParameters();
    FSDataInputStream in = ofs.open(path);
    result.readFields(in);
    ofs.close();
    return result;
}

From source file:com.ML_Hadoop.K_meansClustering.K_meansClusteringMap.java

@Override
public void setup(Context context) {
    number_of_clusters = context.getConfiguration().getInt("number_of_clusters", 2);
    feature_size = context.getConfiguration().getInt("feature_size", 1);
    num_of_members_in_a_cluster = new int[number_of_clusters];

    // Initialize the ArrayLists 'cetroid_of_clusters' and 'sum_of_members_in_a_cluster' and the array 'num_of_members_in_a_cluster'
    Float[] t = new Float[feature_size];
    for (int i = 0; i < feature_size; i++)
        t[i] = 0.0f;

    for (int i = 0; i < number_of_clusters; i++) {
        cetroid_of_clusters.add(t);
        sum_of_members_in_a_cluster.add(t);
        num_of_members_in_a_cluster[i] = 0;
    }

    // Read the current cluster centroid values from the k_mean.txt file.
    // If this is the first iteration, the centroids must be initialized, either as
    // random numbers (within the min and max values of each feature) or by the user.

    try {
        Float[] t_float;
        String uri = "/user/hduser/k_mean.txt";
        FileSystem fs = FileSystem.get(context.getConfiguration());
        if (fs.exists(new Path(uri))) {
            BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(new Path(uri))));
            String[] temp;
            for (int i = 0; i < number_of_clusters; i++) {
                temp = br.readLine().split(",");
                t_float = new Float[feature_size];
                for (int j = 0; j < feature_size; j++)
                    t_float[j] = Float.parseFloat(temp[j]);
                cetroid_of_clusters.set(i, t_float);
            }
        } else {
            // Initialization of the cluster centroids by the user for this specific data.
            // One good way is to choose these values randomly and put them in the "k_mean.txt" file,
            // one centroid per line with its features separated by ',', for example:
            //        13.325872,16.854961
            //        13.5158205,8.382423
            //        16.05023,4.76127
            t_float = new Float[2];
            t_float[0] = 13.325872f;
            t_float[1] = 16.854961f;
            cetroid_of_clusters.set(0, t_float);

            t_float = new Float[2];
            t_float[0] = 13.5158205f;
            t_float[1] = 8.382423f;
            cetroid_of_clusters.set(1, t_float);

            t_float = new Float[2];
            t_float[0] = 16.05023f;
            t_float[1] = 4.76127f;
            cetroid_of_clusters.set(2, t_float);

        }
    } catch (Exception e) {
        // Any failure while reading the centroid file is silently ignored.
    }
}