Example usage for org.apache.hadoop.fs FileSystem open

List of usage examples for org.apache.hadoop.fs FileSystem open

Introduction

On this page you can find example usage for org.apache.hadoop.fs.FileSystem.open.

Prototype

public FSDataInputStream open(Path f) throws IOException 

Document

Opens an FSDataInputStream at the indicated Path.
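
As a minimal sketch of the pattern the examples below follow (not taken from any of the listed projects; the file path is a hypothetical placeholder), open(Path) can be used like this:

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemOpenExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Hypothetical path; replace with a file that exists on your cluster.
        Path path = new Path("/tmp/example.txt");

        // open() returns an FSDataInputStream, a regular InputStream with seek support.
        try (FSDataInputStream in = fs.open(path);
                BufferedReader reader = new BufferedReader(
                        new InputStreamReader(in, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        }
    }
}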

Usage

From source file:com.linkedin.cubert.utils.CodeDictionary.java

License:Open Source License

/**
 * Reads the dictionary from the HDFS filesystem.
 *
 * @param fs the filesystem to read from
 * @param path the path of the dictionary file
 * @throws IOException if the dictionary cannot be read
 */
public void read(FileSystem fs, Path path) throws IOException {
    FSDataInputStream istream = fs.open(path);
    BufferedReader reader = new BufferedReader(new InputStreamReader(istream));
    read(reader);
    reader.close();
}

From source file:com.linkedin.cubert.utils.CubertMD.java

License:Open Source License

public static HashMap<String, String> readMetafile(String metaFilePath) throws IOException {
    Job tempjob = new Job();
    Configuration tempconf = tempjob.getConfiguration();
    FileSystem fs = FileSystem.get(tempconf);

    HashMap<String, String> result = new HashMap<String, String>();
    FSDataInputStream inStream;
    try {
        inStream = fs.open(new Path(metaFilePath + "/.meta"));

        BufferedReader breader = new BufferedReader(new InputStreamReader(inStream));
        String line;
        while ((line = breader.readLine()) != null) {
            String[] splits = line.split("\\s+");
            result.put(splits[0], splits[1]);
        }
    } catch (IOException e) {
        return result;
    }
    return result;
}

From source file:com.linkedin.hadoop.example.WordCountJobTest.java

License:Apache License

/**
 * Helper method to read the word counts output file from the given path on HDFS.
 *
 * @param outputPath The path on HDFS to the word counts output file
 * @return The word counts as an ordered map of word to count
 * @throws Exception If there is a problem while reading the word counts
 */
private Map<String, Long> readWordCounts(Path outputPath) throws Exception {
    FileSystem fileSystem = getFileSystem();
    BufferedReader reader = new BufferedReader(new InputStreamReader(fileSystem.open(outputPath)));
    Map<String, Long> wordCounts = new LinkedHashMap<>();

    try {
        String line = reader.readLine();
        while (line != null) {
            int tab = line.indexOf('\t');
            if (tab == -1) {
                throw new Exception(String.format("No tab delimiter found in the line: %s", line));
            }

            String word = line.substring(0, tab);
            long count = Long.parseLong(line.substring(tab + 1));
            if (wordCounts.containsKey(word)) {
                throw new Exception(String.format("Found repeated instance of the word: %s", word));
            }

            wordCounts.put(word, count);
            line = reader.readLine();
        }
    } finally {
        reader.close();
    }

    return wordCounts;
}

From source file:com.linkedin.mapred.AvroFileReader.java

License:Open Source License

protected DataFileStream<Object> getAvroDataStream(Path path) throws IOException {
    FileSystem fs = getFilesystem(_conf, path);

    GenericDatumReader<Object> avroReader = _isSpecific ? new SpecificDatumReader<Object>()
            : new GenericDatumReader<Object>();
    InputStream hdfsInputStream = fs.open(path);
    return new DataFileStream<Object>(hdfsInputStream, avroReader);
}

From source file:com.linkedin.mapred.AvroUtils.java

License:Open Source License

/**
 * Loads the schema from an Avro data file.
 *
 * @param conf The JobConf.
 * @param path The path to the data file.
 * @return The schema read from the data file's metadata.
 * @throws IOException
 */
public static Schema getSchemaFromFile(JobConf conf, Path path) throws IOException {
    FileSystem fs = path.getFileSystem(new Configuration());
    FSDataInputStream dataInputStream = fs.open(path);
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    DataFileStream<GenericRecord> dataFileStream = new DataFileStream<GenericRecord>(dataInputStream, reader);
    return dataFileStream.getSchema();
}

From source file:com.linkedin.mapred.AvroUtils.java

License:Open Source License

/**
 * Obtains a DataFileStream for the Avro data file at the given path.
 *
 * @param conf the JobConf used to resolve the filesystem
 * @param path the path to the Avro data file
 * @return a DataFileStream over the file's records
 * @throws IOException if the file cannot be opened
 */
public static DataFileStream<Object> getAvroDataStream(JobConf conf, Path path) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    GenericDatumReader<Object> avroReader = new GenericDatumReader<Object>();
    InputStream hdfsInputStream = fs.open(path);
    return new DataFileStream<Object>(hdfsInputStream, avroReader);
}

From source file:com.linkedin.pinot.core.indexsegment.utils.InputOutputStreamUtils.java

License:Apache License

public static InputStream getInputStream(String filePath, FileSystemMode mode, FileSystem fs)
        throws IOException {
    InputStream is = null;
    switch (mode) {
    case DISK:
        is = new BufferedInputStream(new FileInputStream(new File(filePath)));
        break;
    case HDFS:
        is = new BufferedInputStream(fs.open(new Path(filePath)));
        break;
    default:
        throw new UnsupportedOperationException();
    }
    return is;
}

From source file:com.linkedin.pinot.hadoop.job.SegmentTarPushJob.java

License:Apache License

public void pushOneTarFile(FileSystem fs, Path path) throws Exception {
    String fileName = path.getName();
    if (!fileName.endsWith(".tar.gz")) {
        return;
    }
    long length = fs.getFileStatus(path).getLen();
    for (String host : _hosts) {
        InputStream inputStream = null;
        try {
            inputStream = fs.open(path);
            fileName = fileName.split(".tar")[0];
            LOGGER.info("******** Uploading file: {} to Host: {} and Port: {} *******", fileName, host, _port);
            try {
                int responseCode = FileUploadUtils.sendSegmentFile(host, _port, fileName, inputStream, length);
                LOGGER.info("Response code: {}", responseCode);
            } catch (Exception e) {
                LOGGER.error("******** Error uploading file: {} to Host: {} and Port: {}  *******", fileName,
                        host, _port);
                LOGGER.error("Caught exception during upload", e);
                throw new RuntimeException("Error while sending tar file to push hosts!");
            }
        } finally {
            if (inputStream != null) {
                inputStream.close();
            }
        }
    }
}

From source file:com.linkedin.thirdeye.bootstrap.segment.create.SegmentCreationPhaseJob.java

License:Apache License

private Schema createSchema(String configPath) throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());

    StarTreeConfig starTreeConfig = StarTreeConfig.decode(fs.open(new Path(configPath)));
    LOGGER.info("{}", starTreeConfig);

    Schema schema = new Schema();
    for (DimensionSpec dimensionSpec : starTreeConfig.getDimensions()) {
        FieldSpec spec = new DimensionFieldSpec();
        spec.setName(dimensionSpec.getName());
        spec.setDataType(DataType.STRING);
        spec.setSingleValueField(true);
        schema.addSchema(dimensionSpec.getName(), spec);
    }
    for (MetricSpec metricSpec : starTreeConfig.getMetrics()) {
        FieldSpec spec = new MetricFieldSpec();
        spec.setName(metricSpec.getName());
        spec.setDataType(DataType.valueOf(metricSpec.getType().toString()));
        spec.setSingleValueField(true);
        schema.addSchema(metricSpec.getName(), spec);
    }
    TimeGranularitySpec incoming = new TimeGranularitySpec(DataType.LONG,
            starTreeConfig.getTime().getBucket().getUnit(), starTreeConfig.getTime().getColumnName());
    TimeGranularitySpec outgoing = new TimeGranularitySpec(DataType.LONG,
            starTreeConfig.getTime().getBucket().getUnit(), starTreeConfig.getTime().getColumnName());
    schema.addSchema(starTreeConfig.getTime().getColumnName(), new TimeFieldSpec(incoming, outgoing));

    schema.setSchemaName(starTreeConfig.getCollection());

    return schema;
}

From source file:com.linkedin.thirdeye.bootstrap.segment.create.ThirdeyeRecordReader.java

License:Apache License

public ThirdeyeRecordReader(String sequenceFileName, Schema schema, String starTreeConfigFileName)
        throws IOException {
    super();
    super.initNullCounters(schema);
    _sequenceFile = new File(sequenceFileName);
    _schema = schema;

    FileSystem fs = FileSystem.get(new Configuration());
    _starTreeConfig = StarTreeConfig.decode(fs.open(new Path(starTreeConfigFileName)));

    _dimensionToIndexMapping = new HashMap<>();
    for (int i = 0; i < _starTreeConfig.getDimensions().size(); i++) {
        _dimensionToIndexMapping.put(_starTreeConfig.getDimensions().get(i).getName(), i);
    }

    _metricToIndexMapping = new HashMap<>();
    for (int i = 0; i < _starTreeConfig.getMetrics().size(); i++) {
        _metricToIndexMapping.put(_starTreeConfig.getMetrics().get(i).getName(), i);
    }

    _metricSchema = MetricSchema.fromMetricSpecs(_starTreeConfig.getMetrics());

}