List of usage examples for org.apache.hadoop.fs FileSystem open
public FSDataInputStream open(Path f) throws IOException
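Before the per-project examples below, here is a minimal, self-contained sketch of the typical pattern (the path is illustrative, not taken from any of the examples): open the file, wrap the returned FSDataInputStream in a reader, and let try-with-resources close both.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class OpenExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/tmp/example.txt"); // illustrative path
        // try-with-resources closes the stream and reader even on error
        try (FSDataInputStream in = fs.open(path);
                BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        }
    }
}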
From source file:com.linkedin.cubert.utils.CodeDictionary.java
License:Open Source License
/**
 * Reads the dictionary from the HDFS filesystem.
 *
 * @param fs the filesystem to read from
 * @param path the path to the dictionary file
 * @throws IOException if the file cannot be read
 */
public void read(FileSystem fs, Path path) throws IOException {
    FSDataInputStream istream = fs.open(path);
    BufferedReader reader = new BufferedReader(new InputStreamReader(istream));
    read(reader);
    reader.close();
}
From source file:com.linkedin.cubert.utils.CubertMD.java
License:Open Source License
public static HashMap<String, String> readMetafile(String metaFilePath) throws IOException {
    Job tempjob = new Job();
    Configuration tempconf = tempjob.getConfiguration();
    FileSystem fs = FileSystem.get(tempconf);
    HashMap<String, String> result = new HashMap<String, String>();
    try (FSDataInputStream inStream = fs.open(new Path(metaFilePath + "/.meta"));
            BufferedReader breader = new BufferedReader(new InputStreamReader(inStream))) {
        String line;
        while ((line = breader.readLine()) != null) {
            String[] splits = line.split("\\s+");
            result.put(splits[0], splits[1]);
        }
    } catch (IOException e) {
        // Missing or unreadable metafile: return whatever was read so far.
        return result;
    }
    return result;
}
From source file:com.linkedin.hadoop.example.WordCountJobTest.java
License:Apache License
/**
 * Helper method to read the word counts output file from the given path on HDFS.
 *
 * @param outputPath The path on HDFS to the word counts output file
 * @return The word counts as an ordered map of word to count
 * @throws Exception If there is a problem while reading the word counts
 */
private Map<String, Long> readWordCounts(Path outputPath) throws Exception {
    FileSystem fileSystem = getFileSystem();
    BufferedReader reader = new BufferedReader(new InputStreamReader(fileSystem.open(outputPath)));
    Map<String, Long> wordCounts = new LinkedHashMap<>();
    try {
        String line = reader.readLine();
        while (line != null) {
            int tab = line.indexOf('\t');
            if (tab == -1) {
                throw new Exception(String.format("No tab delimiter found in the line: %s", line));
            }
            String word = line.substring(0, tab);
            long count = Long.parseLong(line.substring(tab + 1));
            if (wordCounts.containsKey(word)) {
                throw new Exception(String.format("Found repeated instance of the word: %s", word));
            }
            wordCounts.put(word, count);
            line = reader.readLine();
        }
    } finally {
        reader.close();
    }
    return wordCounts;
}
From source file:com.linkedin.mapred.AvroFileReader.java
License:Open Source License
protected DataFileStream<Object> getAvroDataStream(Path path) throws IOException {
    FileSystem fs = getFilesystem(_conf, path);
    GenericDatumReader<Object> avroReader = _isSpecific ? new SpecificDatumReader<Object>()
            : new GenericDatumReader<Object>();
    InputStream hdfsInputStream = fs.open(path);
    return new DataFileStream<Object>(hdfsInputStream, avroReader);
}
From source file:com.linkedin.mapred.AvroUtils.java
License:Open Source License
/**
 * Loads the schema from an Avro data file.
 *
 * @param conf The JobConf.
 * @param path The path to the data file.
 * @return The schema read from the data file's metadata.
 * @throws IOException
 */
public static Schema getSchemaFromFile(JobConf conf, Path path) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    FSDataInputStream dataInputStream = fs.open(path);
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    DataFileStream<GenericRecord> dataFileStream = new DataFileStream<GenericRecord>(dataInputStream, reader);
    try {
        // The header (including the schema) is read when the stream is constructed.
        return dataFileStream.getSchema();
    } finally {
        dataFileStream.close();
    }
}
From source file:com.linkedin.mapred.AvroUtils.java
License:Open Source License
/**
 * Obtains a DataFileStream given a conf and path.
 *
 * @param conf the JobConf used to resolve the filesystem
 * @param path the path to the Avro data file
 * @return a DataFileStream over the file's records
 * @throws IOException if the file cannot be opened
 */
public static DataFileStream<Object> getAvroDataStream(JobConf conf, Path path) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    GenericDatumReader<Object> avroReader = new GenericDatumReader<Object>();
    InputStream hdfsInputStream = fs.open(path);
    return new DataFileStream<Object>(hdfsInputStream, avroReader);
}
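A hedged usage sketch, not taken from the source files: the DataFileStream returned by the helper above is both an Iterator and a Closeable, so a caller might iterate the records and then close the stream, which also closes the underlying HDFS input stream. The path is a placeholder and jobConf is assumed to be an existing JobConf in scope.

DataFileStream<Object> stream = AvroUtils.getAvroDataStream(jobConf, new Path("/data/input.avro"));
try {
    while (stream.hasNext()) {
        Object record = stream.next();
        // process record ...
    }
} finally {
    stream.close(); // also closes the underlying HDFS input stream
}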
From source file:com.linkedin.pinot.core.indexsegment.utils.InputOutputStreamUtils.java
License:Apache License
public static InputStream getInputStream(String filePath, FileSystemMode mode, FileSystem fs)
        throws IOException {
    InputStream is = null;
    switch (mode) {
    case DISK:
        is = new BufferedInputStream(new FileInputStream(new File(filePath)));
        break;
    case HDFS:
        is = new BufferedInputStream(fs.open(new Path(filePath)));
        break;
    default:
        throw new UnsupportedOperationException();
    }
    return is;
}
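A hypothetical pair of calls (the paths are placeholders; per the switch above, the FileSystem argument is only consulted in HDFS mode, so it may be null for DISK reads):

// Local disk read: the FileSystem argument is ignored in DISK mode.
InputStream local = InputOutputStreamUtils.getInputStream("/data/segment.idx", FileSystemMode.DISK, null);
// HDFS read: delegates to fs.open(new Path(filePath)).
InputStream remote = InputOutputStreamUtils.getInputStream("/hdfs/segment.idx", FileSystemMode.HDFS, fs);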
From source file:com.linkedin.pinot.hadoop.job.SegmentTarPushJob.java
License:Apache License
public void pushOneTarFile(FileSystem fs, Path path) throws Exception {
    String fileName = path.getName();
    if (!fileName.endsWith(".tar.gz")) {
        return;
    }
    long length = fs.getFileStatus(path).getLen();
    for (String host : _hosts) {
        InputStream inputStream = null;
        try {
            inputStream = fs.open(path);
            fileName = fileName.split(".tar")[0];
            LOGGER.info("******** Uploading file: {} to Host: {} and Port: {} *******", fileName, host, _port);
            try {
                int responseCode = FileUploadUtils.sendSegmentFile(host, _port, fileName, inputStream, length);
                LOGGER.info("Response code: {}", responseCode);
            } catch (Exception e) {
                LOGGER.error("******** Error uploading file: {} to Host: {} and Port: {} *******", fileName, host, _port);
                LOGGER.error("Caught exception during upload", e);
                throw new RuntimeException("Got error while sending tar files to the push hosts!");
            }
        } finally {
            if (inputStream != null) {
                inputStream.close();
            }
        }
    }
}
From source file:com.linkedin.thirdeye.bootstrap.segment.create.SegmentCreationPhaseJob.java
License:Apache License
private Schema createSchema(String configPath) throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());
    StarTreeConfig starTreeConfig = StarTreeConfig.decode(fs.open(new Path(configPath)));
    LOGGER.info("{}", starTreeConfig);
    Schema schema = new Schema();
    for (DimensionSpec dimensionSpec : starTreeConfig.getDimensions()) {
        FieldSpec spec = new DimensionFieldSpec();
        spec.setName(dimensionSpec.getName());
        spec.setDataType(DataType.STRING);
        spec.setSingleValueField(true);
        schema.addSchema(dimensionSpec.getName(), spec);
    }
    for (MetricSpec metricSpec : starTreeConfig.getMetrics()) {
        FieldSpec spec = new MetricFieldSpec();
        spec.setName(metricSpec.getName());
        spec.setDataType(DataType.valueOf(metricSpec.getType().toString()));
        spec.setSingleValueField(true);
        schema.addSchema(metricSpec.getName(), spec);
    }
    TimeGranularitySpec incoming = new TimeGranularitySpec(DataType.LONG,
            starTreeConfig.getTime().getBucket().getUnit(), starTreeConfig.getTime().getColumnName());
    TimeGranularitySpec outgoing = new TimeGranularitySpec(DataType.LONG,
            starTreeConfig.getTime().getBucket().getUnit(), starTreeConfig.getTime().getColumnName());
    schema.addSchema(starTreeConfig.getTime().getColumnName(), new TimeFieldSpec(incoming, outgoing));
    schema.setSchemaName(starTreeConfig.getCollection());
    return schema;
}
From source file:com.linkedin.thirdeye.bootstrap.segment.create.ThirdeyeRecordReader.java
License:Apache License
public ThirdeyeRecordReader(String sequenceFileName, Schema schema, String starTreeConfigFileName)
        throws IOException {
    super();
    super.initNullCounters(schema);
    _sequenceFile = new File(sequenceFileName);
    _schema = schema;
    FileSystem fs = FileSystem.get(new Configuration());
    _starTreeConfig = StarTreeConfig.decode(fs.open(new Path(starTreeConfigFileName)));
    _dimensionToIndexMapping = new HashMap<>();
    for (int i = 0; i < _starTreeConfig.getDimensions().size(); i++) {
        _dimensionToIndexMapping.put(_starTreeConfig.getDimensions().get(i).getName(), i);
    }
    _metricToIndexMapping = new HashMap<>();
    for (int i = 0; i < _starTreeConfig.getMetrics().size(); i++) {
        _metricToIndexMapping.put(_starTreeConfig.getMetrics().get(i).getName(), i);
    }
    _metricSchema = MetricSchema.fromMetricSpecs(_starTreeConfig.getMetrics());
}