List of usage examples for org.apache.hadoop.fs FileSystem open
public FSDataInputStream open(Path f) throws IOException
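Every example below calls the single-argument open(Path) overload, which returns an FSDataInputStream. As a minimal, self-contained sketch of that pattern (the filesystem URI and file path here are hypothetical placeholders, not taken from any of the examples below), opening a file and reading it line by line might look like this:

// Minimal sketch: open a file on a Hadoop FileSystem and read it line by line.
// The URI "hdfs://localhost:9000" and the path "/tmp/example.txt" are placeholders.
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class OpenExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create("hdfs://localhost:9000"), conf);
        Path path = new Path("/tmp/example.txt");

        // open() returns an FSDataInputStream; try-with-resources closes it when done.
        try (FSDataInputStream in = fs.open(path);
             BufferedReader reader = new BufferedReader(new InputStreamReader(in))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        }
    }
}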
From source file:com.maxpoint.cascading.avro.AvroScheme.java
License:Open Source License
private void retrieveSchema(FlowProcess<JobConf> flowProcess, Tap tap) {
    try {
        if (tap instanceof CompositeTap)
            tap = (Tap) ((CompositeTap) tap).getChildTaps().next();
        final String file = tap.getIdentifier();
        Path p = new Path(file);
        Configuration conf = new Configuration();
        final FileSystem fs = p.getFileSystem(conf);
        for (FileStatus status : fs.listStatus(p)) {
            p = status.getPath();
            // no need to open them all
            InputStream stream = new BufferedInputStream(fs.open(p));
            DataFileStream reader = new DataFileStream(stream, new ReflectDatumReader());
            dataSchema = reader.getSchema();
            retrieveSourceFields(tap);
            return;
        }
        throw new RuntimeException("no schema found in " + file);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
From source file:com.mcd.gdw.daas.mapreduce.ZipFileRecordReader.java
License:Apache License
/**
 * Initialise and open the ZIP file from the FileSystem
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    this.taskAttemptContext = taskAttemptContext;
    FileSplit split = (FileSplit) inputSplit;
    Configuration conf = taskAttemptContext.getConfiguration();
    path = split.getPath();
    FileSystem fs = path.getFileSystem(conf);
    zipfilename = path.getName();

    // Open the stream
    fsin = fs.open(path);
    zip = new ZipInputStream(fsin);

    skipFilesonSize = taskAttemptContext.getConfiguration().get("skipFilesonSize");
    if (taskAttemptContext.getConfiguration().get("MAX_FILE_SIZE") != null)
        MAX_FILE_SIZE = Long.parseLong(taskAttemptContext.getConfiguration().get("MAX_FILE_SIZE"));
}
From source file:com.mellanox.r4h.TestReadWhileWriting.java
License:Apache License
static void checkFile(Path p, int expectedsize, final Configuration conf)
        throws IOException, InterruptedException {
    // open the file with another user account
    final String username = UserGroupInformation.getCurrentUser().getShortUserName() + "_" + ++userCount;
    UserGroupInformation ugi = UserGroupInformation.createUserForTesting(username,
            new String[] { "supergroup" });
    final FileSystem fs = DFSTestUtil.getFileSystemAs(ugi, conf);
    final HdfsDataInputStream in = (HdfsDataInputStream) fs.open(p);

    // Check visible length
    Assert.assertTrue(in.getVisibleLength() >= expectedsize);

    // Able to read?
    for (int i = 0; i < expectedsize; i++) {
        Assert.assertEquals((byte) i, (byte) in.read());
    }
    in.close();
}
From source file:com.metamx.druid.indexer.DbUpdaterJob.java
License:Open Source License
@Override
public boolean run() {
    final Configuration conf = new Configuration();
    ImmutableList.Builder<DataSegment> publishedSegmentsBuilder = ImmutableList.builder();

    for (String propName : System.getProperties().stringPropertyNames()) {
        if (propName.startsWith("hadoop.")) {
            conf.set(propName.substring("hadoop.".length()), System.getProperty(propName));
        }
    }

    final Path descriptorInfoDir = config.makeDescriptorInfoDir();

    try {
        FileSystem fs = descriptorInfoDir.getFileSystem(conf);
        for (FileStatus status : fs.listStatus(descriptorInfoDir)) {
            final DataSegment segment = jsonMapper.readValue(fs.open(status.getPath()), DataSegment.class);

            dbi.withHandle(new HandleCallback<Void>() {
                @Override
                public Void withHandle(Handle handle) throws Exception {
                    handle.createStatement(String.format(
                            "INSERT INTO %s (id, dataSource, created_date, start, end, partitioned, version, used, payload) "
                                    + "VALUES (:id, :dataSource, :created_date, :start, :end, :partitioned, :version, :used, :payload)",
                            spec.getSegmentTable()))
                            .bind("id", segment.getIdentifier())
                            .bind("dataSource", segment.getDataSource())
                            .bind("created_date", new DateTime().toString())
                            .bind("start", segment.getInterval().getStart().toString())
                            .bind("end", segment.getInterval().getEnd().toString())
                            .bind("partitioned", segment.getShardSpec().getPartitionNum())
                            .bind("version", segment.getVersion())
                            .bind("used", true)
                            .bind("payload", jsonMapper.writeValueAsString(segment))
                            .execute();
                    return null;
                }
            });

            publishedSegmentsBuilder.add(segment);
            log.info("Published %s", segment.getIdentifier());
        }
    } catch (IOException e) {
        throw Throwables.propagate(e);
    }

    publishedSegments = publishedSegmentsBuilder.build();
    return true;
}
From source file:com.metamx.druid.indexer.HadoopDruidIndexerNode.java
License:Open Source License
@LifecycleStart
public void start() throws Exception {
    Preconditions.checkNotNull(argumentSpec, "argumentSpec");

    final HadoopDruidIndexerConfig config;
    if (argumentSpec.startsWith("{")) {
        config = HadoopDruidIndexerConfig.fromString(argumentSpec);
    } else if (argumentSpec.startsWith("s3://")) {
        final Path s3nPath = new Path(String.format("s3n://%s", argumentSpec.substring("s3://".length())));
        final FileSystem fs = s3nPath.getFileSystem(new Configuration());

        String configString = CharStreams.toString(new InputSupplier<InputStreamReader>() {
            @Override
            public InputStreamReader getInput() throws IOException {
                return new InputStreamReader(fs.open(s3nPath));
            }
        });

        config = HadoopDruidIndexerConfig.fromString(configString);
    } else {
        config = HadoopDruidIndexerConfig.fromFile(new File(argumentSpec));
    }

    if (intervalSpec != null) {
        final List<Interval> dataInterval = Lists.transform(Arrays.asList(intervalSpec.split(",")),
                new StringIntervalFunction());
        config.setIntervals(dataInterval);
    }

    new HadoopDruidIndexerJob(config).run();
}
From source file:com.metamx.druid.indexer.Utils.java
License:Open Source License
public static InputStream openInputStream(Path inputPath, final FileSystem fileSystem) throws IOException {
    return fileSystem.open(inputPath);
}
From source file:com.metamx.druid.indexer.Utils.java
License:Open Source License
public static Map<String, Object> getStats(JobContext job, Path statsPath) throws IOException {
    FileSystem fs = statsPath.getFileSystem(job.getConfiguration());

    return jsonMapper.readValue(fs.open(statsPath), new TypeReference<Map<String, Object>>() {
    });
}
From source file:com.metamx.milano.pig.MilanoLoadFunc.java
License:Apache License
/**
 * This builds a Pig ResourceSchema from the input file(s). This relies on the existence of TypeMetadata.
 * This is the method by which we pass the schema types and names directly to Pig without having to specify them directly.
 *
 * @param location As passed to relativeToAbsolutePath
 * @param job      The job.
 *
 * @return Returns a ResourceSchema representing the incoming file(s) or null if TypeMetadata does not exist.
 *
 * @throws IOException Not thrown directly, but thrown from getMessageSchema where it indicates an unsupported type.
 */
@Override
public ResourceSchema getSchema(String location, Job job) throws IOException {
    Configuration conf = job.getConfiguration();
    Properties props = ConfigurationUtil.toProperties(conf);

    // HACK: Here we open the file directly to read the TypeMetadata.
    // HACK: There may be a better more direct way to do this, but it works for now.
    Path path = new Path(location);
    FileSystem fileSystem = path.getFileSystem(conf);
    FileStatus fileStatus = fileSystem.getFileStatus(path);

    if (fileStatus.isDir()) {
        log.debug(String.format("Path is a directory."));
        path = getFilePath(path, fileSystem);
        if (path == null) {
            return null;
        }
    } else if (!fileSystem.exists(path)) {
        return null;
    }

    MilanoProtoFile.Reader reader = MilanoProtoFile.createReader(fileSystem.open(path));
    typeMetadata = reader.getMetadata();
    reader.close();

    if (typeMetadata == null) {
        return null;
    }
    descriptor = MilanoTool.with(typeMetadata).getDescriptor();

    return new ResourceSchema(getMessageSchema(descriptor));
}
From source file:com.ml.ira.algos.LogisticModelParameters.java
License:Apache License
public static LogisticModelParameters loadFrom(Path path) throws IOException {
    FileSystem ofs = path.getFileSystem(new Configuration());
    if (!ofs.exists(path)) {
        throw new IOException(path.toString() + " does not exist.");
    }

    LogisticModelParameters result = new LogisticModelParameters();
    FSDataInputStream in = ofs.open(path);
    result.readFields(in);
    ofs.close();
    return result;
}
From source file:com.ML_Hadoop.K_meansClustering.K_meansClusteringMap.java
@Override
public void setup(Context context) {
    number_of_clusters = context.getConfiguration().getInt("number_of_clusters", 2);
    feature_size = context.getConfiguration().getInt("feature_size", 1);
    num_of_members_in_a_cluster = new int[number_of_clusters];

    // Initialization of the ArrayLists 'cetroid_of_clusters' and 'sum_of_members_in_a_cluster'
    // and of the array 'num_of_members_in_a_cluster'
    Float[] t = new Float[feature_size];
    for (int i = 0; i < feature_size; i++)
        t[i] = 0.0f;
    for (int i = 0; i < number_of_clusters; i++) {
        cetroid_of_clusters.add(t);
        sum_of_members_in_a_cluster.add(t);
        num_of_members_in_a_cluster[i] = 0;
    }

    // Read the current cluster centroids from the k_mean.txt file.
    // On the first iteration, the centroids must be initialized either as random values
    // (within the min & max of each feature) or by the user.
    try {
        Float[] t_float;
        String uri = "/user/hduser/k_mean.txt";
        FileSystem fs = FileSystem.get(context.getConfiguration());
        if (fs.exists(new Path(uri))) {
            BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(new Path(uri))));
            String[] temp;
            for (int i = 0; i < number_of_clusters; i++) {
                temp = br.readLine().split(",");
                t_float = new Float[feature_size];
                for (int j = 0; j < feature_size; j++)
                    t_float[j] = Float.parseFloat(temp[j]);
                cetroid_of_clusters.set(i, t_float);
            }
        } else {
            // Initialization of the cluster centroids by the user for this specific data set.
            // One good option is to choose these values at random and put them in the "k_mean.txt" file,
            // one centroid per line with its feature values separated by ',', for example:
            // 13.325872,16.854961
            // 13.5158205,8.382423
            // 16.05023,4.76127
            t_float = new Float[2];
            t_float[0] = 13.325872f;
            t_float[1] = 16.854961f;
            cetroid_of_clusters.set(0, t_float);

            t_float = new Float[2];
            t_float[0] = 13.5158205f;
            t_float[1] = 8.382423f;
            cetroid_of_clusters.set(1, t_float);

            t_float = new Float[2];
            t_float[0] = 16.05023f;
            t_float[1] = 4.76127f;
            cetroid_of_clusters.set(2, t_float);
        }
    } catch (Exception e) {
    }
}