List of usage examples for org.apache.hadoop.fs FileSystem get
public static FileSystem get(Configuration conf) throws IOException
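Before the project-specific examples, here is a minimal usage sketch (not drawn from any project on this page; the class name and file path are hypothetical). FileSystem.get(conf) returns the file system implied by the Configuration, typically the local file system unless fs.defaultFS points at an HDFS namenode:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemGetSketch {
    public static void main(String[] args) throws IOException {
        // The Configuration picks up core-site.xml/hdfs-site.xml from the classpath;
        // fs.defaultFS decides whether this resolves to the local file system or HDFS.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Hypothetical path, used only to illustrate the returned handle.
        Path path = new Path("/tmp/example.txt");
        if (fs.exists(path)) {
            try (FSDataInputStream in = fs.open(path)) {
                System.out.println("first byte: " + in.read());
            }
        }
    }
}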
From source file:co.cask.tephra.hbase96.coprocessor.TransactionProcessorTest.java
License:Apache License
private HRegion createRegion(String tableName, byte[] family, long ttl) throws IOException {
    HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(tableName));
    HColumnDescriptor cfd = new HColumnDescriptor(family);
    if (ttl > 0) {
        cfd.setValue(TxConstants.PROPERTY_TTL, String.valueOf(ttl));
    }
    cfd.setMaxVersions(10);
    htd.addFamily(cfd);
    htd.addCoprocessor(TransactionProcessor.class.getName());
    Path tablePath = FSUtils.getTableDir(FSUtils.getRootDir(conf), htd.getTableName());
    Path hlogPath = new Path(FSUtils.getRootDir(conf) + "/hlog");
    FileSystem fs = FileSystem.get(conf);
    assertTrue(fs.mkdirs(tablePath));
    HLog hLog = HLogFactory.createHLog(fs, hlogPath, tableName, conf);
    HRegionInfo regionInfo = new HRegionInfo(TableName.valueOf(tableName));
    HRegionFileSystem regionFS = HRegionFileSystem.createRegionOnFileSystem(conf, fs, tablePath, regionInfo);
    return new HRegion(regionFS, hLog, conf, htd, new MockRegionServerServices(conf, null));
}
From source file:co.cask.tigon.data.hbase.HBaseTestBase.java
License:Apache License
public Path createHBaseRootDir(Configuration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    Path hbaseRootdir = new Path(fs.makeQualified(fs.getHomeDirectory()), "hbase");
    conf.set(HConstants.HBASE_DIR, hbaseRootdir.toString());
    fs.mkdirs(hbaseRootdir);
    FSUtils.setVersion(fs, hbaseRootdir);
    return hbaseRootdir;
}
From source file:co.cask.tigon.data.increment.hbase96.IncrementSummingScannerTest.java
License:Apache License
private HRegion createRegion(TableName tableName, byte[] family) throws Exception {
    HTableDescriptor htd = new HTableDescriptor(tableName);
    HColumnDescriptor cfd = new HColumnDescriptor(family);
    cfd.setMaxVersions(Integer.MAX_VALUE);
    htd.addFamily(cfd);
    htd.addCoprocessor(IncrementHandler.class.getName());
    Path tablePath = new Path("/tmp/" + tableName.getNameAsString());
    Path hlogPath = new Path("/tmp/hlog-" + tableName.getNameAsString());
    Configuration hConf = conf;
    FileSystem fs = FileSystem.get(hConf);
    assertTrue(fs.mkdirs(tablePath));
    HLog hLog = HLogFactory.createHLog(fs, hlogPath, tableName.getNameAsString(), hConf);
    HRegionInfo regionInfo = new HRegionInfo(tableName);
    HRegionFileSystem regionFS = HRegionFileSystem.createRegionOnFileSystem(hConf, fs, tablePath, regionInfo);
    return new HRegion(regionFS, hLog, hConf, htd, new MockRegionServerServices(hConf, null));
}
From source file:code.DemoWordCount.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS)
            ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    LOG.info("Tool: " + DemoWordCount.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - number of reducers: " + reduceTasks);

    Configuration conf = getConf();
    Job job = Job.getInstance(conf);
    job.setJobName(DemoWordCount.class.getSimpleName());
    job.setJarByClass(DemoWordCount.class);
    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyReducer.class);
    job.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}
From source file:ColumnStorage.ColumnProject.java
License:Open Source License
public ColumnProject(Path path, Configuration conf) throws Exception {
    String name = path.toString() + ConstVar.Navigator;
    Path naviPath = new Path(name);

    this.conf = conf;

    FileSystem fs = FileSystem.get(conf);
    loadColmnInfoFromHeadInfo(fs, path);
}
From source file:com.acme.io.JsonLoader.java
License:Apache License
/**
 * Get a schema for the data to be loaded.
 * @param location Location as returned by
 * {@link LoadFunc#relativeToAbsolutePath(String, org.apache.hadoop.fs.Path)}
 * @param job The {@link Job} object - this should be used only to obtain
 * cluster properties through {@link Job#getConfiguration()} and not to
 * set/query any runtime job information.
 * @return schema for the data to be loaded. This schema should represent
 * all tuples of the returned data. If the schema is unknown or it is
 * not possible to return a schema that represents all returned data,
 * then null should be returned. The schema should not be affected by
 * pushProjection, i.e. getSchema should always return the original schema
 * even after pushProjection.
 * @throws IOException if an exception occurs while determining the schema
 */
public ResourceSchema getSchema(String location, Job job) throws IOException {
    // Open the schema file and read the schema.
    // Get an HDFS handle.
    FileSystem fs = FileSystem.get(job.getConfiguration());
    DataInputStream in = fs.open(new Path(location + "/_schema"));
    String line = in.readLine();
    in.close();

    // Parse the schema.
    ResourceSchema s = new ResourceSchema(Utils.getSchemaFromString(line));
    if (s == null) {
        throw new IOException("Unable to parse schema found in file " + location + "/_schema");
    }

    // Now that we have determined the schema, store it in our
    // UDFContext properties object so we have it when we need it on the backend.
    UDFContext udfc = UDFContext.getUDFContext();
    Properties p = udfc.getUDFProperties(this.getClass(), new String[] { udfcSignature });
    p.setProperty("pig.jsonloader.schema", line);

    return s;
}
From source file:com.acme.io.JsonStorage.java
License:Apache License
/**
 * Store schema of the data being written.
 * @param schema Schema to be recorded
 * @param location Location as returned by
 * {@link LoadFunc#relativeToAbsolutePath(String, org.apache.hadoop.fs.Path)}
 * @param job The {@link Job} object - this should be used only to obtain
 * cluster properties through {@link Job#getConfiguration()} and not to
 * set/query any runtime job information.
 * @throws IOException
 */
public void storeSchema(ResourceSchema schema, String location, Job job) throws IOException {
    // Store the schema in a side file in the same directory. MapReduce
    // does not include files starting with "_" when reading data for a job.
    FileSystem fs = FileSystem.get(job.getConfiguration());
    DataOutputStream out = fs.create(new Path(location + "/_schema"));
    out.writeBytes(schema.toString());
    out.writeByte('\n');
    out.close();
}
From source file:com.acme.marketing.MetroResolver.java
License:Apache License
public String exec(Tuple input) throws IOException {
    if (lookup == null) {
        // We have not been initialized yet, so do it now.
        lookup = new HashMap<String, String>();

        // Get an instance of the HDFS FileSystem class so we can read a
        // file from HDFS. We need a copy of our configuration to do that;
        // read the configuration from the UDFContext.
        FileSystem fs = FileSystem.get(UDFContext.getUDFContext().getJobConf());
        DataInputStream in = fs.open(new Path(lookupFile));
        String line;
        while ((line = in.readLine()) != null) {
            String[] toks = new String[2];
            toks = line.split(":", 2);
            lookup.put(toks[0], toks[1]);
        }
        in.close();
    }
    return lookup.get((String) input.get(0));
}
From source file:com.addthis.hydra.task.output.HDFSOutputWrapperFactory.java
License:Apache License
@JsonCreator
public HDFSOutputWrapperFactory(@JsonProperty(value = "hdfsUrl", required = true) String hdfsUrl,
        @JsonProperty(value = "dir", required = true) Path dir) throws IOException {
    Configuration config = new Configuration();
    config.set("fs.defaultFS", hdfsUrl);
    config.set("fs.automatic.close", "false");
    config.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
    this.fileSystem = FileSystem.get(config);
    this.dir = dir;
}
From source file:com.adsame.samelogs.SameLogsSink.java
License:Apache License
@SuppressWarnings("rawtypes") @Override/* w ww .j a v a 2 s . c o m*/ public void append(Event e) throws IOException { // append the event to the output byte[] fn = e.get(TailSource.A_TAILSRCFILE); byte[] bd = e.getBody(); System.out.println("##" + new String(fn) + "##" + new String(bd)); Map<String, byte[]> maps = e.getAttrs(); Iterator iter = maps.entrySet().iterator(); while (iter.hasNext()) { Map.Entry entry = (Map.Entry) iter.next(); String key = (String) entry.getKey(); System.out.println("key: " + key); } // here we are assuming the body is a string pw.println(new String(e.getBody())); pw.flush(); // so we can see it in the file right away Configuration configuration = new Configuration(); FileSystem hdfsFileSystem = FileSystem.get(configuration); Path path = new Path("hdfs://nodie-Ubuntu4:9000/user/nodie/input/dfs/hello"); FSDataOutputStream out; System.out.println("exists: " + hdfsFileSystem.exists(path)); if (hdfsFileSystem.exists(path)) { out = hdfsFileSystem.append(path); } else { out = hdfsFileSystem.create(path); } out.write(e.getBody()); out.writeChar('\n'); out.flush(); out.close(); }