List of usage examples for org.apache.hadoop.fs FileSystem get
public static FileSystem get(Configuration conf) throws IOException
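Before the project-specific examples, here is a minimal usage sketch (not drawn from any project on this page; the class name and file path are hypothetical). FileSystem.get(conf) returns the file system implied by the Configuration, typically the local file system unless fs.defaultFS points at an HDFS namenode:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemGetSketch {
    public static void main(String[] args) throws IOException {
        // The Configuration picks up core-site.xml/hdfs-site.xml from the classpath;
        // fs.defaultFS decides whether this resolves to the local file system or HDFS.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Hypothetical path, used only to illustrate the returned handle.
        Path path = new Path("/tmp/example.txt");
        if (fs.exists(path)) {
            try (FSDataInputStream in = fs.open(path)) {
                System.out.println("first byte: " + in.read());
            }
        }
    }
}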
From source file:co.cask.tephra.hbase96.coprocessor.TransactionProcessorTest.java
License:Apache License
private HRegion createRegion(String tableName, byte[] family, long ttl) throws IOException {
    HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(tableName));
    HColumnDescriptor cfd = new HColumnDescriptor(family);
    if (ttl > 0) {
        cfd.setValue(TxConstants.PROPERTY_TTL, String.valueOf(ttl));
    }
    cfd.setMaxVersions(10);
    htd.addFamily(cfd);
    htd.addCoprocessor(TransactionProcessor.class.getName());
    Path tablePath = FSUtils.getTableDir(FSUtils.getRootDir(conf), htd.getTableName());
    Path hlogPath = new Path(FSUtils.getRootDir(conf) + "/hlog");
    FileSystem fs = FileSystem.get(conf);
    assertTrue(fs.mkdirs(tablePath));
    HLog hLog = HLogFactory.createHLog(fs, hlogPath, tableName, conf);
    HRegionInfo regionInfo = new HRegionInfo(TableName.valueOf(tableName));
    HRegionFileSystem regionFS = HRegionFileSystem.createRegionOnFileSystem(conf, fs, tablePath, regionInfo);
    return new HRegion(regionFS, hLog, conf, htd, new MockRegionServerServices(conf, null));
}
From source file:co.cask.tigon.data.hbase.HBaseTestBase.java
License:Apache License
public Path createHBaseRootDir(Configuration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    Path hbaseRootdir = new Path(fs.makeQualified(fs.getHomeDirectory()), "hbase");
    conf.set(HConstants.HBASE_DIR, hbaseRootdir.toString());
    fs.mkdirs(hbaseRootdir);
    FSUtils.setVersion(fs, hbaseRootdir);
    return hbaseRootdir;
}
From source file:co.cask.tigon.data.increment.hbase96.IncrementSummingScannerTest.java
License:Apache License
private HRegion createRegion(TableName tableName, byte[] family) throws Exception {
    HTableDescriptor htd = new HTableDescriptor(tableName);
    HColumnDescriptor cfd = new HColumnDescriptor(family);
    cfd.setMaxVersions(Integer.MAX_VALUE);
    htd.addFamily(cfd);
    htd.addCoprocessor(IncrementHandler.class.getName());
    Path tablePath = new Path("/tmp/" + tableName.getNameAsString());
    Path hlogPath = new Path("/tmp/hlog-" + tableName.getNameAsString());
    Configuration hConf = conf;
    FileSystem fs = FileSystem.get(hConf);
    assertTrue(fs.mkdirs(tablePath));
    HLog hLog = HLogFactory.createHLog(fs, hlogPath, tableName.getNameAsString(), hConf);
    HRegionInfo regionInfo = new HRegionInfo(tableName);
    HRegionFileSystem regionFS = HRegionFileSystem.createRegionOnFileSystem(hConf, fs, tablePath, regionInfo);
    return new HRegion(regionFS, hLog, hConf, htd, new MockRegionServerServices(hConf, null));
}
From source file:code.DemoWordCount.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS)
            ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    LOG.info("Tool: " + DemoWordCount.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - number of reducers: " + reduceTasks);

    Configuration conf = getConf();
    Job job = Job.getInstance(conf);
    job.setJobName(DemoWordCount.class.getSimpleName());
    job.setJarByClass(DemoWordCount.class);
    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyReducer.class);
    job.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}
From source file:ColumnStorage.ColumnProject.java
License:Open Source License
public ColumnProject(Path path, Configuration conf) throws Exception {
    String name = path.toString() + ConstVar.Navigator;
    Path naviPath = new Path(name);

    this.conf = conf;

    FileSystem fs = FileSystem.get(conf);
    loadColmnInfoFromHeadInfo(fs, path);
}
From source file:com.acme.io.JsonLoader.java
License:Apache License
/**
 * Get a schema for the data to be loaded.
 * @param location Location as returned by
 * {@link LoadFunc#relativeToAbsolutePath(String, org.apache.hadoop.fs.Path)}
 * @param job The {@link Job} object - this should be used only to obtain
 * cluster properties through {@link Job#getConfiguration()} and not to
 * set/query any runtime job information.
 * @return schema for the data to be loaded. This schema should represent
 * all tuples of the returned data. If the schema is unknown or it is
 * not possible to return a schema that represents all returned data,
 * then null should be returned. The schema should not be affected by
 * pushProjection, i.e. getSchema should always return the original schema
 * even after pushProjection.
 * @throws IOException if an exception occurs while determining the schema
 */
public ResourceSchema getSchema(String location, Job job) throws IOException {
    // Open the schema file and read the schema.
    // Get an HDFS handle.
    FileSystem fs = FileSystem.get(job.getConfiguration());
    DataInputStream in = fs.open(new Path(location + "/_schema"));
    String line = in.readLine();
    in.close();

    // Parse the schema.
    ResourceSchema s = new ResourceSchema(Utils.getSchemaFromString(line));
    if (s == null) {
        throw new IOException("Unable to parse schema found in file " + location + "/_schema");
    }

    // Now that we have determined the schema, store it in our
    // UDFContext properties object so we have it when we need it on the backend.
    UDFContext udfc = UDFContext.getUDFContext();
    Properties p = udfc.getUDFProperties(this.getClass(), new String[] { udfcSignature });
    p.setProperty("pig.jsonloader.schema", line);

    return s;
}
From source file:com.acme.io.JsonStorage.java
License:Apache License
/**
 * Store schema of the data being written.
 * @param schema Schema to be recorded
 * @param location Location as returned by
 * {@link LoadFunc#relativeToAbsolutePath(String, org.apache.hadoop.fs.Path)}
 * @param job The {@link Job} object - this should be used only to obtain
 * cluster properties through {@link Job#getConfiguration()} and not to
 * set/query any runtime job information.
 * @throws IOException
 */
public void storeSchema(ResourceSchema schema, String location, Job job) throws IOException {
    // Store the schema in a side file in the same directory. MapReduce
    // does not include files starting with "_" when reading data for a job.
    FileSystem fs = FileSystem.get(job.getConfiguration());
    DataOutputStream out = fs.create(new Path(location + "/_schema"));
    out.writeBytes(schema.toString());
    out.writeByte('\n');
    out.close();
}
From source file:com.acme.marketing.MetroResolver.java
License:Apache License
public String exec(Tuple input) throws IOException {
    if (lookup == null) {
        // We have not been initialized yet, so do it now.
        lookup = new HashMap<String, String>();

        // Get an instance of the HDFS FileSystem class so we can read a
        // file from HDFS. We need a copy of our configuration to do that;
        // read the configuration from the UDFContext.
        FileSystem fs = FileSystem.get(UDFContext.getUDFContext().getJobConf());
        DataInputStream in = fs.open(new Path(lookupFile));
        String line;
        while ((line = in.readLine()) != null) {
            String[] toks = new String[2];
            toks = line.split(":", 2);
            lookup.put(toks[0], toks[1]);
        }
        in.close();
    }
    return lookup.get((String) input.get(0));
}
From source file:com.addthis.hydra.task.output.HDFSOutputWrapperFactory.java
License:Apache License
@JsonCreator
public HDFSOutputWrapperFactory(@JsonProperty(value = "hdfsUrl", required = true) String hdfsUrl,
        @JsonProperty(value = "dir", required = true) Path dir) throws IOException {
    Configuration config = new Configuration();
    config.set("fs.defaultFS", hdfsUrl);
    config.set("fs.automatic.close", "false");
    config.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
    this.fileSystem = FileSystem.get(config);
    this.dir = dir;
}
From source file:com.adsame.samelogs.SameLogsSink.java
License:Apache License
@SuppressWarnings("rawtypes") @Override/* w ww .j a v a 2 s . c o m*/ public void append(Event e) throws IOException { // append the event to the output byte[] fn = e.get(TailSource.A_TAILSRCFILE); byte[] bd = e.getBody(); System.out.println("##" + new String(fn) + "##" + new String(bd)); Map<String, byte[]> maps = e.getAttrs(); Iterator iter = maps.entrySet().iterator(); while (iter.hasNext()) { Map.Entry entry = (Map.Entry) iter.next(); String key = (String) entry.getKey(); System.out.println("key: " + key); } // here we are assuming the body is a string pw.println(new String(e.getBody())); pw.flush(); // so we can see it in the file right away Configuration configuration = new Configuration(); FileSystem hdfsFileSystem = FileSystem.get(configuration); Path path = new Path("hdfs://nodie-Ubuntu4:9000/user/nodie/input/dfs/hello"); FSDataOutputStream out; System.out.println("exists: " + hdfsFileSystem.exists(path)); if (hdfsFileSystem.exists(path)) { out = hdfsFileSystem.append(path); } else { out = hdfsFileSystem.create(path); } out.write(e.getBody()); out.writeChar('\n'); out.flush(); out.close(); }