Example usage for org.apache.hadoop.fs FileSystem get

List of usage examples for org.apache.hadoop.fs FileSystem get

Introduction

On this page you can find example usage for org.apache.hadoop.fs.FileSystem.get(Configuration).

Prototype

public static FileSystem get(Configuration conf) throws IOException 

Document

Returns the configured FileSystem implementation.
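
A minimal, self-contained sketch of calling this method (the path and printed message below are illustrative only and are not taken from the examples that follow):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemGetExample {
    public static void main(String[] args) throws IOException {
        // Reads core-site.xml/hdfs-site.xml from the classpath; fs.defaultFS
        // decides which FileSystem implementation is returned (the local
        // file system if nothing else is configured).
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Hypothetical path, used only to show typical follow-up calls.
        Path dir = new Path("/tmp/filesystem-get-example");
        if (!fs.exists(dir)) {
            fs.mkdirs(dir);
        }
        System.out.println("File system: " + fs.getUri());
    }
}

Note that FileSystem.get(conf) returns a cached, shared instance keyed by URI scheme and user, which is why most of the examples below do not close it explicitly.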

Usage

From source file:ColumnStorageBasicTest.java

License:Open Source License

public void testConstructorFieldInSameFile() {
    try {
        Configuration conf = new Configuration();
        Path path = new Path(prefix);
        FileSystem fs = FileSystem.get(conf);

        ArrayList<Short> idxs = new ArrayList<Short>(10);
        idxs.add((short) 7);
        idxs.add((short) 9);

        ColumnStorageClient client = new ColumnStorageClient(path, idxs, conf);

        if (client.cp == null) {
            fail("cp null");
        }

        if (client.list.size() != 1) {
            fail("error list size:" + client.list.size());
        }

        if (!client.list.get(0).equals(multiFileNameString)) {
            fail("error filename:" + client.list.get(0));
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail("get exception:" + e.getMessage());
    }
}

From source file:ColumnStorageBasicTest.java

License:Open Source License

public void testConstructorFieldInDiffFile() {
    try {
        Configuration conf = new Configuration();
        Path path = new Path(prefix);
        FileSystem fs = FileSystem.get(conf);

        ArrayList<Short> idxs = new ArrayList<Short>(10);
        idxs.add((short) 0);
        idxs.add((short) 7);
        idxs.add((short) 4);

        ColumnStorageClient client = new ColumnStorageClient(path, idxs, conf);

        if (client.cp == null) {
            fail("cp null");
        }
        if (client.fds.length != 3) {
            fail("error fds.len:" + client.fds.length);
        }
        for (int i = 0; i < client.fds.length; i++) {
            if (client.fds[i] == null) {
                fail("null fd:" + i);
            }
        }
        if (client.list.size() != 3) {
            fail("error list size:" + client.list.size());
        }

        if (!client.list.get(0).equals(byteFileName)) {
            fail("error filename:" + client.list.get(0));
        }
        if (!client.list.get(1).equals(multiFileNameString)) {
            fail("error filename:" + client.list.get(1));
        }
        if (!client.list.get(2).equals(floatFileName)) {
            fail("error filename:" + client.list.get(2));
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail("get exception:" + e.getMessage());
    }
}

From source file:ColumnStorageBasicTest.java

License:Open Source License

public void testGetRecordByLine() {
    try {
        Configuration conf = new Configuration();
        Path path = new Path(prefix);
        FileSystem fs = FileSystem.get(conf);

        ArrayList<Short> idxs = new ArrayList<Short>(10);
        idxs.add((short) 0);
        idxs.add((short) 7);
        idxs.add((short) 4);

        ColumnStorageClient client = new ColumnStorageClient(path, idxs, conf);

        Record record = client.getRecordByLine(-1);
        if (record != null) {
            fail("should return null record 1");
        }

        record = client.getRecordByLine(10);
        if (record != null) {
            fail("should return null record 2");
        }

        for (int i = 0; i < 10; i++) {
            record = client.getRecordByLine(i);
            if (record == null) {
                fail("should not return null record");
            }

            if (record.fieldValues().size() != 5) {
                fail("error field num:" + record.fieldValues().size());
            }

            record.show();

            judgeNofixRecord(record, i);

        }
    } catch (Exception e) {
        e.printStackTrace();
        fail("get exception:" + e.getMessage());
    }
}

From source file:HadoopWordCount.java

License:Open Source License

public static void main(String[] args) throws Exception {
    System.setProperty("hazelcast.logging.type", "log4j");

    Path inputPath = new Path(HadoopWordCount.class.getClassLoader().getResource("books").getPath());
    Path outputPath = new Path(OUTPUT_PATH);

    // set up the Hadoop job config, the input and output paths and formats
    JobConf jobConfig = new JobConf();
    jobConfig.setInputFormat(TextInputFormat.class);
    jobConfig.setOutputFormat(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(jobConfig, outputPath);
    TextInputFormat.addInputPath(jobConfig, inputPath);

    // Delete the output directory if it already exists
    FileSystem.get(new Configuration()).delete(outputPath, true);

    JetConfig cfg = new JetConfig();
    cfg.setInstanceConfig(new InstanceConfig()
            .setCooperativeThreadCount(Math.max(1, getRuntime().availableProcessors() / 2)));

    JetInstance jetInstance = Jet.newJetInstance(cfg);
    Jet.newJetInstance(cfg);

    try {
        System.out.print("\nCounting words from " + inputPath);
        long start = nanoTime();
        jetInstance.newJob(buildDag(jobConfig)).execute().get();
        System.out.print("Done in " + NANOSECONDS.toMillis(nanoTime() - start) + " milliseconds.");
        System.out.println("Output written to " + outputPath);
    } finally {
        Jet.shutdownAll();
    }
}

From source file:TestFS.java

License:Open Source License

public static void main(String[] args) throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());

    System.out.println(fs.toString());

}

From source file:ComputeCooccurrenceMatrixPairs.java

License:Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("window size").create(WINDOW));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;
    int window = cmdline.hasOption(WINDOW) ? Integer.parseInt(cmdline.getOptionValue(WINDOW)) : 2;

    LOG.info("Tool: " + ComputeCooccurrenceMatrixPairs.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - window: " + window);
    LOG.info(" - number of reducers: " + reduceTasks);

    Job job = Job.getInstance(getConf());
    job.setJobName(ComputeCooccurrenceMatrixPairs.class.getSimpleName());
    job.setJarByClass(ComputeCooccurrenceMatrixPairs.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);

    job.getConfiguration().setInt("window", window);

    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapOutputKeyClass(PairOfStrings.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(PairOfStrings.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyReducer.class);
    job.setReducerClass(MyReducer.class);
    job.setPartitionerClass(MyPartitioner.class);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}

From source file:JaqlShell.java

License:Apache License

/**
 * @param dir
 * @param numNodes
 * @param format
 * @throws Exception
 */
public void init(String dir, int numNodes) throws Exception {
    String vInfo = VersionInfo.getVersion();
    System.setProperty("test.build.data", dir);
    m_conf = new Configuration();

    // setup conf according to the Hadoop version
    if (vInfo.indexOf("0.20") < 0) {
        throw new Exception("Unsupported Hadoop version: " + vInfo);
    }

    // setup the mini dfs cluster
    m_fs = new MiniDFSCluster(m_conf, numNodes, true, (String[]) null);
    FileSystem filesystem = m_fs.getFileSystem();
    m_conf.set("fs.default.name", filesystem.getUri().toString());
    Path parentdir = filesystem.getHomeDirectory();
    filesystem.mkdirs(parentdir);
    //FSUtils.setVersion(filesystem, parentdir);

    // setup hbase cluster (only if OS is not windows)
    //    if(!System.getProperty("os.name").toLowerCase().contains("win")) {
    //      m_conf.set(HConstants.HBASE_DIR, parentdir.toString());      
    //      Path hdfsTestDir = filesystem.makeQualified(new Path(m_conf.get(HConstants.HBASE_DIR)));
    //
    //      // prime the hdfs for hbase information...
    //      HRegion root = HRegion.createHRegion(HRegionInfo.ROOT_REGIONINFO, hdfsTestDir, (HBaseConfiguration)m_conf);
    //      HRegion meta = HRegion.createHRegion(HRegionInfo.FIRST_META_REGIONINFO, hdfsTestDir, (HBaseConfiguration)m_conf);
    //      HRegion.addRegionToMETA(root, meta);
    //
    //      // ... and close the root and meta
    //      if (meta != null) {
    //        meta.close();
    //        meta.getLog().closeAndDelete();
    //      }
    //      if (root != null) {
    //        root.close();
    //        root.getLog().closeAndDelete();
    //      }
    //
    //      try
    //      {
    //        this.zooKeeperCluster = new MiniZooKeeperCluster();
    //        File testDir = new File(dir);
    //        int clientPort = this.zooKeeperCluster.startup(testDir);
    //        m_conf.set("hbase.zookeeper.property.clientPort", Integer.toString(clientPort));
    //      } catch(Exception e) {
    //        LOG.error("Unable to startup zookeeper");
    //        throw new IOException(e);
    //      }
    //      try {
    //        // start the mini cluster
    //        m_base = new MiniHBaseCluster((HBaseConfiguration)m_conf, numNodes);
    //      } catch(Exception e) {
    //        LOG.error("Unable to startup hbase");
    //        throw new IOException(e);
    //      }
    //      try {
    //        // opening the META table ensures that cluster is running
    //        new HTable((HBaseConfiguration)m_conf, HConstants.META_TABLE_NAME);        
    //
    //        //setupOverride(conf);
    //      }
    //      catch (Exception e)
    //      {
    //        LOG.warn("Could not verify that hbase is up", e);
    //      }
    //      setupOverride();
    //    }

    m_mr = startMRCluster(numNodes, m_fs.getFileSystem().getName(), m_conf);

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    // make the home directory if it does not exist
    Path hd = fs.getWorkingDirectory();
    if (!fs.exists(hd))
        fs.mkdirs(hd);

    // make the $USER/_temporary directory if it does not exist
    Path tmpPath = new Path(hd, "_temporary");
    if (!fs.exists(tmpPath))
        fs.mkdirs(tmpPath);

    //    if (m_base != null)
    //    {
    //      try {
    //        m_admin = new HBaseAdmin((HBaseConfiguration) m_conf);
    //        HTableDescriptor[] tables = m_admin.listTables();
    //        if (tables != null)
    //        {
    //          for (int i = 0; i < tables.length; i++)
    //          {
    //            m_admin.enableTable(tables[i].getName());
    //          }
    //        }
    //      } catch(Exception e) {
    //        LOG.warn("failed to enable hbase tables");
    //      }
    //    }
}

From source file:JaqlShell.java

License:Apache License

/**
 * @throws Exception
 */
public void init() throws Exception {
    // do nothing in the case of cluster
    //m_conf = new HBaseConfiguration();
    //m_admin = new HBaseAdmin(m_conf);
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    // make the home directory if it does not exist
    Path hd = fs.getWorkingDirectory();
    if (!fs.exists(hd))
        fs.mkdirs(hd);
}

From source file:SingleFileReader.java

License:Apache License

private void writeFile(String cached, String fSize) throws Exception {
    fileSize = Double.parseDouble((fSize.split("g|G"))[0]) * 1024 * 1024 * 1024;
    String hdfsFolder = "/hdfs_test/";
    String hdfsFile = hdfsFolder + fSize;
    short replication = 1;
    boolean overWrite = true;
    int blockSize = 536870912;
    double numIters = fileSize / (double) bufferSize;

    Configuration conf = getConf();
    fs = FileSystem.get(conf);
    hdfsFilePath = new Path(hdfsFile);
    OutputStream os = fs.create(hdfsFilePath, overWrite, bufferSize, replication, blockSize);

    /* Initialize byte buffer */
    ByteBuffer buf = ByteBuffer.allocate(bufferSize);
    buf.order(ByteOrder.nativeOrder());
    for (int k = 0; k < bufferSize / Integer.SIZE; k++) {
        buf.putInt(k);
    }
    buf.flip();

    /* Write the content of the byte buffer to the HDFS file */
    t = new Timer();
    t.start(0);
    for (long i = 0; i < numIters; i++) {
        os.write(buf.array());
        buf.flip();
    }
    t.end(0);
    os.close();

    /* Check to see if the file needs to be cached */
    t.start(1);
    if (cached.equals("cache")) {
        String cmdStr = "/usr/local/hadoop/bin/hdfs cacheadmin -addDirective -path " + hdfsFile
                + " -pool hdfs_test";
        // System.out.println(cmdStr);
        Process p = Runtime.getRuntime().exec(cmdStr);
        p.waitFor();
        String cmdOutLine = "";
        StringBuffer cmdOutBuf = new StringBuffer();
        BufferedReader cmdOutReader = new BufferedReader(new InputStreamReader(p.getInputStream()));
        while ((cmdOutLine = cmdOutReader.readLine()) != null) {
            cmdOutBuf.append(cmdOutLine + "\n");
        }
        // System.out.println(cmdOutBuf.toString());
    }
    t.end(1);
}

From source file:HoopRemoteTask.java

License:Open Source License

/**
*
*/
public static void main(String args[]) throws Exception {
    // Run the HoopLink constructor; we need this to have a global settings registry
    @SuppressWarnings("unused")
    HoopLink link = new HoopLink();

    dbg("main ()");

    showTimeStamp();

    /**
     * I've taken out the statistics portion since it relies on code that isn't distributed
     * The next version will have this solved. I might try the solution in:
     * http://stackoverflow.com/questions/7443074/initialize-public-static-variable-in-hadoop-through-arguments
     * Although chances are I will switch to using Hoop to collect much better performance and distribution 
     * statistics. See Hoop.java for more information
     */

    HoopPerformanceMeasure metrics = new HoopPerformanceMeasure();
    metrics.setMarker("main");
    HoopLink.metrics.getDataSet().add(metrics);

    if (parseArgs(args) == false) {
        usage();
        return;
    }

    if (HoopLink.postonly == true) {
        postOnly();
        return;
    }

    if (HoopLink.task.equals("none") == true) {
        dbg("No task defined, please use the commandline option -task <task>");
        return;
    }

    dbg("Starting system ...");

    HoopRemoteTask driver = new HoopRemoteTask();

    if (HoopLink.useHadoop == false) {
        dbg("Starting built-in mapper ...");

        driver.indexDocuments();
    } else {
        dbg("Starting hadoop job ...");

        Configuration conf = new Configuration();

        // TRANSFER SETTINGS FROM HoopLink to Configuration!!!

        transferConf(conf);

        // Now we're feeling much better

        HoopRemoteTask.hdfs = FileSystem.get(conf);

        if (HoopLink.dbglocal == true) {
            dbg("Enabling local debugging ...");
            conf.set("mapred.job.tracker", "local");
        } else
            dbg("Disabling local debugging");

        JobConf job = new JobConf(conf, HoopRemoteTask.class);

        job.setJobName(driver.getClassName());

        driver.setJob(job);

        @SuppressWarnings("unused")
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

        job.setJarByClass(HoopRemoteTask.class);

        if (HoopLink.task.equals("invert") == true) {
            dbg("Configuring job for invert task ...");

            job.setReducerClass(HoopInvertedListReducer.class);
            job.setMapperClass(HoopInvertedListMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
        }

        if (HoopLink.task.equals("wordcount") == true) {
            dbg("Configuring job for wordcount task ...");

            job.setReducerClass(HoopWordCountReducer.class);
            job.setMapperClass(HoopWordCountMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
        }

        dbg("Using input path: " + HoopLink.datapath);
        dbg("Using output path: " + HoopLink.outputpath);

        FileInputFormat.addInputPath(job, new Path(HoopLink.datapath));
        FileOutputFormat.setOutputPath(job, new Path(HoopLink.outputpath));

        job.setInputFormat(HoopWholeFileInputFormat.class);

        if ((HoopLink.shardcreate.equals("mos") == true) && (HoopLink.nrshards > 1)) {
            dbg("Setting output to sharded output streams class ...");

            job.setOutputFormat(HoopShardedOutputFormat.class);
        } else
            job.setOutputFormat(TextOutputFormat.class);

        /**
         * Temporarily commented out for testing purposes
         */

        //job.setPartitionerClass (HoopPartitioner.class);                      

        driver.register("Main");

        JobClient.runJob(job);

        postProcess(conf);
    }

    showTimeStamp();

    metrics.closeMarker();
    long timeTaken = metrics.getYValue();
    //long timeTaken=metrics.getMarkerRaw ();
    metrics.printMetrics(timeTaken);

    driver.unregister();

    /**
     * I've taken out the statistics portion since it relies on code that isn't distributed
     * The next version will have this solved. I might try the solution in:
     * http://stackoverflow.com/questions/7443074/initialize-public-static-variable-in-hadoop-through-arguments
     * Although chances are I will switch to using Hoop to collect much better performance and distribution 
     * statistics. See Hoop.java for more information
     */
    //stats.calcStatistics();
    //dbg (stats.printStatistics());
}