List of usage examples for org.apache.hadoop.fs.FileSystem.get
public static FileSystem get(Configuration conf) throws IOException
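For orientation, here is a minimal, self-contained sketch of the call pattern the examples below share. This sketch is not taken from any of the source files on this page; the class name and the path in it are hypothetical, chosen only for illustration.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemGetExample {
    public static void main(String[] args) throws IOException {
        // new Configuration() picks up core-site.xml / hdfs-site.xml from the classpath;
        // FileSystem.get(conf) then returns the FileSystem for the configured default URI
        // (fs.defaultFS, or the older fs.default.name key used in the JaqlShell example below).
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // "/tmp/example" is a hypothetical path, used only for illustration.
        Path p = new Path("/tmp/example");
        System.out.println(p + " exists: " + fs.exists(p));
    }
}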
From source file: ColumnStorageBasicTest.java
License: Open Source License

public void testConstructorFieldInSameFile() {
    try {
        Configuration conf = new Configuration();
        Path path = new Path(prefix);
        FileSystem fs = FileSystem.get(conf);

        ArrayList<Short> idxs = new ArrayList<Short>(10);
        idxs.add((short) 7);
        idxs.add((short) 9);

        ColumnStorageClient client = new ColumnStorageClient(path, idxs, conf);

        if (client.cp == null) {
            fail("cp null");
        }
        if (client.list.size() != 1) {
            fail("error list size:" + client.list.size());
        }
        if (!client.list.get(0).equals(multiFileNameString)) {
            fail("error filename:" + client.list.get(0));
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail("get exception:" + e.getMessage());
    }
}
From source file: ColumnStorageBasicTest.java
License: Open Source License

public void testConstructorFieldInDiffFile() {
    try {
        Configuration conf = new Configuration();
        Path path = new Path(prefix);
        FileSystem fs = FileSystem.get(conf);

        ArrayList<Short> idxs = new ArrayList<Short>(10);
        idxs.add((short) 0);
        idxs.add((short) 7);
        idxs.add((short) 4);

        ColumnStorageClient client = new ColumnStorageClient(path, idxs, conf);

        if (client.cp == null) {
            fail("cp null");
        }
        if (client.fds.length != 3) {
            fail("error fds.len:" + client.fds.length);
        }
        for (int i = 0; i < client.fds.length; i++) {
            if (client.fds[i] == null) {
                fail("null fd:" + i);
            }
        }
        if (client.list.size() != 3) {
            fail("error list size:" + client.list.size());
        }
        if (!client.list.get(0).equals(byteFileName)) {
            fail("error filename:" + client.list.get(0));
        }
        if (!client.list.get(1).equals(multiFileNameString)) {
            fail("error filename:" + client.list.get(1));
        }
        if (!client.list.get(2).equals(floatFileName)) {
            fail("error filename:" + client.list.get(2));
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail("get exception:" + e.getMessage());
    }
}
From source file: ColumnStorageBasicTest.java
License: Open Source License

public void testGetRecordByLine() {
    try {
        Configuration conf = new Configuration();
        Path path = new Path(prefix);
        FileSystem fs = FileSystem.get(conf);

        ArrayList<Short> idxs = new ArrayList<Short>(10);
        idxs.add((short) 0);
        idxs.add((short) 7);
        idxs.add((short) 4);

        ColumnStorageClient client = new ColumnStorageClient(path, idxs, conf);

        Record record = client.getRecordByLine(-1);
        if (record != null) {
            fail("should return null record 1");
        }

        record = client.getRecordByLine(10);
        if (record != null) {
            fail("should return null record 2");
        }

        for (int i = 0; i < 10; i++) {
            record = client.getRecordByLine(i);
            if (record == null) {
                fail("should not return null record");
            }
            if (record.fieldValues().size() != 5) {
                fail("error field num:" + record.fieldValues().size());
            }
            record.show();
            judgeNofixRecord(record, i);
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail("get exception:" + e.getMessage());
    }
}
From source file: HadoopWordCount.java
License: Open Source License

public static void main(String[] args) throws Exception {
    System.setProperty("hazelcast.logging.type", "log4j");

    Path inputPath = new Path(HadoopWordCount.class.getClassLoader().getResource("books").getPath());
    Path outputPath = new Path(OUTPUT_PATH);

    // set up the Hadoop job config, the input and output paths and formats
    JobConf jobConfig = new JobConf();
    jobConfig.setInputFormat(TextInputFormat.class);
    jobConfig.setOutputFormat(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(jobConfig, outputPath);
    TextInputFormat.addInputPath(jobConfig, inputPath);

    // delete the output directory, if it already exists
    FileSystem.get(new Configuration()).delete(outputPath, true);

    JetConfig cfg = new JetConfig();
    cfg.setInstanceConfig(new InstanceConfig()
            .setCooperativeThreadCount(Math.max(1, getRuntime().availableProcessors() / 2)));
    JetInstance jetInstance = Jet.newJetInstance(cfg);
    Jet.newJetInstance(cfg);

    try {
        System.out.print("\nCounting words from " + inputPath);
        long start = nanoTime();
        jetInstance.newJob(buildDag(jobConfig)).execute().get();
        System.out.print("Done in " + NANOSECONDS.toMillis(nanoTime() - start) + " milliseconds.");
        System.out.println("Output written to " + outputPath);
    } finally {
        Jet.shutdownAll();
    }
}
From source file: TestFS.java
License: Open Source License

public static void main(String[] args) throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());
    System.out.println(fs.toString());
}
From source file: ComputeCooccurrenceMatrixPairs.java
License: Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("window size").create(WINDOW));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS)
            ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;
    int window = cmdline.hasOption(WINDOW) ? Integer.parseInt(cmdline.getOptionValue(WINDOW)) : 2;

    LOG.info("Tool: " + ComputeCooccurrenceMatrixPairs.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - window: " + window);
    LOG.info(" - number of reducers: " + reduceTasks);

    Job job = Job.getInstance(getConf());
    job.setJobName(ComputeCooccurrenceMatrixPairs.class.getSimpleName());
    job.setJarByClass(ComputeCooccurrenceMatrixPairs.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);

    job.getConfiguration().setInt("window", window);

    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapOutputKeyClass(PairOfStrings.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(PairOfStrings.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyReducer.class);
    job.setReducerClass(MyReducer.class);
    job.setPartitionerClass(MyPartitioner.class);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}
From source file: JaqlShell.java
License: Apache License

/**
 * @param dir
 * @param numNodes
 * @throws Exception
 */
public void init(String dir, int numNodes) throws Exception {
    String vInfo = VersionInfo.getVersion();
    System.setProperty("test.build.data", dir);
    m_conf = new Configuration();

    // setup conf according to the Hadoop version
    if (vInfo.indexOf("0.20") < 0) {
        throw new Exception("Unsupported Hadoop version: " + vInfo);
    }

    // setup the mini dfs cluster
    m_fs = new MiniDFSCluster(m_conf, numNodes, true, (String[]) null);
    FileSystem filesystem = m_fs.getFileSystem();
    m_conf.set("fs.default.name", filesystem.getUri().toString());
    Path parentdir = filesystem.getHomeDirectory();
    filesystem.mkdirs(parentdir);
    //FSUtils.setVersion(filesystem, parentdir);

    // setup hbase cluster (only if OS is not windows)
    //    if (!System.getProperty("os.name").toLowerCase().contains("win")) {
    //        m_conf.set(HConstants.HBASE_DIR, parentdir.toString());
    //        Path hdfsTestDir = filesystem.makeQualified(new Path(m_conf.get(HConstants.HBASE_DIR)));
    //
    //        // prime the hdfs for hbase information...
    //        HRegion root = HRegion.createHRegion(HRegionInfo.ROOT_REGIONINFO, hdfsTestDir, (HBaseConfiguration) m_conf);
    //        HRegion meta = HRegion.createHRegion(HRegionInfo.FIRST_META_REGIONINFO, hdfsTestDir, (HBaseConfiguration) m_conf);
    //        HRegion.addRegionToMETA(root, meta);
    //
    //        // ... and close the root and meta
    //        if (meta != null) {
    //            meta.close();
    //            meta.getLog().closeAndDelete();
    //        }
    //        if (root != null) {
    //            root.close();
    //            root.getLog().closeAndDelete();
    //        }
    //
    //        try {
    //            this.zooKeeperCluster = new MiniZooKeeperCluster();
    //            File testDir = new File(dir);
    //            int clientPort = this.zooKeeperCluster.startup(testDir);
    //            m_conf.set("hbase.zookeeper.property.clientPort", Integer.toString(clientPort));
    //        } catch (Exception e) {
    //            LOG.error("Unable to startup zookeeper");
    //            throw new IOException(e);
    //        }
    //        try {
    //            // start the mini cluster
    //            m_base = new MiniHBaseCluster((HBaseConfiguration) m_conf, numNodes);
    //        } catch (Exception e) {
    //            LOG.error("Unable to startup hbase");
    //            throw new IOException(e);
    //        }
    //        try {
    //            // opening the META table ensures that cluster is running
    //            new HTable((HBaseConfiguration) m_conf, HConstants.META_TABLE_NAME);
    //
    //            //setupOverride(conf);
    //        } catch (Exception e) {
    //            LOG.warn("Could not verify that hbase is up", e);
    //        }
    //        setupOverride();
    //    }

    m_mr = startMRCluster(numNodes, m_fs.getFileSystem().getName(), m_conf);

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    // make the home directory if it does not exist
    Path hd = fs.getWorkingDirectory();
    if (!fs.exists(hd))
        fs.mkdirs(hd);

    // make the $USER/_temporary directory if it does not exist
    Path tmpPath = new Path(hd, "_temporary");
    if (!fs.exists(tmpPath))
        fs.mkdirs(tmpPath);

    //    if (m_base != null) {
    //        try {
    //            m_admin = new HBaseAdmin((HBaseConfiguration) m_conf);
    //            HTableDescriptor[] tables = m_admin.listTables();
    //            if (tables != null) {
    //                for (int i = 0; i < tables.length; i++) {
    //                    m_admin.enableTable(tables[i].getName());
    //                }
    //            }
    //        } catch (Exception e) {
    //            LOG.warn("failed to enable hbase tables");
    //        }
    //    }
}
From source file: JaqlShell.java
License: Apache License

/**
 * @throws Exception
 */
public void init() throws Exception {
    // do nothing in the case of cluster
    //m_conf = new HBaseConfiguration();
    //m_admin = new HBaseAdmin(m_conf);

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    // make the home directory if it does not exist
    Path hd = fs.getWorkingDirectory();
    if (!fs.exists(hd))
        fs.mkdirs(hd);
}
From source file: SingleFileReader.java
License: Apache License

private void writeFile(String cached, String fSize) throws Exception {
    fileSize = Double.parseDouble((fSize.split("g|G"))[0]) * 1024 * 1024 * 1024;
    String hdfsFolder = "/hdfs_test/";
    String hdfsFile = hdfsFolder + fSize;
    short replication = 1;
    boolean overWrite = true;
    int blockSize = 536870912;
    double numIters = fileSize / (double) bufferSize;

    Configuration conf = getConf();
    fs = FileSystem.get(conf);
    hdfsFilePath = new Path(hdfsFile);
    OutputStream os = fs.create(hdfsFilePath, overWrite, bufferSize, replication, blockSize);

    /* Initialize byte buffer */
    ByteBuffer buf = ByteBuffer.allocate(bufferSize);
    buf.order(ByteOrder.nativeOrder());
    for (int k = 0; k < bufferSize / Integer.SIZE; k++) {
        buf.putInt(k);
    }
    buf.flip();

    /* Write the content of the byte buffer to the HDFS file */
    t = new Timer();
    t.start(0);
    for (long i = 0; i < numIters; i++) {
        os.write(buf.array());
        buf.flip();
    }
    t.end(0);
    os.close();

    /* Check to see if the file needs to be cached */
    t.start(1);
    if (cached.equals("cache")) {
        String cmdStr = "/usr/local/hadoop/bin/hdfs cacheadmin -addDirective -path " + hdfsFile
                + " -pool hdfs_test";
        // System.out.println(cmdStr);
        Process p = Runtime.getRuntime().exec(cmdStr);
        p.waitFor();
        String cmdOutLine = "";
        StringBuffer cmdOutBuf = new StringBuffer();
        BufferedReader cmdOutReader = new BufferedReader(new InputStreamReader(p.getInputStream()));
        while ((cmdOutLine = cmdOutReader.readLine()) != null) {
            cmdOutBuf.append(cmdOutLine + "\n");
        }
        // System.out.println(cmdOutBuf.toString());
    }
    t.end(1);
}
From source file: HoopRemoteTask.java
License: Open Source License

/**
 *
 */
public static void main(String args[]) throws Exception {
    // run the HoopLink constructor; we need this to have a global settings registry
    @SuppressWarnings("unused")
    HoopLink link = new HoopLink();

    dbg("main ()");

    showTimeStamp();

    /**
     * I've taken out the statistics portion since it relies on code that isn't distributed.
     * The next version will have this solved. I might try the solution in:
     * http://stackoverflow.com/questions/7443074/initialize-public-static-variable-in-hadoop-through-arguments
     * Although chances are I will switch to using Hoop to collect much better performance and distribution
     * statistics. See Hoop.java for more information.
     */
    HoopPerformanceMeasure metrics = new HoopPerformanceMeasure();
    metrics.setMarker("main");
    HoopLink.metrics.getDataSet().add(metrics);

    if (parseArgs(args) == false) {
        usage();
        return;
    }

    if (HoopLink.postonly == true) {
        postOnly();
        return;
    }

    if (HoopLink.task.equals("none") == true) {
        dbg("No task defined, please use the commandline option -task <task>");
        return;
    }

    dbg("Starting system ...");

    HoopRemoteTask driver = new HoopRemoteTask();

    if (HoopLink.useHadoop == false) {
        dbg("Starting built-in mapper ...");
        driver.indexDocuments();
    } else {
        dbg("Starting hadoop job ...");

        Configuration conf = new Configuration();

        // TRANSFER SETTINGS FROM HoopLink to Configuration!!!
        transferConf(conf);

        // Now we're feeling much better
        HoopRemoteTask.hdfs = FileSystem.get(conf);

        if (HoopLink.dbglocal == true) {
            dbg("Enabling local debugging ...");
            conf.set("mapred.job.tracker", "local");
        } else
            dbg("Disabling local debugging");

        JobConf job = new JobConf(conf, HoopRemoteTask.class);

        job.setJobName(driver.getClassName());

        driver.setJob(job);

        @SuppressWarnings("unused")
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

        job.setJarByClass(HoopRemoteTask.class);

        if (HoopLink.task.equals("invert") == true) {
            dbg("Configuring job for invert task ...");
            job.setReducerClass(HoopInvertedListReducer.class);
            job.setMapperClass(HoopInvertedListMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
        }

        if (HoopLink.task.equals("wordcount") == true) {
            dbg("Configuring job for wordcount task ...");
            job.setReducerClass(HoopWordCountReducer.class);
            job.setMapperClass(HoopWordCountMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
        }

        dbg("Using input path: " + HoopLink.datapath);
        dbg("Using output path: " + HoopLink.outputpath);

        FileInputFormat.addInputPath(job, new Path(HoopLink.datapath));
        FileOutputFormat.setOutputPath(job, new Path(HoopLink.outputpath));

        job.setInputFormat(HoopWholeFileInputFormat.class);

        if ((HoopLink.shardcreate.equals("mos") == true) && (HoopLink.nrshards > 1)) {
            dbg("Setting output to sharded output streams class ...");
            job.setOutputFormat(HoopShardedOutputFormat.class);
        } else
            job.setOutputFormat(TextOutputFormat.class);

        /**
         * Temporarily commented out for testing purposes
         */
        //job.setPartitionerClass(HoopPartitioner.class);

        driver.register("Main");

        JobClient.runJob(job);

        postProcess(conf);
    }

    showTimeStamp();

    metrics.closeMarker();

    long timeTaken = metrics.getYValue();
    //long timeTaken = metrics.getMarkerRaw();
    metrics.printMetrics(timeTaken);

    driver.unregister();

    //stats.calcStatistics();
    //dbg(stats.printStatistics());
}
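A closing note on a design detail that matters in several of the examples above: FileSystem.get(conf) normally returns a shared, cached instance (one per URI scheme and user), so calling close() on it can break other code holding the same reference. When an isolated, individually closeable instance is needed, FileSystem.newInstance can be used instead. The following sketch is our addition, not taken from any of the source files above:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class FileSystemNewInstanceExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();

        // newInstance bypasses the FileSystem cache, so this instance is safe
        // to close without affecting other users of FileSystem.get(conf).
        FileSystem fs = FileSystem.newInstance(conf);
        try {
            System.out.println("Working directory: " + fs.getWorkingDirectory());
        } finally {
            fs.close();
        }
    }
}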