List of usage examples for org.apache.hadoop.fs.FileSystem.get
public static FileSystem get(Configuration conf) throws IOException
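FileSystem.get(Configuration) returns the filesystem named by fs.defaultFS (fs.default.name in older releases) in the given Configuration, so the same code runs against HDFS or the local filesystem depending on configuration. As a minimal sketch before the examples below (the path is illustrative, not taken from any example):

    Configuration conf = new Configuration();            // loads core-site.xml etc. from the classpath
    FileSystem fs = FileSystem.get(conf);                // default filesystem for this configuration
    Path path = new Path("/tmp/filesystem-get-demo");    // hypothetical path
    FSDataOutputStream out = fs.create(path, true);      // overwrite if the file exists
    out.writeUTF("hello");
    out.close();
    System.out.println("len=" + fs.getFileStatus(path).getLen());
    fs.delete(path, false);                              // non-recursive delete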
From source file:FormatStorageBasicTest.java
License:Open Source License
public void testGetRecordByOrderSegment() {
    Segment segment = null;
    try {
        // Build a seven-field schema: one field of each fixed-width type plus a string.
        FieldMap fieldMap = new FieldMap();
        fieldMap.addField(new Field(ConstVar.FieldType_Byte, ConstVar.Sizeof_Byte, (short) 0));
        fieldMap.addField(new Field(ConstVar.FieldType_Short, ConstVar.Sizeof_Short, (short) 1));
        fieldMap.addField(new Field(ConstVar.FieldType_Int, ConstVar.Sizeof_Int, (short) 2));
        fieldMap.addField(new Field(ConstVar.FieldType_Long, ConstVar.Sizeof_Long, (short) 3));
        fieldMap.addField(new Field(ConstVar.FieldType_Float, ConstVar.Sizeof_Float, (short) 4));
        fieldMap.addField(new Field(ConstVar.FieldType_Double, ConstVar.Sizeof_Double, (short) 5));
        fieldMap.addField(new Field(ConstVar.FieldType_String, 0, (short) 6));

        Head head = new Head();
        head.setFieldMap(fieldMap);

        String fileName = prefix + "testGetRecordByValueSegment";
        Path path = new Path(fileName);
        FileSystem fs = FileSystem.get(new Configuration());
        FSDataOutputStream out = fs.create(path);

        Configuration conf = new Configuration();
        FormatDataFile fd = new FormatDataFile(conf);
        fd.setWorkStatus(ConstVar.WS_Write);
        fd.head = head;
        fd.setOut(out);

        IndexInfo info = new IndexInfo();
        info.offset = 0;
        segment = new Segment(info, fd);

        // Write 150,000 identical records into the segment.
        int recordNum = 150000;
        for (int i = 0; i < recordNum; i++) {
            Record record = new Record(7);
            record.addValue(new FieldValue((byte) 1, (short) 0));
            record.addValue(new FieldValue((short) 2, (short) 1));
            record.addValue(new FieldValue((int) 3, (short) 2));
            record.addValue(new FieldValue((long) 4, (short) 3));
            record.addValue(new FieldValue((float) 5.5, (short) 4));
            record.addValue(new FieldValue((double) 6.6, (short) 5));
            record.addValue(new FieldValue("hello konten", (short) 6));
            segment.addRecord(record);
            record = null;
        }
        segment.persistent(out);
        out.close();

        // Reopen the file for reading and query the segment by order.
        FSDataInputStream in = fs.open(path);
        fd.setIn(in);
        fd.setWorkStatus(ConstVar.WS_Read);
        info.offset = 0;
        info.len = segment.len();
        Segment segment2 = new Segment(info, fd);

        // Wrong field indexes: no match expected.
        FieldValue[] values = new FieldValue[2];
        values[0] = new FieldValue((byte) 1, (short) 2);
        values[1] = new FieldValue((short) 2, (short) 3);
        Record[] records = segment2.getRecordByOrder(values, values.length);
        if (records != null) {
            fail("should get null, index error, records.len:" + records.length);
        }

        // Wrong field value: no match expected.
        values[0] = new FieldValue((byte) 1, (short) 0);
        values[1] = new FieldValue((short) 3, (short) 1);
        records = segment2.getRecordByOrder(values, values.length);
        if (records != null) {
            fail("should get null, value error");
        }

        // Matching values: all records expected.
        values[0] = new FieldValue((byte) 1, (short) 0);
        values[1] = new FieldValue((short) 2, (short) 1);
        records = segment2.getRecordByOrder(values, values.length);
        if (records == null) {
            fail("should not get null");
        }
        if (records.length != 150000) {
            fail("error result size:" + records.length);
        }
        for (int i = 0; i < 150000; i++) {
            judgeFixedRecord(records[i]);
        }

        // Null value list: all records expected.
        records = segment2.getRecordByOrder(null, 0);
        if (records == null) {
            fail("should not get null");
        }
        if (records.length != 150000) {
            fail("error result size:" + records.length);
        }
        for (int i = 0; i < 150000; i++) {
            judgeFixedRecord(records[i]);
        }
    } catch (IOException e) {
        e.printStackTrace();
        fail("get IOException:" + e.getMessage());
    } catch (Exception e) {
        e.printStackTrace();
        fail("get exception:" + e.getMessage());
    }
}
From source file:FormatStorageBasicTest.java
License:Open Source License
public void testSetNoPreFileName() {
    try {
        Configuration conf = new Configuration();
        FormatDataFile fd = new FormatDataFile(conf);
        Head head = new Head();
        String fileName = prefix + "testSetNoPreFileName";
        fd.create(fileName, head);

        Path path = new Path(fileName);
        FileSystem fs = FileSystem.get(new Configuration());
        if (!fs.isFile(path)) {
            fail("create file fail");
        }
    } catch (IOException e) {
        e.printStackTrace();
        fail("get ioexception:" + e.getMessage());
    } catch (Exception e) {
        e.printStackTrace();
        fail("get exception:" + e.getMessage());
    }
}
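A note on the check above: in Hadoop 2.x and later, FileSystem.isFile(Path) is deprecated in favor of getFileStatus(). A sketch of the equivalent check, using the same fs and path as above:

    boolean isFile;
    try {
        isFile = fs.getFileStatus(path).isFile();
    } catch (FileNotFoundException e) {
        isFile = false; // isFile(path) returns false for a missing path; mirror that here
    }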
From source file:FormatStorageBasicTest.java
License:Open Source License
public void testSetPreFileName() {
    try {
        Configuration conf = new Configuration();
        FormatDataFile fd = new FormatDataFile(conf);
        Head head = new Head();
        //String fileName = "hdfs://tdw-172-25-38-246:54310/user/tdwadmin/testSetPreFileName";
        String fileName = "/user/tdwadmin/se_test/fs/basic/testSetPreFileName";
        fd.create(fileName, head);
        fd.close();

        Path path = new Path(fileName);
        FileSystem fs = FileSystem.get(new Configuration());
        if (!fs.isFile(path)) {
            fail("create file fail");
        }
    } catch (IOException e) {
        fail(e.getMessage());
    } catch (Exception e) {
        e.printStackTrace();
        fail("get exception:" + e.getMessage());
    }
}
From source file:FormatStorageBasicTest.java
License:Open Source License
public void testClose() {
    try {
        Head head = new Head();
        head.setVar((byte) 1);
        Configuration conf = new Configuration();
        FormatDataFile fd = new FormatDataFile(conf);
        fd.create(prefix + "testClose", head);

        // Write one million records.
        int size = 100 * 10000;
        for (int i = 0; i < size; i++) {
            Record record = new Record(7);
            record.addValue(new FieldValue((byte) 1, (short) 0));
            record.addValue(new FieldValue((short) 2, (short) 1));
            record.addValue(new FieldValue((int) 3, (short) 2));
            record.addValue(new FieldValue((long) 4, (short) 3));
            record.addValue(new FieldValue((float) 5.5, (short) 4));
            record.addValue(new FieldValue((double) 6.6, (short) 5));
            record.addValue(new FieldValue("hello konten", (short) 6));
            fd.addRecord(record);
        }
        if (fd.recordNum() != size) {
            fail("error record num:" + fd.recordNum());
        }
        if (fd.currentSegment().currentUnit() == null) {
            fail("null current unit");
        }
        if (fd.currentSegment() == null) {
            fail("null current seg");
        }
        if (fd.segmentNum() != 0) {
            fail("error segment num:" + fd.segmentNum());
        }

        // Capture sizes before close(), which flushes the last segment.
        int headLen = head.len();
        long currentUnitLen = fd.currentSegment().currentUnit().len();
        long segmentLen = fd.currentSegment().len() + currentUnitLen + ConstVar.LineIndexRecordLen;
        long remain = fd.currentSegment().remain();
        int unitNum = fd.currentSegment().unitNum();
        fd.close();

        int indexLen = ConstVar.LineIndexRecordLen * fd.segmentNum();
        int metaLen = ConstVar.IndexMetaOffset;
        long fileLen = fd.getFileLen();
        if (fileLen != headLen + segmentLen + indexLen + metaLen) {
            fail("error file len:" + fileLen);
        }

        // close() must reset the writer's state.
        if (fd.in() != null) {
            fail("in should set null");
        }
        if (fd.out() != null) {
            fail("out should set null");
        }
        if (fd.recordNum() != 0) {
            fail("record num should set 0");
        }
        if (fd.keyIndexOffset != -1) {
            fail("key index offset not -1");
        }
        if (fd.lineIndexOffset != -1) {
            fail("line index offset not -1");
        }
        if (fd.currentOffset != -1) {
            fail("current offset not -1");
        }
        if (fd.hasLoadAllSegmentDone) {
            fail("has load all segment Done not false");
        }

        // Read the index meta back from the closed file and verify it.
        String fileName = prefix + "testClose";
        Path path = new Path(fileName);
        FileSystem fs = FileSystem.get(new Configuration());
        FSDataInputStream in = fs.open(path);
        long metaOffset = fileLen - ConstVar.IndexMetaOffset;
        in.seek(metaOffset);

        int recordNum = in.readInt();
        int segNum = in.readInt();
        long keyIndexOffset = in.readLong();
        long lineIndexOffset = in.readLong();
        if (recordNum != size) {
            fail("error record num:" + recordNum);
        }
        if (segNum != 1) {
            fail("error segNum:" + segNum);
        }
        if (keyIndexOffset != -1) {
            fail("error key index offset:" + keyIndexOffset);
        }
        if (lineIndexOffset != (headLen + segmentLen)) {
            fail("error line index offset:" + lineIndexOffset);
        }

        // Verify each line-index record.
        in.seek(lineIndexOffset);
        for (int i = 0; i < segNum; i++) {
            int beginLine = in.readInt();
            int endLine = in.readInt();
            long offset = in.readLong();
            long len = in.readLong();
            int idx = in.readInt();
            if (beginLine != 0) {
                fail("error beginLine:" + beginLine);
            }
            if (endLine != size) {
                fail("error end line:" + endLine);
            }
            if (offset != head.len()) {
                fail("error offset:" + offset);
            }
            long tlen = size * full7chunkLen + size * 8 + ConstVar.DataChunkMetaOffset * (unitNum + 1)
                    + 28 * (unitNum + 1) + 24;
            if (len != tlen) {
                fail("error len:" + len);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
        fail("get ioexception:" + e.getMessage());
    } catch (Exception e) {
        e.printStackTrace();
        fail("get exception:" + e.getMessage());
    }
}
From source file:FormatStorageBasicTest.java
License:Open Source License
public void testNoFillLastSegment() {
    try {
        String fileName = prefix + "testNoFillLastSegment";
        Head head = new Head();
        FieldMap fieldMap = new FieldMap();
        fieldMap.addField(new Field(ConstVar.FieldType_Byte, ConstVar.Sizeof_Byte, (short) 0));
        fieldMap.addField(new Field(ConstVar.FieldType_Short, ConstVar.Sizeof_Short, (short) 1));
        fieldMap.addField(new Field(ConstVar.FieldType_Int, ConstVar.Sizeof_Int, (short) 2));
        fieldMap.addField(new Field(ConstVar.FieldType_Long, ConstVar.Sizeof_Long, (short) 3));
        fieldMap.addField(new Field(ConstVar.FieldType_Float, ConstVar.Sizeof_Float, (short) 4));
        fieldMap.addField(new Field(ConstVar.FieldType_Double, ConstVar.Sizeof_Double, (short) 5));
        fieldMap.addField(new Field(ConstVar.FieldType_String, 0, (short) 6));
        head.setFieldMap(fieldMap);

        Configuration conf = new Configuration();
        FormatDataFile fd = new FormatDataFile(conf);
        fd.create(fileName, head);

        Record record = new Record(7);
        record.addValue(new FieldValue((byte) 1, (short) 0));
        record.addValue(new FieldValue((short) 2, (short) 1));
        record.addValue(new FieldValue((int) 3, (short) 2));
        record.addValue(new FieldValue((long) 4, (short) 3));
        record.addValue(new FieldValue((float) 5.5, (short) 4));
        record.addValue(new FieldValue((double) 6.6, (short) 5));
        record.addValue(new FieldValue("hello konten", (short) 6));
        fd.addRecord(record);
        fd.close();

        FileSystem fs = FileSystem.get(conf);
        long fileLen = fs.getFileStatus(new Path(fileName)).getLen();
        int tlen = head.len() + full7chunkLen + 8 + ConstVar.DataChunkMetaOffset + ConstVar.LineIndexRecordLen
                + ConstVar.IndexMetaOffset + ConstVar.LineIndexRecordLen + ConstVar.IndexMetaOffset;
        if (fileLen != tlen) {
            fail("error file len:" + fileLen);
        }

        FormatDataFile fd2 = new FormatDataFile(new Configuration());
        fd2.open(fileName);
        if (fd2.recordNum() != 1) {
            fail("error record num:" + fd2.recordNum());
        }
        if (fd2.segmentNum() != 1) {
            fail("error segment num:" + fd2.segmentNum());
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail("get exception:" + e.getMessage());
    }
}
From source file:FormatStorageBasicTest.java
License:Open Source License
public void testOpenNoRecord() {
    try {
        String fileName = prefix + "testOpenNoRecord";
        Head head = new Head();
        FormatDataFile fd = new FormatDataFile(new Configuration());
        fd.create(fileName, head);
        fd.close();

        FileSystem fs = FileSystem.get(new Configuration());
        long fileLen = fs.getFileStatus(new Path(fileName)).getLen();
        if (fileLen != head.len() + ConstVar.IndexMetaOffset) {
            fail("error file len:" + fileLen);
        }

        FormatDataFile fd2 = new FormatDataFile(new Configuration());
        fd2.open(fileName);
        if (fd2.recordNum() != 0) {
            fail("error record num:" + fd2.recordNum());
        }
        if (fd2.segmentNum() != 0) {
            fail("error segment num:" + fd2.segmentNum());
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail("get exception:" + e.getMessage());
    }
}
From source file:WikipediaForwardIndexBuilder.java
License:Apache License
@SuppressWarnings("static-access") @Override//w w w .jav a2 s .c o m public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input").create(INPUT_OPTION)); options.addOption( OptionBuilder.withArgName("path").hasArg().withDescription("index file").create(INDEX_FILE_OPTION)); options.addOption(OptionBuilder.withArgName("en|sv|de|cs|es|zh|ar|tr").hasArg() .withDescription("two-letter language code").create(LANGUAGE_OPTION)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(INDEX_FILE_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } Path inputPath = new Path(cmdline.getOptionValue(INPUT_OPTION)); String indexFile = cmdline.getOptionValue(INDEX_FILE_OPTION); String tmpPath = "tmp-" + WikipediaForwardIndexBuilder.class.getSimpleName() + "-" + RANDOM.nextInt(10000); if (!inputPath.isAbsolute()) { System.err.println("Error: " + INPUT_OPTION + " must be an absolute path!"); return -1; } String language = null; if (cmdline.hasOption(LANGUAGE_OPTION)) { language = cmdline.getOptionValue(LANGUAGE_OPTION); if (language.length() != 2) { System.err.println("Error: \"" + language + "\" unknown language!"); return -1; } } JobConf conf = new JobConf(getConf(), WikipediaForwardIndexBuilder.class); FileSystem fs = FileSystem.get(conf); LOG.info("Tool name: " + this.getClass().getName()); LOG.info(" - input path: " + inputPath); LOG.info(" - index file: " + indexFile); LOG.info(" - language: " + language); LOG.info("Note: This tool only works on block-compressed SequenceFiles!"); conf.setJobName(String.format("BuildWikipediaForwardIndex[%s: %s, %s: %s, %s: %s]", INPUT_OPTION, inputPath, INDEX_FILE_OPTION, indexFile, LANGUAGE_OPTION, language)); conf.setNumReduceTasks(1); FileInputFormat.setInputPaths(conf, inputPath); FileOutputFormat.setOutputPath(conf, new Path(tmpPath)); FileOutputFormat.setCompressOutput(conf, false); if (language != null) { conf.set("wiki.language", language); } conf.setInputFormat(NoSplitSequenceFileInputFormat.class); conf.setOutputKeyClass(IntWritable.class); conf.setOutputValueClass(Text.class); conf.setMapRunnerClass(MyMapRunner.class); conf.setReducerClass(IdentityReducer.class); // Delete the output directory if it exists already. 
fs.delete(new Path(tmpPath), true); RunningJob job = JobClient.runJob(conf); Counters counters = job.getCounters(); int blocks = (int) counters.getCounter(Blocks.Total); LOG.info("number of blocks: " + blocks); LOG.info("Writing index file..."); LineReader reader = new LineReader(fs.open(new Path(tmpPath + "/part-00000"))); FSDataOutputStream out = fs.create(new Path(indexFile), true); out.writeUTF(edu.umd.cloud9.collection.wikipedia.WikipediaForwardIndex.class.getCanonicalName()); out.writeUTF(inputPath.toString()); out.writeInt(blocks); int cnt = 0; Text line = new Text(); while (reader.readLine(line) > 0) { String[] arr = line.toString().split("\\s+"); int docno = Integer.parseInt(arr[0]); int offset = Integer.parseInt(arr[1]); short fileno = Short.parseShort(arr[2]); out.writeInt(docno); out.writeInt(offset); out.writeShort(fileno); cnt++; if (cnt % 100000 == 0) { LOG.info(cnt + " blocks written"); } } reader.close(); out.close(); if (cnt != blocks) { throw new RuntimeException("Error: mismatch in block count!"); } // Clean up. fs.delete(new Path(tmpPath), true); return 0; }
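A side note on the pattern above: FileSystem.get(conf) resolves the default filesystem, which is correct here only because inputPath is required to be an absolute path on it. When a path may carry its own scheme, a common alternative is to ask the path itself; a sketch, with an illustrative URI:

    Configuration conf = new Configuration();
    Path p = new Path("hdfs://namenode:9000/data/input");  // hypothetical fully-qualified path
    FileSystem fs = p.getFileSystem(conf);                 // the filesystem that actually owns p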
From source file:RunPageRankSchimmy.java
License:Apache License
private float phase1(String path, int i, int j, int n, boolean useCombiner, boolean useInmapCombiner,
        boolean useRange) throws Exception {
    Configuration conf = getConf();

    String in = path + "/iter" + FORMAT.format(i);
    String out = path + "/iter" + FORMAT.format(j) + "t";
    String outm = out + "-mass";

    FileSystem fs = FileSystem.get(conf);

    // We need to actually count the number of part files to get the number
    // of partitions (because the directory might contain _log).
    int numPartitions = 0;
    for (FileStatus s : FileSystem.get(conf).listStatus(new Path(in))) {
        if (s.getPath().getName().contains("part-")) {
            numPartitions++;
        }
    }

    conf.setInt("NodeCount", n);

    Partitioner<IntWritable, Writable> p = null;
    if (useRange) {
        p = new RangePartitioner();
        ((Configurable) p).setConf(conf);
    } else {
        p = new HashPartitioner<IntWritable, Writable>();
    }

    // This is really annoying: the mapping between the partition numbers on
    // disk (i.e., part-XXXX) and what partition the file contains (i.e.,
    // key.hash % #reducer) is arbitrary... so this means that we need to
    // open up each partition, peek inside to find out.
    IntWritable key = new IntWritable();
    PageRankNode value = new PageRankNode();
    FileStatus[] status = fs.listStatus(new Path(in));

    StringBuilder sb = new StringBuilder();
    for (FileStatus f : status) {
        if (!f.getPath().getName().contains("part-")) {
            continue;
        }
        SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(f.getPath()));
        reader.next(key, value);
        int np = p.getPartition(key, value, numPartitions);
        reader.close();
        LOG.info(f.getPath() + "\t" + np);
        sb.append(np + "=" + f.getPath() + ";");
    }

    LOG.info(sb.toString().trim());

    LOG.info("PageRankSchimmy: iteration " + j + ": Phase1");
    LOG.info(" - input: " + in);
    LOG.info(" - output: " + out);
    LOG.info(" - nodeCnt: " + n);
    LOG.info(" - useCombiner: " + useCombiner);
    LOG.info(" - useInmapCombiner: " + useInmapCombiner);
    LOG.info(" - numPartitions: " + numPartitions);
    LOG.info(" - useRange: " + useRange);
    LOG.info("computed number of partitions: " + numPartitions);

    int numReduceTasks = numPartitions;

    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);
    //conf.set("mapred.child.java.opts", "-Xmx2048m");
    conf.set("PageRankMassPath", outm);
    conf.set("BasePath", in);
    conf.set("PartitionMapping", sb.toString().trim());

    conf.setBoolean("mapred.map.tasks.speculative.execution", false);
    conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);

    Job job = Job.getInstance(conf);
    job.setJobName("PageRankSchimmy:iteration" + j + ":Phase1");
    job.setJarByClass(RunPageRankSchimmy.class);

    job.setNumReduceTasks(numReduceTasks);

    FileInputFormat.setInputPaths(job, new Path(in));
    FileOutputFormat.setOutputPath(job, new Path(out));

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(FloatWritable.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNode.class);

    if (useInmapCombiner) {
        job.setMapperClass(MapWithInMapperCombiningClass.class);
    } else {
        job.setMapperClass(MapClass.class);
    }

    if (useCombiner) {
        job.setCombinerClass(CombineClass.class);
    }

    if (useRange) {
        job.setPartitionerClass(RangePartitioner.class);
    }

    job.setReducerClass(ReduceClass.class);

    FileSystem.get(conf).delete(new Path(out), true);
    FileSystem.get(conf).delete(new Path(outm), true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    // Combine the mass written by each reducer (stored as log probabilities).
    float mass = Float.NEGATIVE_INFINITY;
    for (FileStatus f : fs.listStatus(new Path(outm))) {
        FSDataInputStream fin = fs.open(f.getPath());
        mass = sumLogProbs(mass, fin.readFloat());
        fin.close();
    }

    return mass;
}
From source file:RunPageRankSchimmy.java
License:Apache License
private void phase2(String path, int i, int j, int n, float missing) throws Exception {
    Configuration conf = getConf();

    LOG.info("missing PageRank mass: " + missing);
    LOG.info("number of nodes: " + n);

    String in = path + "/iter" + FORMAT.format(j) + "t";
    String out = path + "/iter" + FORMAT.format(j);

    LOG.info("PageRankSchimmy: iteration " + j + ": Phase2");
    LOG.info(" - input: " + in);
    LOG.info(" - output: " + out);

    Job job = Job.getInstance(conf);
    job.setJobName("PageRankSchimmy:iteration" + j + ":Phase2");
    job.setJarByClass(RunPageRankSchimmy.class);

    job.setNumReduceTasks(0);

    FileInputFormat.setInputPaths(job, new Path(in));
    FileOutputFormat.setOutputPath(job, new Path(out));

    job.setInputFormatClass(NonSplitableSequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNode.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNode.class);

    job.setMapperClass(MapPageRankMassDistributionClass.class);

    conf.setFloat("MissingMass", (float) missing);
    conf.setInt("NodeCount", n);

    FileSystem.get(conf).delete(new Path(out), true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
}
From source file:TaskSearchWords.java
public static void main(String[] args) throws Exception {
    String hadoopServer = "ip-172-31-13-245.ap-southeast-1.compute.internal";

    Configuration conf = new Configuration();

    // This should match what is defined in your mapred-site.xml.
    conf.set("mapred.job.tracker", hadoopServer + ":54311");
    // As defined in hdfs-site.xml.
    conf.set("fs.default.name", "hdfs://" + hadoopServer + ":9000");

    // Set the mapred classes so the cluster knows which classes to run.
    conf.set("mapreduce.map.class", "TokenizerMapper");
    conf.set("mapreduce.reduce.class", "IntSumReducer");

    // Ship the job jar to prevent a class-not-found error on the cluster.
    conf.set("mapred.jar", "C:\\GitRepos\\OCR\\HadoopTasks\\dist\\HadoopTasks.jar");

    // Pass parameters through to the mapred classes.
    conf.set("RAWOCRCLOB",
            "Omeprazole_Cap E/C 10mg\n" + "Dressit Ster esDress\n" + "Flaminal Forte 15g\n"
                    + "Co-Magaldrox_Susp 195mg/220mg/5ml S/F\n" + "Antacid/Oxetacaine_Oral Susp S/F\n"
                    + "Simeticone_Susp 40mg/ml S/F\n" + "Infacol_Susp 40mg/ml S/F");

    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(TaskSearchWords.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path("/user/ubuntu/MedicinesProcessed.csv"));

    // Delete any previous output, then set the (now empty) output path.
    FileSystem fs = FileSystem.get(conf);
    Path out = new Path("/user/ubuntu/processed/");
    fs.delete(out, true);
    FileOutputFormat.setOutputPath(job, out);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
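One caution that applies to this example and most of the ones above: because FileSystem.get returns a shared cached instance, calling fs.close() on it would also close the instance any other code in the same JVM is using. A sketch of the safer pattern when a close is genuinely needed, reusing the output path from this example:

    try (FileSystem privateFs = FileSystem.newInstance(conf)) { // non-cached instance, safe to close
        privateFs.delete(new Path("/user/ubuntu/processed/"), true);
    }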