List of usage examples for org.apache.hadoop.fs.Path.suffix
public Path suffix(String suffix)
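Path.suffix returns a new Path whose last component is the original name with the given string appended; it does not touch the file system. Before the real-world examples below, here is a minimal sketch of the typical pattern (the directory names, configuration, and companion-file suffix are hypothetical, chosen only for illustration):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PathSuffixExample {
    public static void main(String[] args) throws Exception {
        // suffix() only builds a new Path by appending the string to the
        // final path component; the original Path is left unchanged.
        Path table = new Path("/user/example/output/data");
        Path compressed = table.suffix(".bz2");   // /user/example/output/data.bz2
        Path typeFile = table.suffix(".type");    // /user/example/output/data.type

        System.out.println(compressed);
        System.out.println(typeFile);

        // A common pattern in the examples below: check for a companion file
        // that sits next to the primary one (configuration is hypothetical).
        Configuration conf = new Configuration();
        FileSystem fs = table.getFileSystem(conf);
        System.out.println("companion exists: " + fs.exists(table.suffix(".type")));
    }
}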
From source file:org.apache.mahout.text.TestSequenceFilesFromDirectory.java
License:Apache License
private static void checkMRResultFilesRecursive(Configuration configuration, Path outputDir, String[][] data,
        String prefix) throws IOException {
    FileSystem fs = FileSystem.get(configuration);

    // output exists?
    FileStatus[] fileStatuses = fs.listStatus(outputDir.suffix("/part-m-00000"), PathFilters.logsCRCFilter());
    assertEquals(1, fileStatuses.length); // only one
    assertEquals("part-m-00000", fileStatuses[0].getPath().getName());

    Map<String, String> fileToData = Maps.newHashMap();
    String currentPath = prefix;
    for (String[] aData : data) {
        currentPath += Path.SEPARATOR + aData[0];
        fileToData.put(currentPath + Path.SEPARATOR + "file.txt", aData[1]);
    }

    // read a chunk to check content
    SequenceFileIterator<Text, Text> iterator = new SequenceFileIterator<Text, Text>(fileStatuses[0].getPath(),
            true, configuration);
    try {
        while (iterator.hasNext()) {
            Pair<Text, Text> record = iterator.next();
            System.out.printf("MR-Recur > Trying to check: %s\n", record.getFirst().toString().trim());
            String retrievedData = fileToData.get(record.getFirst().toString().trim());
            assertNotNull(retrievedData);
            assertEquals(retrievedData, record.getSecond().toString().trim());
        }
    } finally {
        Closeables.close(iterator, true);
    }
}
From source file:org.apache.mrql.Evaluator.java
License:Apache License
/** dump MRQL data into a sequence file */
public void dump(String file, Tree type, MRData data) throws Exception {
    Path path = new Path(file);
    FileSystem fs = path.getFileSystem(Plan.conf);
    PrintStream ftp = new PrintStream(fs.create(path.suffix(".type")));
    ftp.print("2@" + type.toString() + "\n");
    ftp.close();
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, Plan.conf, path, MRContainer.class,
            MRContainer.class);
    if (data instanceof MR_dataset)
        data = Plan.collect(((MR_dataset) data).dataset());
    if (data instanceof Bag) {
        Bag s = (Bag) data;
        long i = 0;
        for (MRData e : s) {
            counter_key.set(i++);
            value_container.set(e);
            writer.append(counter_container, value_container);
        }
    } else {
        counter_key.set(0);
        value_container.set(data);
        writer.append(counter_container, value_container);
    }
    writer.close();
}
From source file:org.apache.mrql.Evaluator.java
License:Apache License
/** for dumped data to a file, return the MRQL type of the data */
public Tree get_type(String file) {
    try {
        Path path = new Path(file);
        FileSystem fs = path.getFileSystem(Plan.conf);
        BufferedReader ftp = new BufferedReader(new InputStreamReader(fs.open(path.suffix(".type"))));
        String s[] = ftp.readLine().split("@");
        ftp.close();
        if (s.length != 2)
            return null;
        if (!s[0].equals("2"))
            throw new Error("The binary file has been created in java mode and cannot be read in hadoop mode");
        return Tree.parse(s[1]);
    } catch (Exception e) {
        return null;
    }
}
From source file:org.apache.tajo.storage.TestCompressionStorages.java
License:Apache License
private void storageCompressionTest(StoreType storeType, Class<? extends CompressionCodec> codec)
        throws IOException {
    Schema schema = new Schema();
    schema.addColumn("id", Type.INT4);
    schema.addColumn("age", Type.FLOAT4);
    schema.addColumn("name", Type.TEXT);

    TableMeta meta = CatalogUtil.newTableMeta(storeType);
    meta.putOption("compression.codec", codec.getCanonicalName());
    meta.putOption("compression.type", SequenceFile.CompressionType.BLOCK.name());
    meta.putOption("rcfile.serde", TextSerializerDeserializer.class.getName());
    meta.putOption("sequencefile.serde", TextSerializerDeserializer.class.getName());

    String fileName = "Compression_" + codec.getSimpleName();
    Path tablePath = new Path(testDir, fileName);
    Appender appender = ((FileStorageManager) StorageManager.getFileStorageManager(conf)).getAppender(meta,
            schema, tablePath);
    appender.enableStats();
    appender.init();

    String extension = "";
    if (appender instanceof CSVFile.CSVAppender) {
        extension = ((CSVFile.CSVAppender) appender).getExtension();
    } else if (appender instanceof DelimitedTextFile.DelimitedTextFileAppender) {
        extension = ((DelimitedTextFile.DelimitedTextFileAppender) appender).getExtension();
    }

    int tupleNum = 100000;
    VTuple vTuple;
    for (int i = 0; i < tupleNum; i++) {
        vTuple = new VTuple(3);
        vTuple.put(0, DatumFactory.createInt4(i + 1));
        vTuple.put(1, DatumFactory.createFloat4((float) i));
        vTuple.put(2, DatumFactory.createText(String.valueOf(i)));
        appender.addTuple(vTuple);
    }
    appender.close();

    TableStats stat = appender.getStats();
    assertEquals(tupleNum, stat.getNumRows().longValue());

    tablePath = tablePath.suffix(extension);
    FileStatus status = fs.getFileStatus(tablePath);
    long fileLen = status.getLen();
    FileFragment[] tablets = new FileFragment[1];
    tablets[0] = new FileFragment(fileName, tablePath, 0, fileLen);

    Scanner scanner = StorageManager.getFileStorageManager(conf).getScanner(meta, schema, tablets[0], schema);

    if (StoreType.CSV == storeType) {
        if (SplittableCompressionCodec.class.isAssignableFrom(codec)) {
            assertTrue(scanner.isSplittable());
        } else {
            assertFalse(scanner.isSplittable());
        }
    }
    scanner.init();

    if (storeType == StoreType.SEQUENCEFILE) {
        assertTrue(scanner instanceof SequenceFileScanner);
        Writable key = ((SequenceFileScanner) scanner).getKey();
        assertEquals(key.getClass().getCanonicalName(), LongWritable.class.getCanonicalName());
    }

    int tupleCnt = 0;
    Tuple tuple;
    while ((tuple = scanner.next()) != null) {
        tupleCnt++;
    }
    scanner.close();

    assertEquals(tupleNum, tupleCnt);
    assertNotSame(appender.getStats().getNumBytes().longValue(),
            scanner.getInputStats().getNumBytes().longValue());
    assertEquals(appender.getStats().getNumRows().longValue(),
            scanner.getInputStats().getNumRows().longValue());
}
From source file:org.apache.tajo.storage.TestLineReader.java
License:Apache License
@Test
public void testLineDelimitedReader() throws IOException {
    TajoConf conf = new TajoConf();
    Path testDir = CommonTestingUtil.getTestDir(TEST_PATH);
    FileSystem fs = testDir.getFileSystem(conf);

    Schema schema = new Schema();
    schema.addColumn("id", Type.INT4);
    schema.addColumn("age", Type.INT8);
    schema.addColumn("comment", Type.TEXT);
    schema.addColumn("comment2", Type.TEXT);

    TableMeta meta = CatalogUtil.newTableMeta(StoreType.TEXTFILE);
    meta.putOption("compression.codec", DeflateCodec.class.getCanonicalName());

    Path tablePath = new Path(testDir, "line1." + DeflateCodec.class.getSimpleName());
    FileAppender appender = (FileAppender) StorageManager.getFileStorageManager(conf).getAppender(null, null,
            meta, schema, tablePath);
    appender.enableStats();
    appender.init();

    int tupleNum = 10000;
    VTuple vTuple;
    long splitOffset = 0;
    for (int i = 0; i < tupleNum; i++) {
        vTuple = new VTuple(4);
        vTuple.put(0, DatumFactory.createInt4(i + 1));
        vTuple.put(1, DatumFactory.createInt8(25l));
        vTuple.put(2, DatumFactory.createText("emiya muljomdao"));
        vTuple.put(3, NullDatum.get());
        appender.addTuple(vTuple);

        if (i == (tupleNum / 2)) {
            splitOffset = appender.getOffset();
        }
    }
    String extension = ((DelimitedTextFile.DelimitedTextFileAppender) appender).getExtension();
    appender.close();

    tablePath = tablePath.suffix(extension);
    FileFragment fragment = new FileFragment("table", tablePath, 0, splitOffset);
    DelimitedLineReader reader = new DelimitedLineReader(conf, fragment);
    // if file is compressed, will read to EOF
    assertTrue(reader.isCompressed());
    assertFalse(reader.isReadable());
    reader.init();
    assertTrue(reader.isReadable());

    int i = 0;
    while (reader.isReadable()) {
        ByteBuf buf = reader.readLine();
        if (buf == null)
            break;
        i++;
    }

    IOUtils.cleanup(null, reader, fs);
    assertEquals(tupleNum, i);
}
From source file:org.apache.tajo.storage.v2.TestCSVCompression.java
License:Apache License
public void testSplitCompressionData() throws IOException {
    Schema schema = new Schema();
    schema.addColumn("id", TajoDataTypes.Type.INT4);
    schema.addColumn("age", TajoDataTypes.Type.INT8);

    TableMeta meta = CatalogUtil.newTableMeta(CatalogProtos.StoreType.CSV);
    meta.putOption("compression.codec", BZip2Codec.class.getCanonicalName());

    Path tablePath = new Path(testDir, "SplitCompression");
    Appender appender = StorageManagerFactory.getStorageManager(conf).getAppender(meta, schema, tablePath);
    appender.enableStats();
    appender.init();

    String extension = "";
    if (appender instanceof CSVFile.CSVAppender) {
        extension = ((CSVFile.CSVAppender) appender).getExtension();
    }

    int tupleNum = 100000;
    VTuple vTuple;
    for (int i = 0; i < tupleNum; i++) {
        vTuple = new VTuple(2);
        vTuple.put(0, DatumFactory.createInt4(i + 1));
        vTuple.put(1, DatumFactory.createInt8(25l));
        appender.addTuple(vTuple);
    }
    appender.close();

    TableStats stat = appender.getStats();
    assertEquals(tupleNum, stat.getNumRows().longValue());

    tablePath = tablePath.suffix(extension);
    FileStatus status = fs.getFileStatus(tablePath);
    long fileLen = status.getLen();
    long randomNum = (long) (Math.random() * fileLen) + 1;

    FileFragment[] tablets = new FileFragment[2];
    tablets[0] = new FileFragment("SplitCompression", tablePath, 0, randomNum);
    tablets[1] = new FileFragment("SplitCompression", tablePath, randomNum, (fileLen - randomNum));

    Scanner scanner = StorageManagerFactory.getStorageManager(conf).getScanner(meta, schema, tablets[0], schema);
    scanner.init();
    int tupleCnt = 0;
    Tuple tuple;
    while ((tuple = scanner.next()) != null) {
        tupleCnt++;
    }
    scanner.close();

    scanner = StorageManagerFactory.getStorageManager(conf).getScanner(meta, schema, tablets[1], schema);
    scanner.init();
    while ((tuple = scanner.next()) != null) {
        tupleCnt++;
    }
    scanner.close();

    assertEquals(tupleNum, tupleCnt);
}
From source file:org.apache.tajo.storage.v2.TestCSVCompression.java
License:Apache License
private void storageCompressionTest(CatalogProtos.StoreType storeType, Class<? extends CompressionCodec> codec)
        throws IOException {
    Schema schema = new Schema();
    schema.addColumn("id", TajoDataTypes.Type.INT4);
    schema.addColumn("age", TajoDataTypes.Type.INT8);

    TableMeta meta = CatalogUtil.newTableMeta(storeType);
    meta.putOption("compression.codec", codec.getCanonicalName());

    String fileName = "Compression_" + codec.getSimpleName();
    Path tablePath = new Path(testDir, fileName);
    Appender appender = StorageManagerFactory.getStorageManager(conf).getAppender(meta, schema, tablePath);
    appender.enableStats();
    appender.init();

    String extension = "";
    if (appender instanceof CSVFile.CSVAppender) {
        extension = ((CSVFile.CSVAppender) appender).getExtension();
    }

    int tupleNum = 10000;
    VTuple vTuple;
    for (int i = 0; i < tupleNum; i++) {
        vTuple = new VTuple(2);
        vTuple.put(0, DatumFactory.createInt4(i + 1));
        vTuple.put(1, DatumFactory.createInt8(25l));
        appender.addTuple(vTuple);
    }
    appender.close();

    TableStats stat = appender.getStats();
    assertEquals(tupleNum, stat.getNumRows().longValue());

    tablePath = tablePath.suffix(extension);
    FileStatus status = fs.getFileStatus(tablePath);
    long fileLen = status.getLen();
    FileFragment[] tablets = new FileFragment[1];
    tablets[0] = new FileFragment(fileName, tablePath, 0, fileLen);

    Scanner scanner = StorageManagerFactory.getStorageManager(conf).getScanner(meta, schema, tablets[0], schema);
    scanner.init();
    int tupleCnt = 0;
    while (scanner.next() != null) {
        tupleCnt++;
    }
    scanner.close();

    assertEquals(tupleCnt, tupleNum);
}
From source file:org.apache.tez.runtime.library.common.shuffle.orderedgrouped.MapOutput.java
License:Apache License
public static MapOutput createDiskMapOutput(InputAttemptIdentifier attemptIdentifier, MergeManager merger,
        long size, Configuration conf, int fetcher, boolean primaryMapOutput,
        TezTaskOutputFiles mapOutputFile) throws IOException {
    FileSystem fs = FileSystem.getLocal(conf);
    Path outputPath = mapOutputFile.getInputFileForWrite(
            attemptIdentifier.getInputIdentifier().getInputIndex(), size);

    // Files are not clobbered due to the id being appended to the outputPath in the tmpPath,
    // otherwise fetches for the same task but from different attempts would clobber each other.
    Path tmpOutputPath = outputPath.suffix(String.valueOf(fetcher));
    long offset = 0;

    MapOutput mapOutput = new MapOutput(Type.DISK, attemptIdentifier, merger, size, outputPath, offset,
            primaryMapOutput, fs, tmpOutputPath);
    mapOutput.disk = mapOutput.localFS.create(tmpOutputPath);
    return mapOutput;
}
From source file:org.seqdoop.hadoop_bam.cli.plugins.View.java
License:Open Source License
@Override
protected int run(CmdLineParser parser) {
    final List<String> args = parser.getRemainingArgs();
    if (args.isEmpty()) {
        System.err.println("view :: PATH not given.");
        return 3;
    }

    Utils.toStringency(parser.getOptionValue(stringencyOpt,
            ValidationStringency.DEFAULT_STRINGENCY.toString()), "view");

    final String path = args.get(0);
    final List<String> regions = args.subList(1, args.size());

    final boolean headerOnly = parser.getBoolean(headerOnlyOpt);

    final SAMFileReader reader;
    try {
        final Path p = new Path(path);

        SeekableStream idx;
        try {
            idx = WrapSeekable.openPath(getConf(), p.suffix(".bai"));
        } catch (Exception e) {
            idx = null;
        }

        final SeekableStream sam = WrapSeekable.openPath(getConf(), p);

        reader = idx == null ? new SAMFileReader(sam, false) : new SAMFileReader(sam, idx, false);
    } catch (Exception e) {
        System.err.printf("view :: Could not open '%s': %s\n", path, e.getMessage());
        return 4;
    }

    reader.setValidationStringency(ValidationStringency.SILENT);

    final SAMFileHeader header;
    try {
        header = reader.getFileHeader();
    } catch (SAMFormatException e) {
        System.err.printf("view :: Could not parse '%s': %s\n", path, e.getMessage());
        return 4;
    }

    final String fmt = (String) parser.getOptionValue(formatOpt);
    final SAMFormat format = fmt == null ? SAMFormat.SAM : SAMFormat.valueOf(fmt.toUpperCase(Locale.ENGLISH));

    final SAMFileWriterImpl writer;
    switch (format) {
    case BAM:
        // BAM output inside view no longer supported since Picard made the class private
        System.err.println("BAM output inside view no longer supported");
        return 1;
    case SAM:
        writer = new SAMTextWriter(System.out);
        break;
    default:
        writer = null;
        assert false;
    }

    writer.setSortOrder(header.getSortOrder(), true);
    writer.setHeader(header);

    if (regions.isEmpty() || headerOnly) {
        if (!headerOnly)
            if (!writeIterator(writer, reader.iterator(), path))
                return 4;

        writer.close();
        return 0;
    }

    if (!reader.isBinary()) {
        System.err.println("view :: Cannot output regions from SAM file");
        return 4;
    }

    if (!reader.hasIndex()) {
        System.err.println("view :: Cannot output regions from BAM file lacking an index");
        return 4;
    }

    reader.enableIndexCaching(true);

    boolean errors = false;

    for (final String region : regions) {
        final StringTokenizer st = new StringTokenizer(region, ":-");
        final String refStr = st.nextToken();

        final int beg, end;
        if (st.hasMoreTokens()) {
            beg = parseCoordinate(st.nextToken());
            end = st.hasMoreTokens() ? parseCoordinate(st.nextToken()) : -1;

            if (beg < 0 || end < 0) {
                errors = true;
                continue;
            }
            if (end < beg) {
                System.err.printf("view :: Invalid range, cannot end before start: '%d-%d'\n", beg, end);
                errors = true;
                continue;
            }
        } else
            beg = end = 0;

        SAMSequenceRecord ref = header.getSequence(refStr);
        if (ref == null)
            try {
                ref = header.getSequence(Integer.parseInt(refStr));
            } catch (NumberFormatException e) {
            }

        if (ref == null) {
            System.err.printf("view :: Not a valid sequence name or index: '%s'\n", refStr);
            errors = true;
            continue;
        }

        final SAMRecordIterator it = reader.queryOverlapping(ref.getSequenceName(), beg, end);

        if (!writeIterator(writer, it, path))
            return 4;
    }

    writer.close();
    return errors ? 5 : 0;
}
From source file:org.springframework.data.hadoop.store.strategy.naming.CodecFileNamingStrategy.java
License:Apache License
@Override
public Path resolve(Path path) {
    CodecInfo c = getCodecInfo();
    String suffix = c != null ? "." + c.getDefaultSuffix() : "";
    if (path != null) {
        return path.suffix(suffix);
    } else if (StringUtils.hasText(suffix)) {
        return new Path(suffix);
    } else {
        return path;
    }
}