List of usage examples for org.apache.hadoop.fs Path getFileSystem
public FileSystem getFileSystem(Configuration conf) throws IOException
From source file:com.blackberry.logdriver.admin.HFind.java
License:Apache License
@Override public int run(String[] args) throws Exception { final long startTime = System.currentTimeMillis(); int i = 0;//from w w w. java 2 s. c o m while (i < args.length) { if (args[i].startsWith("-")) { break; } Path path = new Path(args[i]); FileSystem fs = path.getFileSystem(getConf()); FileStatus[] fileStatuses = fs.globStatus(path); if (fileStatuses != null) { for (FileStatus fileStatus : fileStatuses) { paths.add(fileStatus.getPath()); fileStatusCache.put(fileStatus.getPath(), fileStatus); } } i++; } while (i < args.length) { // -print action if ("-print".equals(args[i])) { actions.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { System.out.println(fileStatus.getPath()); return true; } }); } // -delete action if ("-delete".equals(args[i])) { actions.add(new FileStatusFilter() { @SuppressWarnings("deprecation") @Override public boolean accept(FileStatus fileStatus) { try { FileSystem fs = fileStatus.getPath().getFileSystem(getConf()); if (!fileStatus.isDir() || fs.listStatus(fileStatus.getPath()).length == 0) { return fs.delete(fileStatus.getPath(), true); } } catch (IOException e) { e.printStackTrace(); } return false; } }); } // -atime test else if ("-atime".equals(args[i])) { i++; if (i >= args.length) { System.err.println("Missing arguement for -atime"); System.exit(1); } String t = args[i]; if (t.charAt(0) == '+') { final long time = Long.parseLong(t.substring(1)); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getAccessTime()) / (24 * 60 * 60 * 1000) > time) { return true; } else { return false; } } }); } else if (t.charAt(0) == '-') { final long time = Long.parseLong(t.substring(1)); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getAccessTime()) / (24 * 60 * 60 * 1000) < time) { return true; } else { return false; } } }); } else { final long time = Long.parseLong(t); 
tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getAccessTime()) / (24 * 60 * 60 * 1000) == time) { return true; } else { return false; } } }); } } // -mtime test else if ("-mtime".equals(args[i])) { i++; if (i >= args.length) { System.err.println("Missing arguement for -mtime"); System.exit(1); } String t = args[i]; if (t.charAt(0) == '+') { final long time = Long.parseLong(t.substring(1)); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getModificationTime()) / (24 * 60 * 60 * 1000) > time) { return true; } else { return false; } } }); } else if (t.charAt(0) == '-') { final long time = Long.parseLong(t.substring(1)); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getModificationTime()) / (24 * 60 * 60 * 1000) < time) { return true; } else { return false; } } }); } else { final long time = Long.parseLong(t); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getModificationTime()) / (24 * 60 * 60 * 1000) == time) { return true; } else { return false; } } }); } } // -amin test else if ("-amin".equals(args[i])) { i++; if (i >= args.length) { System.err.println("Missing arguement for -amin"); System.exit(1); } String t = args[i]; if (t.charAt(0) == '+') { final long time = Long.parseLong(t.substring(1)); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getAccessTime()) / (60 * 1000) > time) { return true; } else { return false; } } }); } else if (t.charAt(0) == '-') { final long time = Long.parseLong(t.substring(1)); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getAccessTime()) / (60 * 1000) < time) { return true; } else { return false; } } 
}); } else { final long time = Long.parseLong(t); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getAccessTime()) / (60 * 1000) == time) { return true; } else { return false; } } }); } } // -mmin test else if ("-mmin".equals(args[i])) { i++; if (i >= args.length) { System.err.println("Missing arguement for -mmin"); System.exit(1); } String t = args[i]; if (t.charAt(0) == '+') { final long time = Long.parseLong(t.substring(1)); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getModificationTime()) / (60 * 1000) > time) { return true; } else { return false; } } }); } else if (t.charAt(0) == '-') { final long time = Long.parseLong(t.substring(1)); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getModificationTime()) / (60 * 1000) < time) { return true; } else { return false; } } }); } else { final long time = Long.parseLong(t); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getModificationTime()) / (60 * 1000) == time) { return true; } else { return false; } } }); } } // -regex test else if ("-regex".equals(args[i])) { i++; if (i >= args.length) { System.err.println("Missing arguement for -regex"); System.exit(1); } final Pattern p = Pattern.compile(args[i]); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if (p.matcher(fileStatus.getPath().toString()).matches()) { return true; } else { return false; } } }); } i++; } if (actions.size() == 0) { actions.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { System.out.println(fileStatus.getPath()); return true; } }); } search(); return 0; }
From source file:com.blackberry.logdriver.admin.HFind.java
License:Apache License
@SuppressWarnings("deprecation") private void search() throws IOException { Set<Path> seen = new HashSet<Path>(); while (paths.size() > 0) { // Check if the top of the list has any children. If so, add them to the // stack. If not, then process it. Path p = paths.peekFirst(); FileSystem fs = p.getFileSystem(getConf()); FileStatus fileStatus = fileStatusCache.get(p); // Only check if we haven't seen this before. if (fileStatus.isDir() && seen.contains(p) == false) { FileStatus[] fileStatuses = fs.listStatus(p); if (fileStatuses != null && fileStatuses.length > 0) { for (FileStatus x : fileStatuses) { paths.addFirst(x.getPath()); fileStatusCache.put(x.getPath(), x); }/* www. j av a 2s . co m*/ seen.add(p); continue; } } // If we get here, then we should be processing the path. p = paths.removeFirst(); // If we're processing it, we won't need it's status in the cache anymore. fileStatusCache.remove(p); boolean match = true; for (FileStatusFilter test : tests) { try { if (test.accept(fileStatus) == false) { match = false; break; } } catch (Throwable t) { t.printStackTrace(); System.err.println("path=" + p + " fileStatus=" + fileStatus); } } if (match == false) { continue; } for (FileStatusFilter action : actions) { try { if (action.accept(fileStatus) == false) { match = false; break; } } catch (Throwable t) { t.printStackTrace(); System.err.println("path=" + p + " fileStatus=" + fileStatus); } } } }
From source file:com.blackberry.logdriver.mapred.boom.ReBoomRecordWriter.java
License:Apache License
/**
 * Opens the task's output file and wraps it in a {@link ReBoomWriter}.
 *
 * <p>The file name is the value of the {@code mapred.task.id} job property
 * followed by {@code .bm}, resolved through
 * {@code BinaryOutputFormat.getTaskOutputPath}.
 *
 * @param reBoomOutputFormat the output format creating this writer (not used here)
 * @param job the job configuration
 * @throws IOException if the output file cannot be created
 */
public ReBoomRecordWriter(ReBoomOutputFormat reBoomOutputFormat, JobConf job) throws IOException {
    final String outputName = job.get("mapred.task.id") + ".bm";
    final Path outputPath = BinaryOutputFormat.getTaskOutputPath(job, outputName);
    writer = new ReBoomWriter(outputPath.getFileSystem(job).create(outputPath));
}
From source file:com.blm.orc.FileDump.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); List<String> files = new ArrayList<String>(); List<Integer> rowIndexCols = null; for (String arg : args) { if (arg.startsWith("--")) { if (arg.startsWith(ROWINDEX_PREFIX)) { String[] colStrs = arg.substring(ROWINDEX_PREFIX.length()).split(","); rowIndexCols = new ArrayList<Integer>(colStrs.length); for (String colStr : colStrs) { rowIndexCols.add(Integer.parseInt(colStr)); }//from w w w .j a v a2 s. com } else { System.err.println("Unknown argument " + arg); } } else { files.add(arg); } } for (String filename : files) { System.out.println("Structure for " + filename); Path path = new Path(filename); Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf)); System.out.println( "File Version: " + reader.getFileVersion().getName() + " with " + reader.getWriterVersion()); RecordReaderImpl rows = (RecordReaderImpl) reader.rows(); System.out.println("Rows: " + reader.getNumberOfRows()); System.out.println("Compression: " + reader.getCompression()); if (reader.getCompression() != CompressionKind.NONE) { System.out.println("Compression size: " + reader.getCompressionSize()); } System.out.println("Type: " + reader.getObjectInspector().getTypeName()); System.out.println("\nStripe Statistics:"); Metadata metadata = reader.getMetadata(); for (int n = 0; n < metadata.getStripeStatistics().size(); n++) { System.out.println(" Stripe " + (n + 1) + ":"); StripeStatistics ss = metadata.getStripeStatistics().get(n); for (int i = 0; i < ss.getColumnStatistics().length; ++i) { System.out.println(" Column " + i + ": " + ss.getColumnStatistics()[i].toString()); } } ColumnStatistics[] stats = reader.getStatistics(); System.out.println("\nFile Statistics:"); for (int i = 0; i < stats.length; ++i) { System.out.println(" Column " + i + ": " + stats[i].toString()); } System.out.println("\nStripes:"); int stripeIx = -1; for (StripeInformation stripe : reader.getStripes()) { 
++stripeIx; long stripeStart = stripe.getOffset(); System.out.println(" Stripe: " + stripe.toString()); OrcProto.StripeFooter footer = rows.readStripeFooter(stripe); long sectionStart = stripeStart; for (OrcProto.Stream section : footer.getStreamsList()) { System.out.println(" Stream: column " + section.getColumn() + " section " + section.getKind() + " start: " + sectionStart + " length " + section.getLength()); sectionStart += section.getLength(); } for (int i = 0; i < footer.getColumnsCount(); ++i) { OrcProto.ColumnEncoding encoding = footer.getColumns(i); StringBuilder buf = new StringBuilder(); buf.append(" Encoding column "); buf.append(i); buf.append(": "); buf.append(encoding.getKind()); if (encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY || encoding.getKind() == OrcProto.ColumnEncoding.Kind.DICTIONARY_V2) { buf.append("["); buf.append(encoding.getDictionarySize()); buf.append("]"); } System.out.println(buf); } if (rowIndexCols != null) { RowIndex[] indices = rows.readRowIndex(stripeIx); for (int col : rowIndexCols) { StringBuilder buf = new StringBuilder(); buf.append(" Row group index column ").append(col).append(":"); RowIndex index = null; if ((col >= indices.length) || ((index = indices[col]) == null)) { buf.append(" not found\n"); continue; } for (int entryIx = 0; entryIx < index.getEntryCount(); ++entryIx) { buf.append("\n Entry ").append(entryIx).append(":"); RowIndexEntry entry = index.getEntry(entryIx); if (entry == null) { buf.append("unknown\n"); continue; } OrcProto.ColumnStatistics colStats = entry.getStatistics(); if (colStats == null) { buf.append("no stats at "); } else { ColumnStatistics cs = ColumnStatisticsImpl.deserialize(colStats); Object min = RecordReaderImpl.getMin(cs), max = RecordReaderImpl.getMax(cs); buf.append(" count: ").append(cs.getNumberOfValues()); buf.append(" min: ").append(min); buf.append(" max: ").append(max); } buf.append(" positions: "); for (int posIx = 0; posIx < entry.getPositionsCount(); ++posIx) { 
if (posIx != 0) { buf.append(","); } buf.append(entry.getPositions(posIx)); } } System.out.println(buf); } } } FileSystem fs = path.getFileSystem(conf); long fileLen = fs.getContentSummary(path).getLength(); long paddedBytes = getTotalPaddingSize(reader); // empty ORC file is ~45 bytes. Assumption here is file length always >0 double percentPadding = ((double) paddedBytes / (double) fileLen) * 100; DecimalFormat format = new DecimalFormat("##.##"); System.out.println("\nFile length: " + fileLen + " bytes"); System.out.println("Padding length: " + paddedBytes + " bytes"); System.out.println("Padding ratio: " + format.format(percentPadding) + "%"); rows.close(); } }
From source file:com.blm.orc.OrcFile.java
License:Apache License
/**
 * Create an ORC file writer. This is the public interface for creating
 * writers going forward and new options will only be added to this method.
 *
 * <p>The file system set on the options is used when present; otherwise the
 * one that owns {@code path} is looked up with the options' configuration.
 *
 * @param path filename to write to
 * @param opts the options
 * @return a new ORC file writer
 * @throws IOException if the file system for {@code path} cannot be obtained
 */
public static Writer createWriter(Path path, WriterOptions opts) throws IOException {
    final FileSystem target;
    if (opts.fileSystemValue != null) {
        target = opts.fileSystemValue;
    } else {
        target = path.getFileSystem(opts.configuration);
    }
    return new WriterImpl(
            target,
            path,
            opts.configuration,
            opts.inspectorValue,
            opts.stripeSizeValue,
            opts.compressValue,
            opts.bufferSizeValue,
            opts.rowIndexStrideValue,
            opts.memoryManagerValue,
            opts.blockPaddingValue,
            opts.versionValue,
            opts.callback,
            opts.encodingStrategy,
            opts.compressionStrategy,
            opts.paddingTolerance,
            opts.blockSizeValue);
}
From source file:com.blm.orc.OrcInputFormat.java
License:Apache License
static List<OrcSplit> generateSplitsInfo(Configuration conf) throws IOException { // use threads to resolve directories into splits Context context = new Context(conf); for (Path dir : getInputPaths(conf)) { FileSystem fs = dir.getFileSystem(conf); context.schedule(new FileGenerator(context, fs, dir)); }// w ww. ja va 2 s .co m context.waitForTasks(); // deal with exceptions if (!context.errors.isEmpty()) { List<IOException> errors = new ArrayList<IOException>(context.errors.size()); for (Throwable th : context.errors) { if (th instanceof IOException) { errors.add((IOException) th); } else { throw new RuntimeException("serious problem", th); } } throw new InvalidInputException(errors); } if (context.cacheStripeDetails) { LOG.info("FooterCacheHitRatio: " + context.cacheHitCounter.get() + "/" + context.numFilesCounter.get()); } return context.splits; }
From source file:com.blm.orc.OrcInputFormat.java
License:Apache License
/**
 * Creates a raw reader that merges the base bucket file, if any, with the
 * given delta directories.
 *
 * <p>When {@code baseDirectory} is set and its name starts with
 * {@code AcidUtils.BASE_PREFIX}, the bucket file is derived with
 * {@code AcidUtils.createBucketFile}; otherwise the base is flagged as
 * original and the bucket file is looked up with {@code findOriginalBucket}.
 * With no base directory the merger is created without a base reader.
 *
 * @throws IOException if the bucket file cannot be located or opened
 */
@Override
public RawReader<OrcStruct> getRawReader(Configuration conf, boolean collapseEvents, int bucket,
        ValidTxnList validTxnList, Path baseDirectory, Path[] deltaDirectory) throws IOException {
    boolean isOriginal = false;
    Reader baseReader = null;

    if (baseDirectory != null) {
        final boolean acidBase = baseDirectory.getName().startsWith(AcidUtils.BASE_PREFIX);
        isOriginal = !acidBase;
        final Path bucketFile = acidBase
                ? AcidUtils.createBucketFile(baseDirectory, bucket)
                : findOriginalBucket(baseDirectory.getFileSystem(conf), baseDirectory, bucket);
        baseReader = OrcFile.createReader(bucketFile, OrcFile.readerOptions(conf));
    }

    return new OrcRawRecordMerger(conf, collapseEvents, baseReader, isOriginal, bucket, validTxnList,
            new Reader.Options(), deltaDirectory);
}
From source file:com.blm.orc.OrcRawRecordMerger.java
License:Apache License
/** * Create a reader that merge sorts the ACID events together. * @param conf the configuration/*ww w .j a v a 2s . c o m*/ * @param collapseEvents should the events on the same row be collapsed * @param isOriginal is the base file a pre-acid file * @param bucket the bucket we are reading * @param options the options to read with * @param deltaDirectory the list of delta directories to include * @throws IOException */ OrcRawRecordMerger(Configuration conf, boolean collapseEvents, Reader reader, boolean isOriginal, int bucket, ValidTxnList validTxnList, Reader.Options options, Path[] deltaDirectory) throws IOException { this.conf = conf; this.collapse = collapseEvents; this.offset = options.getOffset(); this.length = options.getLength(); this.validTxnList = validTxnList; // modify the optins to reflect the event instead of the base row Reader.Options eventOptions = createEventOptions(options); if (reader == null) { baseReader = null; } else { // find the min/max based on the offset and length if (isOriginal) { discoverOriginalKeyBounds(reader, bucket, options); } else { discoverKeyBounds(reader, options); } LOG.info("min key = " + minKey + ", max key = " + maxKey); // use the min/max instead of the byte range ReaderPair pair; ReaderKey key = new ReaderKey(); if (isOriginal) { options = options.clone(); options.range(options.getOffset(), Long.MAX_VALUE); pair = new OriginalReaderPair(key, reader, bucket, minKey, maxKey, options); } else { pair = new ReaderPair(key, reader, bucket, minKey, maxKey, eventOptions); } // if there is at least one record, put it in the map if (pair.nextRecord != null) { readers.put(key, pair); } baseReader = pair.recordReader; } // we always want to read all of the deltas eventOptions.range(0, Long.MAX_VALUE); // Turn off the sarg before pushing it to delta. 
We never want to push a sarg to a delta as // it can produce wrong results (if the latest valid version of the record is filtered out by // the sarg) or ArrayOutOfBounds errors (when the sarg is applied to a delete record) eventOptions.searchArgument(null, null); if (deltaDirectory != null) { for (Path delta : deltaDirectory) { ReaderKey key = new ReaderKey(); Path deltaFile = AcidUtils.createBucketFile(delta, bucket); FileSystem fs = deltaFile.getFileSystem(conf); long length = getLastFlushLength(fs, deltaFile); if (fs.exists(deltaFile) && length != -1) { Reader deltaReader = OrcFile.createReader(deltaFile, OrcFile.readerOptions(conf).maxLength(length)); ReaderPair deltaPair = new ReaderPair(key, deltaReader, bucket, minKey, maxKey, eventOptions); if (deltaPair.nextRecord != null) { readers.put(key, deltaPair); } } } } // get the first record Map.Entry<ReaderKey, ReaderPair> entry = readers.pollFirstEntry(); if (entry == null) { columns = 0; primary = null; } else { primary = entry.getValue(); if (readers.isEmpty()) { secondaryKey = null; } else { secondaryKey = readers.firstKey(); } // get the number of columns in the user's rows columns = primary.getColumns(); } }
From source file:com.blm.orc.OrcRecordUpdater.java
License:Apache License
/**
 * Creates an updater that writes ACID events for one bucket.
 *
 * <p>The constructor resolves the output file name with
 * {@code AcidUtils.createFilename}, makes a best-effort attempt to write the
 * ACID format marker file under {@code path}, opens the flush-length side
 * file when the options cover more than one transaction and are not writing
 * a base, and finally creates the ORC writer and the reusable event struct.
 *
 * @param path the directory the output file name is derived from
 * @param options the ACID output options
 * @throws IOException if the file system, the side file or the writer
 *         cannot be created
 */
OrcRecordUpdater(Path path, AcidOutputFormat.Options options) throws IOException {
    this.options = options;
    this.bucket.set(options.getBucket());
    this.path = AcidUtils.createFilename(path, options);
    FileSystem fs = options.getFilesystem();
    if (fs == null) {
        fs = path.getFileSystem(options.getConfiguration());
    }
    this.fs = fs;

    // Best effort: a failure to create the marker is only logged at debug level.
    try {
        FSDataOutputStream strm = fs.create(new Path(path, ACID_FORMAT), false);
        try {
            strm.writeInt(ORC_ACID_VERSION);
        } finally {
            // Close the stream even when the write fails, so it is not leaked.
            strm.close();
        }
    } catch (IOException ioe) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Failed to create " + path + "/" + ACID_FORMAT + " with " + ioe);
        }
    }

    if (options.getMinimumTransactionId() != options.getMaximumTransactionId() && !options.isWritingBase()) {
        flushLengths = fs.create(getSideFile(this.path), true, 8, options.getReporter());
    } else {
        flushLengths = null;
    }

    OrcFile.WriterOptions writerOptions = null;
    if (options instanceof OrcOptions) {
        writerOptions = ((OrcOptions) options).getOrcOptions();
    }
    if (writerOptions == null) {
        writerOptions = OrcFile.writerOptions(options.getConfiguration());
    }
    writerOptions.fileSystem(fs).callback(indexBuilder);
    if (!options.isWritingBase()) {
        // Non-base files get their own buffer/stripe sizes and no block padding.
        writerOptions.blockPadding(false);
        writerOptions.bufferSize(DELTA_BUFFER_SIZE);
        writerOptions.stripeSize(DELTA_STRIPE_SIZE);
    }
    rowInspector = (StructObjectInspector) options.getInspector();
    writerOptions.inspector(createEventSchema(findRecId(options.getInspector(), options.getRecordIdColumn())));
    this.writer = OrcFile.createWriter(this.path, writerOptions);

    item = new OrcStruct(FIELDS);
    item.setFieldValue(OPERATION, operation);
    item.setFieldValue(CURRENT_TRANSACTION, currentTransaction);
    item.setFieldValue(ORIGINAL_TRANSACTION, originalTransaction);
    item.setFieldValue(BUCKET, bucket);
    item.setFieldValue(ROW_ID, rowId);
}
From source file:com.blm.orc.ReaderImpl.java
License:Apache License
/** * Constructor that let's the user specify additional options. * @param path pathname for file// w w w.j ava2s .com * @param options options for reading * @throws IOException */ ReaderImpl(Path path, OrcFile.ReaderOptions options) throws IOException { FileSystem fs = options.getFilesystem(); if (fs == null) { fs = path.getFileSystem(options.getConfiguration()); } this.fileSystem = fs; this.path = path; this.conf = options.getConfiguration(); FileMetaInfo footerMetaData; if (options.getFileMetaInfo() != null) { footerMetaData = options.getFileMetaInfo(); } else { footerMetaData = extractMetaInfoFromFooter(fs, path, options.getMaxLength()); } MetaInfoObjExtractor rInfo = new MetaInfoObjExtractor(footerMetaData.compressionType, footerMetaData.bufferSize, footerMetaData.metadataSize, footerMetaData.footerBuffer); this.footerByteBuffer = footerMetaData.footerBuffer; this.compressionKind = rInfo.compressionKind; this.codec = rInfo.codec; this.bufferSize = rInfo.bufferSize; this.metadataSize = rInfo.metadataSize; this.metadata = rInfo.metadata; this.footer = rInfo.footer; this.inspector = rInfo.inspector; this.versionList = footerMetaData.versionList; this.writerVersion = footerMetaData.writerVersion; }