List of usage examples for org.apache.hadoop.fs Path equals
@Override public boolean equals(Object o)
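In Hadoop, Path wraps a java.net.URI and equals simply compares those URIs, so the scheme, authority, and normalized path component must all match. A minimal sketch of the consequences (the NameNode address is hypothetical):

import org.apache.hadoop.fs.Path;

public class PathEqualsDemo {
    public static void main(String[] args) {
        // Redundant and trailing slashes are normalized away by the Path constructor.
        System.out.println(new Path("/a//b/").equals(new Path("/a/b")));             // true
        // An unqualified path is not equal to its fully qualified form.
        System.out.println(new Path("/a/b").equals(new Path("hdfs://nn:8020/a/b"))); // false
    }
}

The second case is the recurring theme in the examples below: both sides of a comparison must be qualified the same way before equals is meaningful.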
From source file:com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore.java
License:Apache License
private static boolean isSameOrParent(Path parent, Path child)
{
    int parentDepth = parent.depth();
    int childDepth = child.depth();
    if (parentDepth > childDepth) {
        return false;
    }
    for (int i = childDepth; i > parentDepth; i--) {
        child = child.getParent();
    }
    return parent.equals(child);
}
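A hypothetical call illustrating the walk-up logic: child.getParent() is applied until both paths have the same depth, and Path.equals then requires identical qualification on both sides.

Path parent = new Path("hdfs://nn:8020/warehouse");
Path child = new Path("hdfs://nn:8020/warehouse/db/table/part-0");
// depth(): parent == 1, child == 4, so getParent() runs three times and
// yields hdfs://nn:8020/warehouse, which equals parent.
boolean related = isSameOrParent(parent, child); // true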
From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopGlobalRootedFileSystem.java
License:Open Source License
@Override
public URI getGcsPath(Path hadoopPath) {
    LOG.debug("GHFS.getGcsPath: {}", hadoopPath);

    // Convert to fully qualified absolute path; the Path object will callback to get our current
    // workingDirectory as part of fully resolving the path.
    Path resolvedPath = hadoopPath.makeQualified(this);

    // Handle root.
    if (resolvedPath.equals(getFileSystemRoot())) {
        return GoogleCloudStorageFileSystem.GCS_ROOT;
    }

    // Need to convert scheme to GCS scheme and possibly move bucket into authority.
    String authorityString = null;
    if (!Strings.isNullOrEmpty(resolvedPath.toUri().getAuthority())) {
        authorityString = "/" + resolvedPath.toUri().getAuthority();
    } else {
        authorityString = "";
    }

    // Construct GCS path uri.
    String path = GoogleCloudStorageFileSystem.SCHEME + ":/" + authorityString + resolvedPath.toUri().getPath();
    URI gcsPath = null;
    try {
        gcsPath = new URI(path);
    } catch (URISyntaxException e) {
        String msg = String.format("Invalid path: %s", hadoopPath);
        throw new IllegalArgumentException(msg, e);
    }

    LOG.debug("GHFS.getGcsPath: {} -> {}", hadoopPath, gcsPath);
    return gcsPath;
}
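The root check only works because makeQualified first resolves the path against this file system's scheme, authority, and working directory, putting both sides of equals into the same canonical form. A minimal sketch of the qualification step (the file system, scheme, bucket, and working directory below are hypothetical):

// Assuming fs is a FileSystem whose working directory is gsg://bucket/work:
Path relative = new Path("data/file.txt");
Path qualified = fs.makeQualified(relative);
// qualified is now gsg://bucket/work/data/file.txt and can safely be
// compared with equals against other fully qualified paths.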
From source file:com.hadoop.mapreduce.FourMcInputFormat.java
License:BSD License
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    Configuration conf = HadoopUtils.getConfiguration(job);

    List<InputSplit> defaultSplits = super.getSplits(job);
    List<InputSplit> result = new ArrayList<InputSplit>();

    Path prevFile = null;
    FourMcBlockIndex prevIndex = null;

    for (InputSplit genericSplit : defaultSplits) {
        // Load the index.
        FileSplit fileSplit = (FileSplit) genericSplit;
        Path file = fileSplit.getPath();
        FileSystem fs = file.getFileSystem(conf);

        FourMcBlockIndex index;
        if (file.equals(prevFile)) {
            index = prevIndex;
        } else {
            index = FourMcBlockIndex.readIndex(fs, file);
            prevFile = file;
            prevIndex = index;
        }

        if (index == null) {
            throw new IOException("BlockIndex unreadable for " + file);
        }

        if (index.isEmpty()) { // leave the default split for empty block index
            result.add(fileSplit);
            continue;
        }

        long start = fileSplit.getStart();
        long end = start + fileSplit.getLength();

        long fourMcStart = index.alignSliceStartToIndex(start, end);
        long fourMcEnd = index.alignSliceEndToIndex(end, fs.getFileStatus(file).getLen());

        if (fourMcStart != FourMcBlockIndex.NOT_FOUND && fourMcEnd != FourMcBlockIndex.NOT_FOUND) {
            result.add(new FileSplit(file, fourMcStart, fourMcEnd - fourMcStart, fileSplit.getLocations()));
            LOG.debug("Added 4mc split for " + file + "[start=" + fourMcStart + ", length="
                    + (fourMcEnd - fourMcStart) + "]");
        }
    }
    return result;
}
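Path.equals here drives a single-entry cache: super.getSplits typically returns splits of the same file consecutively, so comparing against the previously seen path skips re-reading the block index. FourMzInputFormat below uses the identical pattern. A stripped-down sketch of the idiom, reusing the names from the snippet above:

Path prevFile = null;
FourMcBlockIndex prevIndex = null;
for (InputSplit s : defaultSplits) {
    Path file = ((FileSplit) s).getPath();
    // equals is a cheap URI comparison, so it is an inexpensive cache-key check
    if (!file.equals(prevFile)) {
        prevIndex = FourMcBlockIndex.readIndex(file.getFileSystem(conf), file);
        prevFile = file;
    }
    // ... use prevIndex ...
}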
From source file:com.hadoop.mapreduce.FourMzInputFormat.java
License:BSD License
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    Configuration conf = HadoopUtils.getConfiguration(job);

    List<InputSplit> defaultSplits = super.getSplits(job);
    List<InputSplit> result = new ArrayList<InputSplit>();

    Path prevFile = null;
    FourMzBlockIndex prevIndex = null;

    for (InputSplit genericSplit : defaultSplits) {
        // Load the index.
        FileSplit fileSplit = (FileSplit) genericSplit;
        Path file = fileSplit.getPath();
        FileSystem fs = file.getFileSystem(conf);

        FourMzBlockIndex index;
        if (file.equals(prevFile)) {
            index = prevIndex;
        } else {
            index = FourMzBlockIndex.readIndex(fs, file);
            prevFile = file;
            prevIndex = index;
        }

        if (index == null) {
            throw new IOException("BlockIndex unreadable for " + file);
        }

        if (index.isEmpty()) { // leave the default split for empty block index
            result.add(fileSplit);
            continue;
        }

        long start = fileSplit.getStart();
        long end = start + fileSplit.getLength();

        long fourMcStart = index.alignSliceStartToIndex(start, end);
        long fourMcEnd = index.alignSliceEndToIndex(end, fs.getFileStatus(file).getLen());

        if (fourMcStart != FourMzBlockIndex.NOT_FOUND && fourMcEnd != FourMzBlockIndex.NOT_FOUND) {
            result.add(new FileSplit(file, fourMcStart, fourMcEnd - fourMcStart, fileSplit.getLocations()));
            LOG.debug("Added 4mz split for " + file + "[start=" + fourMcStart + ", length="
                    + (fourMcEnd - fourMcStart) + "]");
        }
    }
    return result;
}
From source file:com.hdfs.concat.clean.Clean.java
License:Apache License
public void warnOrDelete(Path p) throws IOException {
    if (conf.getBoolean(WARN_MODE, false)) {
        System.out.println("DELETE " + p);
    } else if (!p.equals(new Path(conf.get(TARGET_DIR)))) {
        // Never delete the target directory itself.
        fs.delete(p);
    }
}
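Note the comparison's sensitivity to qualification: if p arrives fully qualified while TARGET_DIR was stored as a bare path, equals returns false even though both name the same directory, and the guard is silently bypassed. A sketch of the pitfall (paths and cluster address hypothetical, assuming fs is bound to that cluster):

Path p = new Path("hdfs://nn:8020/tmp/out");
Path target = new Path("/tmp/out");
System.out.println(p.equals(target)); // false: scheme and authority differ
// Qualifying both sides against the same FileSystem restores the intended check:
System.out.println(fs.makeQualified(p).equals(fs.makeQualified(target))); // true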
From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java
License:Open Source License
/**
 * Specific method because we need to set the input converter class according to the
 * input infos. Note that any mapper instruction before reblock can work on binary block
 * if it can work on binary cell as well.
 *
 * @param job
 * @param inputIndexes
 * @param inputs
 * @param inputInfos
 * @param brlens
 * @param bclens
 * @throws Exception
 */
public static void setUpMultipleInputsReblock(JobConf job, byte[] inputIndexes, String[] inputs,
        InputInfo[] inputInfos, int[] brlens, int[] bclens) throws Exception {
    if (inputs.length != inputInfos.length)
        throw new Exception("number of inputs and inputInfos does not match");

    // set up names of the input matrices and their input format information
    job.setStrings(INPUT_MATRICIES_DIRS_CONFIG, inputs);
    MRJobConfiguration.setMapFunctionInputMatrixIndexes(job, inputIndexes);
    for (int i = 0; i < inputs.length; i++) {
        ConvertTarget target = ConvertTarget.CELL;
        if (inputInfos[i] == InputInfo.BinaryBlockInputInfo)
            target = ConvertTarget.BLOCK;
        setInputInfo(job, inputIndexes[i], inputInfos[i], brlens[i], bclens[i], target);
    }

    // remove redundant input files
    ArrayList<Path> paths = new ArrayList<Path>();
    for (int i = 0; i < inputs.length; i++) {
        Path p = new Path(inputs[i]);
        boolean redundant = false;
        for (Path ep : paths) {
            if (ep.equals(p)) {
                redundant = true;
                break;
            }
        }
        if (redundant)
            continue;
        MultipleInputs.addInputPath(job, p, inputInfos[i].inputFormatClass);
        paths.add(p);
    }
}
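Since Path overrides hashCode consistently with equals (both delegate to the underlying URI), the quadratic scan above can be replaced by set membership with the same behavior. A sketch of the alternative:

Set<Path> seen = new HashSet<Path>();
for (int i = 0; i < inputs.length; i++) {
    Path p = new Path(inputs[i]);
    if (!seen.add(p))
        continue; // duplicate input path, already registered
    MultipleInputs.addInputPath(job, p, inputInfos[i].inputFormatClass);
}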
From source file:com.inmobi.conduit.distcp.tools.mapred.CopyCommitter.java
License:Apache License
private void preserveFileAttributes(Configuration conf) throws IOException {
    String attrSymbols = conf.get(DistCpConstants.CONF_LABEL_PRESERVE_STATUS);
    LOG.info("About to preserve attributes: " + attrSymbols);

    EnumSet<FileAttribute> attributes = DistCpUtils.unpackAttributes(attrSymbols);

    Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
    FileSystem clusterFS = sourceListing.getFileSystem(conf);
    SequenceFile.Reader sourceReader = new SequenceFile.Reader(clusterFS, sourceListing, conf);
    long totalLen = clusterFS.getFileStatus(sourceListing).getLen();

    Path targetRoot = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));

    long preservedEntries = 0;
    try {
        FileStatus srcFileStatus = new FileStatus();
        Text srcRelPath = new Text();

        while (sourceReader.next(srcRelPath, srcFileStatus)) {
            if (!srcFileStatus.isDir())
                continue;
            Path targetFile = new Path(targetRoot.toString() + "/" + srcRelPath);

            // Skip the root folder; preserve its status only after the atomic commit
            // is complete. If it is changed any earlier, the atomic commit may fail.
            if (targetRoot.equals(targetFile))
                continue;

            FileSystem targetFS = targetFile.getFileSystem(conf);
            DistCpUtils.preserve(targetFS, targetFile, srcFileStatus, attributes);
            preservedEntries++; // count the directory entries actually preserved

            HadoopCompat.progress(taskAttemptContext);
            HadoopCompat.setStatus(taskAttemptContext, "Preserving status on directory entries. ["
                    + sourceReader.getPosition() * 100 / totalLen + "%]");
        }
    } finally {
        IOUtils.closeStream(sourceReader);
    }
    LOG.info("Preserved status on " + preservedEntries + " dir entries on target");
}
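The root-skip check relies on Path normalization: targetFile is built by string concatenation, so if the listing ever emits the root with an empty or bare-slash relative path, the extra separator collapses away and equals matches targetRoot exactly. A quick illustration:

Path targetRoot = new Path("/target/work");
System.out.println(targetRoot.equals(new Path("/target/work" + "/" + "")));  // true
System.out.println(targetRoot.equals(new Path("/target/work" + "/" + "/"))); // true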
From source file:com.inmobi.conduit.distcp.tools.mapred.RetriableFileCopyCommand.java
License:Apache License
private Path getTmpFile(Path target, Mapper.Context context) {
    Path targetWorkPath = new Path(
            HadoopCompat.getTaskConfiguration(context).get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));

    // If the target is the work path root itself, the temp file must live one level up.
    Path root = target.equals(targetWorkPath) ? targetWorkPath.getParent() : targetWorkPath;
    Path tmpFile = new Path(root, ".distcp.tmp." + context.getTaskAttemptID().toString());
    LOG.info("Creating temp file: " + tmpFile);
    return tmpFile;
}
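Here equals distinguishes copying the work-path root itself from copying a file beneath it; in the former case the temp file is placed one level up so it cannot collide with the target. A hypothetical trace:

Path work = new Path("/dest/.distcp.work");
System.out.println(new Path("/dest/.distcp.work").equals(work));   // true  -> temp file goes in /dest
System.out.println(new Path("/dest/.distcp.work/a").equals(work)); // false -> temp file goes in the work path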
From source file:com.inmobi.databus.readers.CollectorStreamReader.java
License:Apache License
public Message readLine() throws IOException, InterruptedException {
    if (closed) {
        LOG.info("Stream closed");
        return null;
    }
    Message line = readNextLine();
    while (line == null) { // reached end of file
        LOG.info("Read " + getCurrentFile() + " with lines:" + currentLineNum);
        if (closed) {
            LOG.info("Stream closed");
            break;
        }
        Path lastFile = getLastFile();
        // Rebuild the file list only if the local stream is available, because some
        // files may have moved to the local stream.
        if (isLocalStreamAvailable || !hasNextFile()) {
            build(); // rebuild file list
        }
        if (!hasNextFile()) { // there are no more files
            // stop reading if we have read up to the stop time
            if (hasReadFully()) {
                LOG.info("read all files till stop date");
                break;
            }
            if (!setIterator()) {
                LOG.info("Could not find current file in the stream");
                if (isWithinStream(getCurrentFile().getName()) || !isLocalStreamAvailable) {
                    LOG.info("Staying in collector stream as earlier files still exist");
                    startFromNextHigherAndOpen(getCurrentFile().getName());
                    updateLatestMinuteAlreadyReadForCollectorReader();
                    LOG.info("Reading from the next higher file");
                } else {
                    LOG.info("Current file would have been moved to Local Stream");
                    return null;
                }
            } else {
                waitForFlushAndReOpen();
                LOG.info("Reading from the same file after reopen");
            }
        } else {
            // Reopen the same file only if it is the last file in the stream and we
            // have not yet been told to move on.
            if (moveToNext || (lastFile != null && !lastFile.equals(getCurrentFile()))) {
                setNextFile();
                updateLatestMinuteAlreadyReadForCollectorReader();
                LOG.info("Reading from next file: " + getCurrentFile());
            } else {
                LOG.info("Reading from same file before moving to next");
                // open the same file
                reOpen();
                moveToNext = true;
            }
        }
        line = readNextLine();
    }
    return line;
}
From source file:com.inmobi.databus.readers.DatabusStreamWaitingReader.java
License:Apache License
/**
 * @return the line number from the checkpoint, or zero if no checkpoint is
 *         present for that minute or the checkpointed file and the current
 *         file are not the same.
 */
@Override
protected long getLineNumberForFirstFile(FileStatus firstFile) {
    int minute = getMinuteFromFile(firstFile);
    PartitionCheckpoint partitionChkPoint = pChkpoints.get(Integer.valueOf(minute)).pck;
    if (partitionChkPoint != null) {
        Path checkPointedFileName = new Path(streamDir, partitionChkPoint.getFileName());
        // check whether the current file and the checkpointed file are the same
        if (checkPointedFileName.equals(firstFile.getPath())) {
            return partitionChkPoint.getLineNum();
        }
    }
    return 0;
}
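The checkpoint stores only a file name, which is re-rooted under streamDir before the comparison, so equals holds only if firstFile.getPath() carries exactly the same qualification as streamDir. A hypothetical illustration of the failure mode this implies:

Path streamDir = new Path("/databus/streams/mystream/2014/01/01/00/05");
Path fromCheckpoint = new Path(streamDir, "file-00000.gz"); // checkpointed name (hypothetical)
// A FileStatus from FileSystem.listStatus carries a fully qualified path:
Path fromListing = new Path("hdfs://nn:8020/databus/streams/mystream/2014/01/01/00/05/file-00000.gz");
System.out.println(fromCheckpoint.equals(fromListing)); // false: qualification differs
// So streamDir itself must be fully qualified for the saved line number to be reused.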