List of usage examples for org.apache.hadoop.fs.Path#toUri()
public URI toUri()
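Before the project-specific examples, a minimal self-contained sketch of what toUri() returns; the namenode address and file name are hypothetical. The resulting java.net.URI carries the Path's scheme, authority, and path components, which most of the snippets on this page either read back or strip off.

import java.net.URI;
import org.apache.hadoop.fs.Path;

public class PathToUriSketch {
    public static void main(String[] args) {
        // Hypothetical fully qualified HDFS path.
        Path p = new Path("hdfs://namenode:8020/tmp/abc/file.txt");
        URI uri = p.toUri();
        System.out.println(uri.getScheme());    // hdfs
        System.out.println(uri.getAuthority()); // namenode:8020
        System.out.println(uri.getPath());      // /tmp/abc/file.txt
    }
}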
From source file: com.inmobi.conduit.distcp.DistcpBaseService.java
License: Apache License
/**
 * Method to qualify the checkpoint path based on the readurl configured
 * for the source cluster. The readurl of the cluster can change and the
 * checkpoint paths should be re-qualified to the new source cluster read
 * path.
 *
 * @param lastCheckPointPath path read from the checkpoint file; may be null.
 * @param srcCluster the cluster whose checkpoint path should be re-qualified.
 * @return the re-qualified path.
 */
protected Path fullyQualifyCheckPointWithReadURL(Path lastCheckPointPath, Cluster srcCluster) {
    // If the checkpoint value was empty or null, just fall through and let
    // the service determine the new path.
    if (lastCheckPointPath == null) {
        return null;
    }
    String readUrl = srcCluster.getReadUrl();
    URI checkpointURI = lastCheckPointPath.toUri();
    String unQualifiedPathStr = checkpointURI.getPath();
    Path newCheckPointPath = new Path(readUrl, unQualifiedPathStr);
    return newCheckPointPath;
}
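The toUri() pattern at work above: getPath() drops the old scheme and authority, and the bare path is then re-rooted on the new read URL. A standalone sketch of that step, with hypothetical checkpoint and read-URL values:

import org.apache.hadoop.fs.Path;

public class RequalifySketch {
    public static void main(String[] args) {
        // Hypothetical old checkpoint location and new read URL.
        Path lastCheckPoint = new Path("hdfs://old-namenode:8020/conduit/checkpoints/stream1");
        String readUrl = "hdfs://new-namenode:8020";
        // Keep only the scheme-less, authority-less path portion...
        String unqualified = lastCheckPoint.toUri().getPath(); // /conduit/checkpoints/stream1
        // ...and qualify it against the new read URL.
        Path requalified = new Path(readUrl, unqualified);
        System.out.println(requalified); // hdfs://new-namenode:8020/conduit/checkpoints/stream1
    }
}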
From source file: com.inmobi.conduit.distcp.tools.DistCp.java
License: Apache License
/**
 * Get default name of the copy listing file. Use the meta folder
 * to create the copy listing file.
 *
 * @return - Path where the copy listing file has to be saved
 * @throws IOException - Exception if any
 */
protected Path getFileListingPath() throws IOException {
    String fileListPathStr = metaFolder + "/fileList.seq";
    Path path = new Path(fileListPathStr);
    return new Path(path.toUri().normalize().toString());
}
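The round trip through toUri().normalize() delegates to java.net.URI#normalize(), which applies RFC 3986 dot-segment removal to the path portion. A small sketch of that effect, using a made-up metaFolder value:

import org.apache.hadoop.fs.Path;

public class NormalizeSketch {
    public static void main(String[] args) {
        // Hypothetical meta folder containing a "." segment.
        String metaFolder = "/tmp/distcp/./meta";
        Path path = new Path(metaFolder + "/fileList.seq");
        // URI.normalize() removes the dot segment that Path itself leaves in place.
        Path listing = new Path(path.toUri().normalize().toString());
        System.out.println(listing); // /tmp/distcp/meta/fileList.seq
    }
}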
From source file: com.inmobi.conduit.distcp.tools.util.DistCpUtils.java
License: Apache License
/**
 * Gets the relative path of a child path with respect to a root path.
 * For example, if childPath = /tmp/abc/xyz/file and sourceRootPath = /tmp/abc,
 * the relative path is /xyz/file. If childPath = /file and sourceRootPath = /,
 * the relative path is /file.
 *
 * @param sourceRootPath - Source root path
 * @param childPath - Path for which the relative path is required
 * @return - Relative portion of the child path (always prefixed with /
 *           unless it is empty)
 */
public static String getRelativePath(Path sourceRootPath, Path childPath) {
    String childPathString = childPath.toUri().getPath();
    String sourceRootPathString = sourceRootPath.toUri().getPath();
    return sourceRootPathString.equals("/") ? childPathString
            : childPathString.substring(sourceRootPathString.length());
}
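A short hypothetical driver for the helper above, mirroring the cases in its Javadoc and assuming the DistCpUtils class is on the classpath. Because toUri().getPath() drops any scheme and authority, fully qualified and unqualified inputs behave the same:

import org.apache.hadoop.fs.Path;
import com.inmobi.conduit.distcp.tools.util.DistCpUtils;

public class RelativePathSketch {
    public static void main(String[] args) {
        // The namenode authority is hypothetical; only the path portions matter.
        Path root = new Path("hdfs://nn:8020/tmp/abc");
        Path child = new Path("hdfs://nn:8020/tmp/abc/xyz/file");
        System.out.println(DistCpUtils.getRelativePath(root, child));                      // /xyz/file
        System.out.println(DistCpUtils.getRelativePath(new Path("/"), new Path("/file"))); // /file
    }
}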
From source file: com.inmobi.messaging.consumer.util.ConsumerUtil.java
License: Apache License
public static void testConsumerStartUp(ClientConfig config, String streamName, String consumerName,
        boolean hadoop, Date absoluteStartTime, Path rootDir, String chkpointPathPrefix) throws Exception {
    AbstractMessagingDatabusConsumer consumer = createConsumer(hadoop);
    // consumer config has both relative start time and absolute start time
    consumer.init(streamName, consumerName, absoluteStartTime, config);
    Assert.assertEquals(consumer.getTopicName(), streamName);
    Assert.assertEquals(consumer.getConsumerName(), consumerName);

    // consumer is starting from relative start time
    int i;
    for (i = 0; i < 120; i++) {
        Message msg = consumer.next();
        Assert.assertEquals(getMessage(msg.getData().array(), hadoop), MessageUtil.constructMessage(i));
    }
    consumer.mark();
    for (i = 120; i < 130; i++) {
        Message msg = consumer.next();
        Assert.assertEquals(getMessage(msg.getData().array(), hadoop), MessageUtil.constructMessage(i));
    }
    consumer.reset();
    // consumer starts consuming messages from the checkpoint
    for (i = 120; i < 240; i++) {
        Message msg = consumer.next();
        Assert.assertEquals(getMessage(msg.getData().array(), hadoop), MessageUtil.constructMessage(i));
    }
    consumer.close();
    Assert.assertEquals(((BaseMessageConsumerStatsExposer) (consumer.getMetrics())).getNumMessagesConsumed(), 250);

    consumer = createConsumer(hadoop);
    config.set(MessagingConsumerConfig.clustersNameConfig, "testCluster");
    consumer.init(streamName, consumerName, absoluteStartTime, config);
    // consumer starts consuming messages from the checkpoint
    for (i = 120; i < 240; i++) {
        Message msg = consumer.next();
        Assert.assertEquals(getMessage(msg.getData().array(), hadoop), MessageUtil.constructMessage(i));
    }
    consumer.mark();
    ConsumerCheckpoint temp = consumer.getCurrentCheckpoint();
    Map<PartitionId, PartitionCheckpoint> lastCheckpoint = new HashMap<PartitionId, PartitionCheckpoint>();
    Map<Integer, Checkpoint> checkpointMap = new HashMap<Integer, Checkpoint>();
    // create consumer checkpoint
    createCheckpointList(temp, checkpointMap, lastCheckpoint, consumer);
    for (i = 240; i < 260; i++) {
        Message msg = consumer.next();
        Assert.assertEquals(getMessage(msg.getData().array(), hadoop), MessageUtil.constructMessage(i));
    }
    consumer.close();
    Assert.assertEquals(((BaseMessageConsumerStatsExposer) (consumer.getMetrics())).getNumMessagesConsumed(), 140);

    consumer = createConsumer(hadoop);
    if (!hadoop) {
        config = ClientConfig.loadFromClasspath(MessageConsumerFactory.MESSAGE_CLIENT_CONF_FILE);
        config.set(DatabusConsumer.checkpointDirConfig, new Path(chkpointPathPrefix, "random-databus").toString());
        config.set(DatabusConsumerConfig.databusRootDirsConfig, rootDir.toUri().toString());
        config.set(MessagingConsumerConfig.clustersNameConfig, "testCluster");
    } else {
        config = ClientConfig.loadFromClasspath("messaging-consumer-hadoop-conf.properties");
        config.set(HadoopConsumer.checkpointDirConfig, new Path(chkpointPathPrefix, "random-hadoop").toString());
        config.set(HadoopConsumerConfig.rootDirsConfig, rootDir.toString());
        config.set(MessagingConsumerConfig.clustersNameConfig, "testCluster");
    }
    // consumer starts from absolute start time
    consumer.init(streamName, consumerName, absoluteStartTime, config);
    for (i = 100; i < 300; i++) {
        Message msg = consumer.next();
        Assert.assertEquals(getMessage(msg.getData().array(), hadoop), MessageUtil.constructMessage(i));
    }
    consumer.mark();
    consumer.close();
    Assert.assertEquals(((BaseMessageConsumerStatsExposer) (consumer.getMetrics())).getNumMessagesConsumed(), 200);
}
From source file: com.kadwa.hadoop.DistExec.java
License: Open Source License
/**
 * Make a path relative with respect to a root path.
 * absPath is always assumed to descend from root.
 * Otherwise the returned path is null.
 */
static String makeRelative(Path root, Path absPath) {
    if (!absPath.isAbsolute()) {
        throw new IllegalArgumentException("!absPath.isAbsolute(), absPath=" + absPath);
    }
    String p = absPath.toUri().getPath();

    StringTokenizer pathTokens = new StringTokenizer(p, "/");
    for (StringTokenizer rootTokens = new StringTokenizer(root.toUri().getPath(), "/"); rootTokens
            .hasMoreTokens();) {
        if (!rootTokens.nextToken().equals(pathTokens.nextToken())) {
            return null;
        }
    }

    StringBuilder sb = new StringBuilder();
    for (; pathTokens.hasMoreTokens();) {
        sb.append(pathTokens.nextToken());
        if (pathTokens.hasMoreTokens()) {
            sb.append(Path.SEPARATOR);
        }
    }
    return sb.length() == 0 ? "." : sb.toString();
}
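For contrast with getRelativePath above, makeRelative returns a root-relative path without a leading slash, or "." when the two paths are equal. A hypothetical caller, placed in the same package because the helper is package-private:

package com.kadwa.hadoop;

import org.apache.hadoop.fs.Path;

public class MakeRelativeSketch {
    public static void main(String[] args) {
        // Hypothetical source tree rooted at /data/in.
        Path root = new Path("/data/in");
        System.out.println(DistExec.makeRelative(root, new Path("/data/in/2024/part-0"))); // 2024/part-0
        System.out.println(DistExec.makeRelative(root, new Path("/data/in")));             // .
    }
}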
From source file: com.kadwa.hadoop.DistExec.java
License: Open Source License
/**
 * Initialize ExecFilesMapper specific job-configuration.
 *
 * @param conf : The dfs/mapred configuration.
 * @param jobConf : The handle to the jobConf object to be initialized.
 * @param args Arguments
 * @return true if it is necessary to launch a job.
 */
private static boolean setup(Configuration conf, JobConf jobConf, final Arguments args) throws IOException {
    jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString());
    jobConf.set(EXEC_CMD_LABEL, args.execCmd);

    // set boolean values
    jobConf.setBoolean(Options.REDIRECT_ERROR_TO_OUT.propertyname,
            args.flags.contains(Options.REDIRECT_ERROR_TO_OUT));

    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobConf);
    Path stagingArea;
    try {
        stagingArea = JobSubmissionFiles.getStagingDir(jClient, conf);
    } catch (InterruptedException e) {
        throw new IOException(e);
    }
    Path jobDirectory = new Path(stagingArea + NAME + "_" + randomId);
    FsPermission mapredSysPerms = new FsPermission(JobSubmissionFiles.JOB_DIR_PERMISSION);
    FileSystem.mkdirs(FileSystem.get(jobDirectory.toUri(), conf), jobDirectory, mapredSysPerms);
    jobConf.set(JOB_DIR_LABEL, jobDirectory.toString());

    FileSystem dstfs = args.dst.getFileSystem(conf);

    // get tokens for all the required FileSystems..
    TokenCache.obtainTokensForNamenodes(jobConf.getCredentials(), new Path[] { args.dst }, conf);

    boolean dstExists = dstfs.exists(args.dst);
    boolean dstIsDir = false;
    if (dstExists) {
        dstIsDir = dstfs.getFileStatus(args.dst).isDir();
    }

    // default logPath
    Path logPath = args.log;
    if (logPath == null) {
        String filename = "_" + NAME + "_logs_" + randomId;
        if (!dstExists || !dstIsDir) {
            Path parent = args.dst.getParent();
            if (!dstfs.exists(parent)) {
                dstfs.mkdirs(parent);
            }
            logPath = new Path(parent, filename);
        } else {
            logPath = new Path(args.dst, filename);
        }
    }
    FileOutputFormat.setOutputPath(jobConf, logPath);

    // create src list, dst list
    FileSystem jobfs = jobDirectory.getFileSystem(jobConf);

    Path srcfilelist = new Path(jobDirectory, "_" + NAME + "_src_files");
    jobConf.set(SRC_LIST_LABEL, srcfilelist.toString());
    SequenceFile.Writer src_writer = SequenceFile.createWriter(jobfs, jobConf, srcfilelist, LongWritable.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    Path dstfilelist = new Path(jobDirectory, "_" + NAME + "_dst_files");
    SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobfs, jobConf, dstfilelist, Text.class,
            Text.class, SequenceFile.CompressionType.NONE);

    Path dstdirlist = new Path(jobDirectory, "_" + NAME + "_dst_dirs");
    jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString());
    SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobfs, jobConf, dstdirlist, Text.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    // handle the case where the destination directory doesn't exist
    // and we've only a single src directory.
    final boolean special = (args.srcs.size() == 1 && !dstExists);
    int srcCount = 0, cnsyncf = 0, dirsyn = 0;
    long fileCount = 0L, byteCount = 0L, cbsyncs = 0L;
    try {
        for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) {
            final Path src = srcItr.next();
            FileSystem srcfs = src.getFileSystem(conf);
            FileStatus srcfilestat = srcfs.getFileStatus(src);
            Path root = special && srcfilestat.isDir() ? src : src.getParent();
            if (srcfilestat.isDir()) {
                ++srcCount;
            }

            Stack<FileStatus> pathstack = new Stack<FileStatus>();
            for (pathstack.push(srcfilestat); !pathstack.empty();) {
                FileStatus cur = pathstack.pop();
                FileStatus[] children = srcfs.listStatus(cur.getPath());
                for (int i = 0; i < children.length; i++) {
                    boolean skipfile = false;
                    final FileStatus child = children[i];
                    final String dst = makeRelative(root, child.getPath());

                    ++srcCount;

                    if (child.isDir()) {
                        pathstack.push(child);
                    } else {
                        if (!skipfile) {
                            ++fileCount;
                            byteCount += child.getLen();

                            if (LOG.isTraceEnabled()) {
                                LOG.trace("adding file " + child.getPath());
                            }

                            ++cnsyncf;
                            cbsyncs += child.getLen();
                            if (cnsyncf > SYNC_FILE_MAX || cbsyncs > BYTES_PER_MAP) {
                                src_writer.sync();
                                dst_writer.sync();
                                cnsyncf = 0;
                                cbsyncs = 0L;
                            }
                        }
                    }

                    if (!skipfile) {
                        src_writer.append(new LongWritable(child.isDir() ? 0 : child.getLen()),
                                new FilePair(child, dst));
                    }

                    dst_writer.append(new Text(dst), new Text(child.getPath().toString()));
                }

                if (cur.isDir()) {
                    String dst = makeRelative(root, cur.getPath());
                    dir_writer.append(new Text(dst), new FilePair(cur, dst));
                    if (++dirsyn > SYNC_FILE_MAX) {
                        dirsyn = 0;
                        dir_writer.sync();
                    }
                }
            }
        }
    } finally {
        checkAndClose(src_writer);
        checkAndClose(dst_writer);
        checkAndClose(dir_writer);
    }

    FileStatus dststatus = null;
    try {
        dststatus = dstfs.getFileStatus(args.dst);
    } catch (FileNotFoundException fnfe) {
        LOG.info(args.dst + " does not exist.");
    }

    // create dest path dir if copying > 1 file
    if (dststatus == null) {
        if (srcCount > 1 && !dstfs.mkdirs(args.dst)) {
            throw new IOException("Failed to create" + args.dst);
        }
    }

    final Path sorted = new Path(jobDirectory, "_" + NAME + "_sorted");
    checkDuplication(jobfs, dstfilelist, sorted, conf);

    Path tmpDir = new Path(
            (dstExists && !dstIsDir) || (!dstExists && srcCount == 1) ? args.dst.getParent() : args.dst,
            "_" + NAME + "_tmp_" + randomId);
    jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString());
    LOG.info("sourcePathsCount=" + srcCount);
    LOG.info("filesToExecCount=" + fileCount);
    LOG.info("bytesToExecCount=" + StringUtils.humanReadableInt(byteCount));
    jobConf.setInt(SRC_COUNT_LABEL, srcCount);
    jobConf.setLong(TOTAL_SIZE_LABEL, byteCount);
    setMapCount(fileCount, jobConf);
    return fileCount > 0;
}
From source file: com.knewton.mapreduce.SSTableColumnRecordReaderTest.java
License: Apache License
@Test
public void testNextKeyValue() throws Exception {
    Path inputPath = inputSplit.getPath();
    FileSystem remoteFS = FileSystem.get(inputPath.toUri(), conf);
    FileSystem localFS = FileSystem.getLocal(conf);
    TaskAttemptContext context = getTaskAttemptContext();
    ssTableColumnRecordReader.initialize(inputSplit, context);
    verify(ssTableColumnRecordReader).copyTablesToLocal(remoteFS, localFS, inputPath, context);

    assertEquals(0, ssTableColumnRecordReader.getProgress(), 0);
    assertTrue(ssTableColumnRecordReader.nextKeyValue());
    assertEquals(key.getKey(), ssTableColumnRecordReader.getCurrentKey());
    assertEquals(value, ssTableColumnRecordReader.getCurrentValue());

    assertEquals(0.5, ssTableColumnRecordReader.getProgress(), 0);
    assertTrue(ssTableColumnRecordReader.nextKeyValue());
    assertEquals(key.getKey(), ssTableColumnRecordReader.getCurrentKey());
    assertEquals(value, ssTableColumnRecordReader.getCurrentValue());

    assertEquals(1, ssTableColumnRecordReader.getProgress(), 0);
    assertFalse(ssTableColumnRecordReader.nextKeyValue());
    assertNull(ssTableColumnRecordReader.getCurrentKey());
    assertNull(ssTableColumnRecordReader.getCurrentValue());
}
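FileSystem.get(path.toUri(), conf), as used in the test above, resolves the filesystem that owns a path; Path#getFileSystem(Configuration) is a shorthand that delegates to the same call. A small sketch using a local file URI so it runs without a cluster:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemForPathSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Hypothetical local input path (the file scheme keeps the example runnable offline).
        Path inputPath = new Path("file:///tmp/tables/cf-1-Data.db");
        FileSystem viaUri = FileSystem.get(inputPath.toUri(), conf);
        FileSystem viaPath = inputPath.getFileSystem(conf);
        // Both typically resolve to the same cached FileSystem instance.
        System.out.println(viaUri == viaPath); // true
    }
}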
From source file: com.knewton.mapreduce.SSTableRecordReader.java
License: Apache License
/**
 * Performs all the necessary actions to initialize and prepare this record reader.
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException {
    this.ctx = context;
    conf = context.getConfiguration();
    keysRead = 0;
    components = Sets.newHashSetWithExpectedSize(3);
    FileSplit split = (FileSplit) inputSplit;
    validateConfiguration(conf);

    // Get comparator. Subcomparator can be null.
    AbstractType<?> comparator = getConfComparator(conf);
    AbstractType<?> subcomparator = getConfSubComparator(conf);

    // Get partitioner for keys
    IPartitioner partitioner = getConfPartitioner(conf);

    // Move minimum required db tables to local disk.
    Path dataTablePath = split.getPath();
    FileSystem remoteFS = FileSystem.get(dataTablePath.toUri(), conf);
    FileSystem localFS = FileSystem.getLocal(conf);
    copyTablesToLocal(remoteFS, localFS, dataTablePath, context);

    CFMetaData cfMetaData;
    if (getConfIsSparse(conf)) {
        cfMetaData = CFMetaData.sparseCFMetaData(getDescriptor().ksname, getDescriptor().cfname, comparator);
    } else {
        cfMetaData = CFMetaData.denseCFMetaData(getDescriptor().ksname, getDescriptor().cfname, comparator,
                subcomparator);
    }

    // Open table and get scanner
    SSTableReader tableReader = openSSTableReader(partitioner, cfMetaData);
    setTableScanner(tableReader);
}
From source file: com.knewton.mapreduce.SSTableRecordReader.java
License: Apache License
/**
 * Moves all the minimum required tables for the table reader to work to local disk.
 *
 * @param split The table to work on.
 */
@VisibleForTesting
void copyTablesToLocal(FileSystem remoteFS, FileSystem localFS, Path dataTablePath, TaskAttemptContext context)
        throws IOException {
    Configuration conf = context.getConfiguration();
    String hdfsDataTablePathStr = dataTablePath.toUri().getPath();
    String localDataTablePathStr = dataTablePath.toUri().getHost() + File.separator
            + dataTablePath.toUri().getPath();
    // Make path relative due to EMR permissions
    if (localDataTablePathStr.startsWith("/")) {
        String mapTaskId = conf.get("mapreduce.task.attempt.id");
        String mapTempDir = conf.get("mapreduce.cluster.temp.dir");
        String taskWorkDir = mapTempDir + File.separator + mapTaskId;
        LOG.info("Appending {} to {}", taskWorkDir, localDataTablePathStr);
        localDataTablePathStr = taskWorkDir + localDataTablePathStr;
    }
    Path localDataTablePath = new Path(localDataTablePathStr);
    LOG.info("Copying hdfs file from {} to local disk at {}.", dataTablePath.toUri(), localDataTablePath.toUri());
    copyToLocalFile(remoteFS, localFS, dataTablePath, localDataTablePath);

    boolean isCompressed = conf.getBoolean(PropertyConstants.COMPRESSION_ENABLED.txt, false);
    if (isCompressed) {
        decompress(localDataTablePath, context);
    }

    components.add(Component.DATA);
    desc = Descriptor.fromFilename(localDataTablePathStr);
    Descriptor hdfsDesc = Descriptor.fromFilename(hdfsDataTablePathStr);

    String indexPathStr = hdfsDesc.filenameFor(Component.PRIMARY_INDEX);
    components.add(Component.PRIMARY_INDEX);
    Path localIdxPath = new Path(desc.filenameFor(Component.PRIMARY_INDEX));
    LOG.info("Copying hdfs file from {} to local disk at {}.", indexPathStr, localIdxPath);
    copyToLocalFile(remoteFS, localFS, new Path(indexPathStr), localIdxPath);
    if (isCompressed) {
        decompress(localIdxPath, context);
    }

    String compressionTablePathStr = hdfsDesc.filenameFor(Component.COMPRESSION_INFO.name());
    Path compressionTablePath = new Path(compressionTablePathStr);
    if (remoteFS.exists(compressionTablePath)) {
        Path localCompressionPath = new Path(desc.filenameFor(Component.COMPRESSION_INFO.name()));
        LOG.info("Copying hdfs file from {} to local disk at {}.", compressionTablePath.toUri(),
                localCompressionPath);
        copyToLocalFile(remoteFS, localFS, compressionTablePath, localCompressionPath);
        if (isCompressed) {
            decompress(localCompressionPath, context);
        }
        components.add(Component.COMPRESSION_INFO);
    }
}
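The local destination above is built from the URI's host and path components. A tiny hypothetical illustration of that decomposition; note the doubled separator in the raw concatenation, which the Path constructor collapses:

import java.io.File;
import org.apache.hadoop.fs.Path;

public class UriHostPathSketch {
    public static void main(String[] args) {
        // Hypothetical SSTable location on HDFS.
        Path dataTablePath = new Path("hdfs://namenode/cassandra/ks/cf/ks-cf-jb-1-Data.db");
        String local = dataTablePath.toUri().getHost() + File.separator + dataTablePath.toUri().getPath();
        System.out.println(local);           // namenode//cassandra/ks/cf/ks-cf-jb-1-Data.db (on Unix)
        System.out.println(new Path(local)); // namenode/cassandra/ks/cf/ks-cf-jb-1-Data.db
    }
}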
From source file: com.knewton.mapreduce.SSTableRecordReader.java
License: Apache License
/**
 * Decompresses input files that were snappy compressed before opening them with the sstable
 * reader. It writes a new decompressed file with the same name as the compressed one. The old
 * one gets deleted.
 */
private void decompress(Path localTablePath, TaskAttemptContext context) throws IOException {
    context.setStatus(String.format("Decompressing %s", localTablePath.toUri()));
    int compressionBufSize = context.getConfiguration().getInt(PropertyConstants.DECOMPRESS_BUFFER.txt,
            DEFAULT_DECOMPRESS_BUFFER_SIZE);
    compressionBufSize *= 1024;
    LOG.info("Decompressing {} with buffer size {}.", localTablePath, compressionBufSize);
    File compressedFile = new File(localTablePath.toString());
    InputStream fis = new FileInputStream(compressedFile);
    InputStream bis = new BufferedInputStream(fis, compressionBufSize);
    InputStream sip = new SnappyInputStream(bis);
    File decompressedFile = new File(localTablePath.toString() + ".tmp");
    OutputStream os = new FileOutputStream(decompressedFile);
    OutputStream bos = new BufferedOutputStream(os, compressionBufSize);
    byte[] inByteArr = new byte[compressionBufSize];
    int bytesRead = 0;
    int bytesSinceLastReport = 0;
    while ((bytesRead = sip.read(inByteArr)) > 0) {
        bos.write(inByteArr, 0, bytesRead);
        bytesSinceLastReport += bytesRead;
        // Avoid timeouts. Report progress to the jobtracker.
        if (bytesSinceLastReport % REPORT_DECOMPRESS_PROGRESS_EVERY_GBS > 0) {
            context.setStatus(String.format("Decompressed %d bytes.", bytesSinceLastReport));
            bytesSinceLastReport -= REPORT_DECOMPRESS_PROGRESS_EVERY_GBS;
        }
    }
    sip.close();
    bos.close();
    compressedFile.delete();
    decompressedFile.renameTo(compressedFile);
}