List of usage examples for org.apache.hadoop.fs.Path getName()
public String getName()
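getName() returns only the final component of a path: no scheme, authority, or parent directories. Before the larger examples below, a minimal sketch of that behavior; the path string used here is hypothetical and serves only as illustration.

import org.apache.hadoop.fs.Path;

public class PathGetNameExample {
    public static void main(String[] args) {
        // Hypothetical path, used only to show what getName() returns
        Path path = new Path("hdfs://namenode:8020/user/data/part-00000.gz");
        System.out.println(path.getName());             // part-00000.gz (final component only)
        System.out.println(path.getParent().getName()); // data
    }
}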
From source file:com.cloudera.sqoop.util.AppendUtils.java
License:Apache License
/**
 * Move files from source to target using a specified starting partition.
 */
private void moveFiles(FileSystem fs, Path sourceDir, Path targetDir, int partitionStart) throws IOException {
    NumberFormat numpart = NumberFormat.getInstance();
    numpart.setMinimumIntegerDigits(PARTITION_DIGITS);
    numpart.setGroupingUsed(false);
    Pattern patt = Pattern.compile("part.*-([0-9][0-9][0-9][0-9][0-9]).*");
    FileStatus[] tempFiles = fs.listStatus(sourceDir);

    if (null == tempFiles) {
        // If we've already checked that the dir exists, and now it can't be
        // listed, this is a genuine error (permissions, fs integrity, or other).
        throw new IOException("Could not list files from " + sourceDir);
    }

    // Move and rename files & directories from temporary to target-dir,
    // thus appending the file's next partition
    for (FileStatus fileStat : tempFiles) {
        if (!fileStat.isDir()) {
            // Move imported data files
            String filename = fileStat.getPath().getName();
            Matcher mat = patt.matcher(filename);
            if (mat.matches()) {
                String name = getFilename(filename);
                String fileToMove = name.concat(numpart.format(partitionStart++));
                String extension = getFileExtension(filename);
                if (extension != null) {
                    fileToMove = fileToMove.concat(extension);
                }
                LOG.debug("Filename: " + filename + " repartitioned to: " + fileToMove);
                fs.rename(fileStat.getPath(), new Path(targetDir, fileToMove));
            }
        } else {
            // Move directories (_logs & any other)
            String dirName = fileStat.getPath().getName();
            Path path = new Path(targetDir, dirName);
            int dirNumber = 0;
            while (fs.exists(path)) {
                path = new Path(targetDir, dirName.concat("-").concat(numpart.format(dirNumber++)));
            }
            LOG.debug("Directory: " + dirName + " renamed to: " + path.getName());
            fs.rename(fileStat.getPath(), path);
        }
    }
}
From source file:com.cloudera.training.metrics.JobHistoryHelper.java
License:Apache License
public static JobHistory.JobInfo getJobInfoFromLocalFile(String outputFile, Configuration conf)
        throws IOException {
    FileSystem fs = FileSystem.getLocal(conf);

    Path outputFilePath = new Path(outputFile);

    String[] jobDetails = JobHistory.JobInfo.decodeJobHistoryFileName(outputFilePath.getName()).split("_");
    String jobId = jobDetails[2] + "_" + jobDetails[3] + "_" + jobDetails[4];
    JobHistory.JobInfo job = new JobHistory.JobInfo(jobId);

    DefaultJobHistoryParser.parseJobTasks(outputFile, job, fs);
    return job;
}
From source file:com.conversantmedia.mapreduce.tool.DistributedResourceManager.java
License:Apache License
/**
 * Register this resource. If the resource is a simple property (i.e. primitive or String),
 * it will place it in the configuration. Otherwise, it uses the distributed cache
 * mechanism as required.
 *
 * @param key the resource key. Usually a property/field name.
 * @param value the resource.
 * @throws IOException if resource cannot be serialized
 */
public void registerResource(String key, Object value) throws IOException {
    if (value == null) {
        return;
    }

    String valueString = null;

    // First, determine our approach:
    if (value instanceof String) {
        valueString = (String) value;
    } else if (Primitives.isWrapperType(value.getClass())) {
        valueString = String.valueOf(value);
    }
    // If this is a Path or File object we'll place it
    // on the distributed cache
    else if (value instanceof Path) {
        Path path = (Path) value;
        valueString = path.getName();
        // Distribute the file the new way
        this.job.addCacheFile(path.toUri());
    } else if (value instanceof File) {
        File file = (File) value;
        valueString = file.getName();
        // Distribute the file
        distributeLocalFile(file);
    }
    // Check if it's serializable
    else if (value instanceof java.io.Serializable) {
        // Serialize the object and place it on the distributed cache
        ObjectOutputStream out = null;
        try {
            File beanSerFile = File.createTempFile(value.getClass().getName(), ".ser");
            FileOutputStream fileOut = new FileOutputStream(beanSerFile);
            out = new ObjectOutputStream(fileOut);
            out.writeObject(value);
            valueString = beanSerFile.getName();
            // Distribute the file
            distributeLocalFile(beanSerFile);
        } finally {
            IOUtils.closeQuietly(out);
        }
    } else {
        throw new IllegalArgumentException("Resource [" + key + "] is not serializable.");
    }

    // Setup the config key
    String configKey = CONFIGKEYBASE_RESOURCE + key;
    getConf().set(configKey, value.getClass().getName() + VALUE_SEP + valueString);
}
From source file:com.conversantmedia.mapreduce.tool.DistributedResourceManager.java
License:Apache License
private static Path distributedFilePath(String fileName, Path[] distFiles) throws IOException {
    for (Path path : distFiles) {
        if (StringUtils.equals(fileName, path.getName())) {
            return path;
        }
    }
    return null;
}
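The lookup above resolves a registered file by comparing only the final path component of each distributed-cache entry. A hedged sketch of the same idea from inside a task follows, using the older DistributedCache API that also appears later on this page; the file name "stopwords.txt" and the helper class are hypothetical, and real DistributedResourceManager callers may differ.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;

public class CacheFileLookup {
    // Sketch only: locate a distributed-cache file by its bare name,
    // e.g. findByName(conf, "stopwords.txt") from a mapper's setup.
    public static Path findByName(Configuration conf, String fileName) throws IOException {
        Path[] localFiles = DistributedCache.getLocalCacheFiles(conf);
        if (localFiles == null) {
            return null;
        }
        for (Path p : localFiles) {
            if (fileName.equals(p.getName())) { // compare the final path component only
                return p;
            }
        }
        return null;
    }
}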
From source file:com.cotdp.hadoop.BrotliFileRecordReader.java
License:Apache License
/**
 * Initialize and open the ZIP file from the FileSystem
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) inputSplit;
    Configuration conf = taskAttemptContext.getConfiguration();
    Path path = split.getPath();
    FileSystem fs = path.getFileSystem(conf);

    // Set the file path as the key
    currentKey.set(path.getName());

    // Open the stream
    fsin = fs.open(path);

    String cmd = "/bin/cat";
    ProcessBuilder pb = new ProcessBuilder();
    pb.redirectOutput();
    pb.command(cmd);

    try {
        decompressor = pb.start();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:com.cqx.mr.MRSearchAuto.java
public void searchHBase(int numOfDays) throws IOException, InterruptedException, ClassNotFoundException {
    long startTime;
    long endTime;

    Configuration conf = HBaseConfiguration.create();
    conf.set("hbase.zookeeper.quorum", "node2,node3,node4");
    conf.set("fs.default.name", "hdfs://node1");
    conf.set("mapred.job.tracker", "node1:54311");

    // Search parameters read by the map tasks
    conf.set("search.license", "C87310");
    conf.set("search.color", "10");
    conf.set("search.direction", "2");

    Job job = new Job(conf, "MRSearchHBase");
    System.out.println("search.license: " + conf.get("search.license"));
    job.setNumReduceTasks(0);
    job.setJarByClass(MRSearchAuto.class);

    Scan scan = new Scan();
    scan.addFamily(FAMILY_NAME);
    byte[] startRow = Bytes.toBytes("2011010100000");
    byte[] stopRow;
    switch (numOfDays) {
    case 1:
        stopRow = Bytes.toBytes("2011010200000");
        break;
    case 10:
        stopRow = Bytes.toBytes("2011011100000");
        break;
    case 30:
        stopRow = Bytes.toBytes("2011020100000");
        break;
    case 365:
        stopRow = Bytes.toBytes("2012010100000");
        break;
    default:
        stopRow = Bytes.toBytes("2011010101000");
    }
    // Restrict the scan to the row-key range
    scan.setStartRow(startRow);
    scan.setStopRow(stopRow);

    TableMapReduceUtil.initTableMapperJob(TABLE_NAME, scan, SearchMapper.class,
            ImmutableBytesWritable.class, Text.class, job);

    Path outPath = new Path("searchresult");
    HDFS_File file = new HDFS_File();
    file.DelFile(conf, outPath.getName(), true); // delete the output directory if it already exists
    FileOutputFormat.setOutputPath(job, outPath);

    startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    endTime = System.currentTimeMillis();
    System.out.println("Time used: " + (endTime - startTime));
    System.out.println("startRow:" + Text.decode(startRow));
    System.out.println("stopRow: " + Text.decode(stopRow));
}
From source file:com.dasasian.chok.operation.master.IndexDeployOperation.java
License:Apache License
protected static List<Shard> readShardsFromFs(final String indexName, final String indexPathString)
        throws IndexDeployException {
    // get shard folders from source
    URI uri;
    try {
        uri = new URI(indexPathString);
    } catch (final URISyntaxException e) {
        throw new IndexDeployException(ErrorType.INDEX_NOT_ACCESSIBLE, "unable to parse index path uri '"
                + indexPathString + "', make sure it starts with file:// or hdfs:// ", e);
    }
    FileSystem fileSystem;
    try {
        fileSystem = HadoopUtil.getFileSystem(new Path(uri.toString()));
    } catch (final IOException e) {
        throw new IndexDeployException(ErrorType.INDEX_NOT_ACCESSIBLE,
                "unable to retrieve file system for index path '" + indexPathString
                        + "', make sure your path starts with hadoop support prefix like file:// or hdfs://", e);
    }

    List<Shard> shards = new ArrayList<>();
    try {
        final Path indexPath = new Path(indexPathString);
        if (!fileSystem.exists(indexPath)) {
            throw new IndexDeployException(ErrorType.INDEX_NOT_ACCESSIBLE,
                    "index path '" + uri + "' does not exist");
        }
        final FileStatus[] listStatus = fileSystem.listStatus(indexPath, new PathFilter() {
            public boolean accept(final Path aPath) {
                return !aPath.getName().startsWith(".");
            }
        });
        for (final FileStatus fileStatus : listStatus) {
            String shardPath = fileStatus.getPath().toString();
            if (fileStatus.isDir() || shardPath.endsWith(".zip")) {
                shards.add(new Shard(createShardName(indexName, shardPath), shardPath));
            }
        }
    } catch (final IOException e) {
        throw new IndexDeployException(ErrorType.INDEX_NOT_ACCESSIBLE,
                "could not access index path: " + indexPathString, e);
    }

    if (shards.size() == 0) {
        throw new IndexDeployException(ErrorType.INDEX_NOT_ACCESSIBLE, "index does not contain any shard");
    }
    return shards;
}
From source file:com.datasalt.pangool.solr.SolrRecordWriter.java
License:Apache License
private Path findSolrConfig(Configuration conf) throws IOException {
    Path solrHome = null;
    // we added these lines to make this patch work on Hadoop 0.20.2
    FileSystem localFs = FileSystem.getLocal(conf);
    if (FileSystem.get(conf).equals(localFs)) {
        return new Path(localSolrHome);
    }
    // end-of-addition
    Path[] localArchives = DistributedCache.getLocalCacheArchives(conf);
    if (localArchives.length == 0) {
        throw new IOException(String.format("No local cache archives, where is %s", zipName));
    }
    for (Path unpackedDir : localArchives) {
        // Only logged if debugging
        if (LOG.isDebugEnabled()) {
            LOG.debug(String.format("Examining unpack directory %s for %s", unpackedDir, zipName));
            ProcessBuilder lsCmd = new ProcessBuilder(
                    new String[] { "/bin/ls", "-lR", unpackedDir.toString() });
            lsCmd.redirectErrorStream();
            Process ls = lsCmd.start();
            try {
                byte[] buf = new byte[16 * 1024];
                InputStream all = ls.getInputStream();
                int count;
                while ((count = all.read(buf)) > 0) {
                    System.err.write(buf, 0, count);
                }
            } catch (IOException ignore) {
            }
            System.err.format("Exit value is %d%n", ls.exitValue());
        }
        if (unpackedDir.getName().equals(zipName)) {
            solrHome = unpackedDir;
            break;
        }
    }
    return solrHome;
}
From source file:com.datasalt.utils.commons.TestRepoTool.java
License:Apache License
@Test
public void test() throws IOException {
    FileSystem fs = FileSystem.getLocal(getConf());
    Path repo = new Path("repoTest87463829");
    HadoopUtils.deleteIfExists(fs, repo);
    RepoTool tool = new RepoTool(repo, "pkg", fs);
    assertNull(tool.getNewestPackageWithStatus(PackageStatus.NOT_DEFINED));
    Path pkg1 = tool.newPackage();
    assertEquals("pkg", pkg1.getName().substring(0, 3));
    assertEquals(pkg1.makeQualified(fs), tool.getNewestPackageWithStatus(PackageStatus.NOT_DEFINED));
    Path pkg2 = tool.newPackage();
    assertEquals(pkg2.makeQualified(fs), tool.getNewestPackageWithStatus(PackageStatus.NOT_DEFINED));
    assertEquals(2, tool.getPackages().length);
    RepoTool.setStatus(fs, pkg2, PackageStatus.FINISHED);
    assertEquals(pkg2.makeQualified(fs), tool.getNewestPackageWithStatus(PackageStatus.FINISHED));
    HadoopUtils.deleteIfExists(fs, repo);
}
From source file:com.datatorrent.lib.io.fs.AbstractFileSplitter.java
License:Apache License
/**
 * Creates file-metadata and populates no. of blocks in the metadata.
 *
 * @param fileInfo file information
 * @return file-metadata
 * @throws IOException
 */
protected FileMetadata buildFileMetadata(FileInfo fileInfo) throws IOException {
    LOG.debug("file {}", fileInfo.getFilePath());
    FileMetadata fileMetadata = createFileMetadata(fileInfo);
    LOG.debug("fileMetadata {}", fileMetadata);

    Path path = new Path(fileInfo.getFilePath());
    fileMetadata.setFileName(path.getName());

    FileStatus status = getFileStatus(path);
    fileMetadata.setDirectory(status.isDirectory());
    fileMetadata.setFileLength(status.getLen());

    if (fileInfo.getDirectoryPath() == null) { // Direct filename is given as input.
        fileMetadata.setRelativePath(status.getPath().getName());
    } else {
        String relativePath = getRelativePathWithFolderName(fileInfo);
        fileMetadata.setRelativePath(relativePath);
    }

    if (!status.isDirectory()) {
        int noOfBlocks = (int) ((status.getLen() / blockSize) + (((status.getLen() % blockSize) == 0) ? 0 : 1));
        if (fileMetadata.getDataOffset() >= status.getLen()) {
            noOfBlocks = 0;
        }
        fileMetadata.setNumberOfBlocks(noOfBlocks);
        populateBlockIds(fileMetadata);
    }
    return fileMetadata;
}