List of usage examples for org.apache.hadoop.fs Path getName
public String getName()
Returns the final component of this path, i.e. everything after the last slash.
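Before the project examples below, here is a minimal self-contained sketch of what getName() returns; the paths and class name are made up purely for illustration:

import org.apache.hadoop.fs.Path;

public class PathGetNameDemo {
    public static void main(String[] args) {
        // getName() returns the final path component, with no parent directories or scheme
        Path file = new Path("hdfs://namenode:8020/user/alice/data/part-00000");
        System.out.println(file.getName()); // prints: part-00000

        // trailing slashes are normalized away, so a directory path yields the directory name
        Path dir = new Path("/tmp/output/");
        System.out.println(dir.getName()); // prints: output
    }
}

Note that getName() is purely a string operation on the path; it never touches the FileSystem, which is why the examples below freely call it on paths that may not exist yet.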
From source file:boa.datagen.MapFileGen.java
License:Apache License
public static void main(String[] args) throws Exception {
    if (SEQ_FILE_PATH.isEmpty()) {
        System.out.println("Missing path to sequence file. Please specify it in the properties file.");
        return;
    }
    String base = "hdfs://boa-njt/";
    Configuration conf = new Configuration();
    conf.set("fs.default.name", base);
    FileSystem fs = FileSystem.get(conf);
    Path path = new Path(SEQ_FILE_PATH);
    String name = path.getName();
    if (fs.isFile(path)) {
        if (path.getName().equals(MapFile.DATA_FILE_NAME)) {
            MapFile.fix(fs, path.getParent(), Text.class, BytesWritable.class, false, conf);
        } else {
            Path dataFile = new Path(path.getParent(), MapFile.DATA_FILE_NAME);
            fs.rename(path, dataFile);
            Path dir = new Path(path.getParent(), name);
            fs.mkdirs(dir);
            fs.rename(dataFile, new Path(dir, dataFile.getName()));
            MapFile.fix(fs, dir, Text.class, BytesWritable.class, false, conf);
        }
    } else {
        FileStatus[] files = fs.listStatus(path);
        for (FileStatus file : files) {
            path = file.getPath();
            if (fs.isFile(path)) {
                Path dataFile = new Path(path.getParent(), MapFile.DATA_FILE_NAME);
                fs.rename(path, dataFile);
                MapFile.fix(fs, dataFile.getParent(), Text.class, BytesWritable.class, false, conf);
                break;
            }
        }
    }
    fs.close();
}
From source file:boa.datagen.SeqSortMerge.java
License:Apache License
public static void main(String[] args) throws IOException {
    conf.set("fs.default.name", base);
    FileSystem fs = FileSystem.get(conf);
    String inPath = "/tmprepcache/2015-07-sorted/";
    while (true) {
        FileStatus[] files = fs.listStatus(new Path(inPath));
        if (files.length < 2)
            break;
        Path path = new Path(inPath + System.currentTimeMillis());
        fs.mkdirs(path);
        SequenceFile.Writer w = SequenceFile.createWriter(fs, conf,
                new Path(inPath + path.getName() + "/part-00000"), Text.class, BytesWritable.class);
        FileStatus[] candidates = getCandidates(files);
        System.out.println("Merging " + candidates.length + " from " + files.length);
        SequenceFile.Reader[] readers = new SequenceFile.Reader[candidates.length];
        for (int i = 0; i < candidates.length; i++)
            readers[i] = new SequenceFile.Reader(fs,
                    new Path(inPath + candidates[i].getPath().getName() + "/part-00000"), conf);
        Text[] keys = new Text[candidates.length];
        BytesWritable[] values = new BytesWritable[candidates.length];
        read(readers, keys, values);
        while (true) {
            int index = min(keys);
            if (keys[index].toString().isEmpty())
                break;
            w.append(keys[index], values[index]);
            read(readers[index], keys[index], values[index]);
        }
        for (int i = 0; i < readers.length; i++)
            readers[i].close();
        w.close();
        for (int i = 0; i < candidates.length; i++)
            fs.delete(new Path(inPath + candidates[i].getPath().getName()), true);
    }
}
From source file:br.com.lassal.nqueens.grid.job.NQueenCounter.java
private Path setWorkingFolder(int queensSize, String workingFolder, boolean isFinal, Job job)
        throws IOException {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    Path returnPath = null;

    if (workingFolder == null) {
        workingFolder = "";
    }

    Path partialSolDir = new Path(workingFolder + "/nqueens/board-" + queensSize + "/partial/");
    Path inputPath = null;
    Path outputPath = null;
    String nextRunPath = "run_1";

    if (fs.exists(partialSolDir)) {
        RemoteIterator<LocatedFileStatus> dirsFound = fs.listLocatedStatus(partialSolDir);
        String lastRunPath = null;
        Path lastPath = null;
        while (dirsFound.hasNext()) {
            LocatedFileStatus dir = dirsFound.next();
            if (dir.isDirectory()) {
                if (lastRunPath == null || dir.getPath().getName().compareTo(lastRunPath) > 0) {
                    lastPath = dir.getPath();
                    lastRunPath = lastPath.getName();
                }
            }
        }
        if (lastRunPath != null) {
            String[] runParts = lastRunPath.split("_");
            int lastRun = Integer.parseInt(runParts[1]);
            nextRunPath = runParts[0] + "_" + (++lastRun);
            inputPath = lastPath;
        }
    }

    if (inputPath == null) {
        inputPath = new Path(workingFolder + "/nqueens/board-" + queensSize + "/seed");
        if (!fs.exists(inputPath)) {
            FSDataOutputStream seedFile = fs.create(inputPath, true);
            seedFile.writeBytes(queensSize + ":");
            seedFile.close();
        }
    } else {
        returnPath = inputPath;
    }

    // Input
    FileInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(TextInputFormat.class);

    if (isFinal) {
        outputPath = new Path(workingFolder + "/nqueens/board-" + queensSize + "/final");
    } else {
        outputPath = new Path(workingFolder + "/nqueens/board-" + queensSize + "/partial/" + nextRunPath);
    }

    // Output
    FileOutputFormat.setOutputPath(job, outputPath);
    job.setOutputFormatClass(TextOutputFormat.class);

    return returnPath;
}
From source file:byte_import.HexastoreBulkImport.java
License:Open Source License
private void loadHFiles() throws Exception {
    conf = HBaseConfiguration.create();
    HBaseAdmin hadmin = new HBaseAdmin(conf);
    Path hfofDir = new Path("out");
    FileSystem fs = hfofDir.getFileSystem(conf);
    FileStatus[] familyDirStatuses = fs.listStatus(hfofDir);
    int length = 0;
    byte[][] splits = new byte[18000][];
    for (FileStatus stat : familyDirStatuses) {
        if (!stat.isDir()) {
            continue;
        }
        Path familyDir = stat.getPath();
        // skip _logs and other non-family directories
        if (familyDir.getName().startsWith("_"))
            continue;
        Path[] hfiles = FileUtil.stat2Paths(fs.listStatus(familyDir));
        for (Path hfile : hfiles) {
            if (hfile.getName().startsWith("_"))
                continue;
            // read the first row key of each HFile to use as a region split point
            HFile.Reader hfr = HFile.createReader(fs, hfile, new CacheConfig(conf));
            final byte[] first;
            try {
                hfr.loadFileInfo();
                first = hfr.getFirstRowKey();
            } finally {
                hfr.close();
            }
            splits[length] = first.clone();
            length++;
        }
    }
    byte[][] splits1 = new byte[length][];
    for (int i = 0; i < splits1.length; i++) {
        splits1[i] = splits[i];
    }
    Arrays.sort(splits1, Bytes.BYTES_COMPARATOR);
    HTableDescriptor desc = new HTableDescriptor(TABLE_NAME);
    HColumnDescriptor family = new HColumnDescriptor("A");
    desc.addFamily(family);
    conf.setInt("zookeeper.session.timeout", 600000);
    // drop any stale table first, then create it with the computed split keys
    // so the incremental load below always has a target table
    if (hadmin.tableExists(TABLE_NAME)) {
        hadmin.disableTable(TABLE_NAME);
        hadmin.deleteTable(TABLE_NAME);
    }
    hadmin.createTable(desc, splits1);
    String[] args1 = new String[2];
    args1[0] = "out";
    args1[1] = TABLE_NAME;
    ToolRunner.run(new LoadIncrementalHFiles(HBaseConfiguration.create()), args1);
}
From source file:cascading.flow.hadoop.util.HadoopUtil.java
License:Open Source License
/**
 * Copies paths from one local path to a remote path. If syncTimes is true, both modification and access time are
 * changed to match the local 'from' path.
 * <p/>
 * Returns a map of file-name to remote modification times if the remote time is different than the local time.
 *
 * @param config
 * @param commonPaths
 * @param syncTimes
 */
public static Map<String, Long> syncPaths(Configuration config, Map<Path, Path> commonPaths,
        boolean syncTimes) {
    if (commonPaths == null)
        return Collections.emptyMap();

    Map<String, Long> timestampMap = new HashMap<>();
    Map<Path, Path> copyPaths = getCopyPaths(config, commonPaths); // tests remote file existence or if stale

    LocalFileSystem localFS = getLocalFS(config);
    FileSystem remoteFS = getDefaultFS(config);

    for (Map.Entry<Path, Path> entry : copyPaths.entrySet()) {
        Path localPath = entry.getKey();
        Path remotePath = entry.getValue();

        try {
            LOG.info("copying from: {}, to: {}", localPath, remotePath);
            remoteFS.copyFromLocalFile(localPath, remotePath);

            if (!syncTimes) {
                timestampMap.put(remotePath.getName(),
                        remoteFS.getFileStatus(remotePath).getModificationTime());
                continue;
            }
        } catch (IOException exception) {
            throw new FlowException("unable to copy local: " + localPath + " to remote: " + remotePath,
                    exception);
        }

        FileStatus localFileStatus = null;

        try {
            // sync the modified times so we can lazily upload jars to hdfs after job is started
            // otherwise modified time will be local to hdfs
            localFileStatus = localFS.getFileStatus(localPath);
            remoteFS.setTimes(remotePath, localFileStatus.getModificationTime(), -1); // don't set the access time
        } catch (IOException exception) {
            LOG.info("unable to set local modification time on remote file: {}, "
                    + "'dfs.namenode.accesstime.precision' may be set to 0 on HDFS.", remotePath);

            if (localFileStatus != null)
                timestampMap.put(remotePath.getName(), localFileStatus.getModificationTime());
        }
    }

    return timestampMap;
}
From source file:cascading.flow.hadoop.util.HadoopUtil.java
License:Open Source License
public static void resolvePaths(Configuration config, Collection<String> classpath, String remoteRoot,
        String resourceSubPath, Map<String, Path> localPaths, Map<String, Path> remotePaths) {
    FileSystem defaultFS = getDefaultFS(config);
    FileSystem localFS = getLocalFS(config);

    Path remoteRootPath = new Path(remoteRoot == null ? "./.staging" : remoteRoot);

    if (resourceSubPath != null)
        remoteRootPath = new Path(remoteRootPath, resourceSubPath);

    remoteRootPath = defaultFS.makeQualified(remoteRootPath);

    boolean defaultIsLocal = defaultFS.equals(localFS);

    for (String stringPath : classpath) {
        Path path = new Path(stringPath);
        URI uri = path.toUri();

        if (uri.getScheme() == null && !defaultIsLocal) // we want to sync
        {
            Path localPath = localFS.makeQualified(path);

            if (!exists(localFS, localPath))
                throw new FlowException("path not found: " + localPath);

            String name = localPath.getName();

            if (resourceSubPath != null)
                name = resourceSubPath + "/" + name;

            localPaths.put(name, localPath);
            remotePaths.put(name, defaultFS.makeQualified(new Path(remoteRootPath, path.getName())));
        } else if (localFS.equals(getFileSystem(config, path))) {
            if (!exists(localFS, path))
                throw new FlowException("path not found: " + path);

            Path localPath = localFS.makeQualified(path);
            String name = localPath.getName();

            if (resourceSubPath != null)
                name = resourceSubPath + "/" + name;

            localPaths.put(name, localPath);
        } else {
            if (!exists(defaultFS, path))
                throw new FlowException("path not found: " + path);

            Path defaultPath = defaultFS.makeQualified(path);
            String name = defaultPath.getName();

            if (resourceSubPath != null)
                name = resourceSubPath + "/" + name;

            remotePaths.put(name, defaultPath);
        }
    }
}
From source file:cascading.platform.hadoop.BaseHadoopPlatform.java
License:Open Source License
@Override
public void copyToLocal(String outputFile) throws IOException {
    if (!isUseCluster())
        return;

    Path path = new Path(safeFileName(outputFile));

    if (!fileSys.exists(path))
        throw new FileNotFoundException("data file not found: " + outputFile);

    File file = new File(outputFile);

    if (file.exists())
        file.delete();

    if (fileSys.isFile(path)) {
        // it's a file, so just copy it over
        FileUtil.copy(fileSys, path, file, false, configuration);
        return;
    }

    // it's a directory
    file.mkdirs();

    FileStatus[] contents = fileSys.listStatus(path);

    for (FileStatus fileStatus : contents) {
        Path currentPath = fileStatus.getPath();

        if (currentPath.getName().startsWith("_")) // filter out temp and log dirs
            continue;

        FileUtil.copy(fileSys, currentPath, new File(file, currentPath.getName()), false, configuration);
    }
}
From source file:cascading.tap.hadoop.BaseDistCacheTap.java
License:Open Source License
@Override
public TupleEntryIterator openForRead(FlowProcess<? extends Configuration> flowProcess, RecordReader input)
        throws IOException {
    // always read via Hadoop FileSystem if in standalone/local mode, or if a RecordReader is provided
    if (HadoopUtil.isLocal(flowProcess.getConfig()) || input != null) {
        LOG.info("delegating to parent");
        return super.openForRead(flowProcess, input);
    }

    Path[] cachedFiles = getLocalCacheFiles(flowProcess);

    if (cachedFiles == null || cachedFiles.length == 0)
        return super.openForRead(flowProcess, null);

    List<Path> paths = new ArrayList<>();
    List<Tap> taps = new ArrayList<>();

    if (isSimpleGlob()) {
        FileSystem fs = FileSystem.get(flowProcess.getConfig());
        FileStatus[] statuses = fs.globStatus(getHfs().getPath());

        for (FileStatus status : statuses)
            paths.add(status.getPath());
    } else {
        paths.add(getHfs().getPath());
    }

    for (Path pathToFind : paths) {
        for (Path path : cachedFiles) {
            if (path.toString().endsWith(pathToFind.getName())) {
                LOG.info("found {} in distributed cache", path);
                taps.add(new Lfs(getScheme(), path.toString()));
            }
        }
    }

    if (taps.isEmpty()) // nothing matched in the cache, read from HDFS
    {
        LOG.info("could not find files in local resource path. delegating to parent: {}",
                super.getIdentifier());
        return super.openForRead(flowProcess, input);
    }

    return new MultiSourceTap(taps.toArray(new Tap[taps.size()])).openForRead(flowProcess, input);
}
From source file:chaohBIM.ZipFileRecordReader.java
License:Apache License
/**
 * Initialise and open the ZIP file from the FileSystem
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) inputSplit;
    Configuration conf = taskAttemptContext.getConfiguration();
    Path path = split.getPath();
    FileSystem fs = path.getFileSystem(conf);

    // open the stream
    fsin = fs.open(path);
    zip = new ZipInputStream(fsin);

    // strip the trailing ".zip" extension; replaceAll takes a regex, so escape the dot and anchor it
    zipfilename = path.getName().replaceAll("\\.zip$", "");
}
From source file:cmd.download.java
License:Apache License
private void mergeToLocalFile(FileSystem fs, Path src, String outPath, Configuration configuration)
        throws FileNotFoundException, IOException {
    FileStatus[] status = fs.listStatus(src);
    Map<String, Path> paths = new TreeMap<String, Path>();

    for (FileStatus fileStatus : status) {
        Path path = fileStatus.getPath();
        String pathName = path.getName();
        if (pathName.startsWith(Constants.NAME_SECOND)) {
            paths.put(pathName, path);
        }
    }

    File outFile = new File(outPath, Names.indexId2Node + ".dat");
    OutputStream out = new FileOutputStream(outFile);

    for (String pathName : paths.keySet()) {
        Path path = new Path(src, paths.get(pathName));
        log.debug("Concatenating {} into {}...", path.toUri(), outFile.getAbsoluteFile());
        InputStream in = fs.open(new Path(path, Names.indexId2Node + ".dat"));
        IOUtils.copyBytes(in, out, configuration, false);
        in.close();
    }

    out.close();
}