List of usage examples for org.apache.hadoop.fs.Path.getName()
public String getName()
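getName() returns the final component of a Path (the part after the last "/"), which is why the examples below use it to inspect bare file names such as "_SUCCESS" or "part-00000". A minimal, self-contained sketch; the paths used here are made up purely for illustration:

import org.apache.hadoop.fs.Path;

public class PathGetNameDemo {
    public static void main(String[] args) {
        // getName() returns only the last path component, without any parent directories.
        Path file = new Path("hdfs://namenode/user/data/output/part-00000");
        System.out.println(file.getName());              // prints "part-00000"
        System.out.println(file.getParent().getName());  // prints "output"

        // Typical use in the examples below: detect marker files such as "_SUCCESS".
        Path marker = new Path("/user/data/output/_SUCCESS");
        System.out.println(marker.getName().startsWith("_")); // prints "true"
    }
}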
From source file:edu.nyu.vida.data_polygamy.utils.FrameworkUtils.java
License:BSD License
public static String[] searchDataAttributes(final String name, Configuration conf, boolean s3) throws IOException {
    PathFilter filter = new PathFilter() {
        @Override
        public boolean accept(Path arg0) {
            if (arg0.getName().contains("_SUCCESS"))
                return false;
            return true;
        }
    };

    Path path = null;
    FileSystem fs = null;

    if (s3) {
        path = new Path(conf.get("bucket") + dataAttributesDir + "/" + name);
        fs = FileSystem.get(path.toUri(), conf);
    } else {
        fs = FileSystem.get(new Configuration());
        path = new Path(fs.getHomeDirectory() + "/" + dataAttributesDir + "/" + name);
    }

    FileStatus[] status;
    try {
        status = fs.listStatus(path, filter);
    } catch (FileNotFoundException e) {
        return new String[0];
    }
    if (s3)
        fs.close();

    String[] names = new String[status.length];
    String fileName = "";
    for (int i = 0; i < status.length; i++) {
        fileName = status[i].getPath().getName();
        names[i] = fileName;
    }
    return names;
}
From source file:edu.nyu.vida.data_polygamy.utils.FrameworkUtils.java
License:BSD License
public static <K, V> void merge(Path fromDirectory, Path toFile, Class<K> keyClass, Class<V> valueClass)
        throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    SequenceFile.Writer writer = SequenceFile.createWriter(conf, SequenceFile.Writer.file(toFile),
            SequenceFile.Writer.keyClass(keyClass), SequenceFile.Writer.valueClass(valueClass));

    for (FileStatus status : fs.listStatus(fromDirectory)) {
        if (status.isDirectory()) {
            System.out.println("Skip directory '" + status.getPath().getName() + "'");
            continue;
        }

        Path file = status.getPath();
        if (file.getName().startsWith("_")) {
            // Jobs' output folders contain marker files such as "_SUCCESS"; skip them.
            System.out.println("Skip \"_\"-file '" + file.getName() + "'");
            continue;
        }

        // System.out.println("Merging '" + file.getName() + "'");
        SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(file));
        Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
        while (reader.next(key, value)) {
            writer.append(key, value);
        }
        reader.close();
    }
    writer.close();
}
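A hedged usage fragment for the helper above. The directory and file names are made up, and LongWritable/Text are assumed key/value types; they must match the types actually stored in the SequenceFiles being merged:

// Illustrative fragment only: "/user/me/job-output" and "/user/me/merged.seq" are invented paths.
Path fromDirectory = new Path("/user/me/job-output");
Path toFile = new Path("/user/me/merged.seq");
FrameworkUtils.merge(fromDirectory, toFile, LongWritable.class, Text.class);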
From source file:edu.nyu.vida.data_polygamy.utils.GetMergeFiles.java
License:BSD License
public static void main(String[] args) throws IllegalArgumentException, IOException, URISyntaxException {
    String fromDirectory = args[0];
    String toEventsDirectory = args[1];
    String toOutliersDirectory = args[2];
    String metadataFile = args[3];

    // Detecting datasets.
    HashSet<String> datasets = new HashSet<String>();
    FileReader fileReader = new FileReader(metadataFile);
    BufferedReader bufferedReader = new BufferedReader(fileReader);
    String line;
    while ((line = bufferedReader.readLine()) != null) {
        String[] parts = line.split(",");
        datasets.add(parts[0]);
    }
    bufferedReader.close();

    // Downloading relationships.
    String relationshipPatternStr = "([a-zA-Z0-9]{4}\\-[a-zA-Z0-9]{4})\\-([a-zA-Z0-9]{4}\\-[a-zA-Z0-9]{4})";
    Pattern relationshipPattern = Pattern.compile(relationshipPatternStr);

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    FileSystem localFS = FileSystem.getLocal(conf);

    for (FileStatus status : fs.listStatus(new Path(fs.getHomeDirectory() + "/" + fromDirectory))) {
        if (!status.isDirectory()) {
            continue;
        }
        Path file = status.getPath();
        Matcher m = relationshipPattern.matcher(file.getName());
        if (!m.find())
            continue;
        String ds1 = m.group(1);
        String ds2 = m.group(2);
        if (!datasets.contains(ds1))
            continue;
        if (!datasets.contains(ds2))
            continue;

        for (FileStatus statusDir : fs.listStatus(file)) {
            if (!statusDir.isDirectory()) {
                continue;
            }
            Path fromPath = statusDir.getPath();
            String toPathStr;
            if (fromPath.getName().contains("events")) {
                toPathStr = toEventsDirectory + "/" + fromPath.getParent().getName() + "-" + fromPath.getName();
            } else {
                toPathStr = toOutliersDirectory + "/" + fromPath.getParent().getName() + "-" + fromPath.getName();
            }
            Path toPath = new Path(toPathStr);

            System.out.println("Copying:");
            System.out.println(" From: " + fromPath.toString());
            System.out.println(" To: " + toPath.toString());

            FileUtil.copyMerge(fs, // HDFS File System
                    fromPath,      // HDFS path
                    localFS,       // Local File System
                    toPath,        // Local Path
                    false,         // Do not delete HDFS path
                    conf,          // Configuration
                    null);
        }
    }
}
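The destination name in the example above is composed from the parent directory's name plus the leaf name. A short fragment showing just that composition; the path is an invented example shaped like the relationship-folder pattern the code matches:

// Illustrative only: the path below is made up to match the "xxxx-xxxx-yyyy-yyyy" folder pattern.
Path fromPath = new Path("/user/me/relationships/abcd-1234-efgh-5678/events");
String localName = fromPath.getParent().getName() + "-" + fromPath.getName();
System.out.println(localName); // prints "abcd-1234-efgh-5678-events"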
From source file:edu.nyu.vida.data_polygamy.utils.MergeFiles.java
License:BSD License
public static <K, V> void merge(Path fromDirectory, Path toFile, Class<K> keyClass, Class<V> valueClass)
        throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    SequenceFile.Writer writer = SequenceFile.createWriter(conf, SequenceFile.Writer.file(toFile),
            SequenceFile.Writer.keyClass(keyClass), SequenceFile.Writer.valueClass(valueClass));

    for (FileStatus status : fs.listStatus(fromDirectory)) {
        if (status.isDirectory()) {
            System.out.println("Skip directory '" + status.getPath().getName() + "'");
            continue;
        }

        Path file = status.getPath();
        if (file.getName().startsWith("_")) {
            // Jobs' output folders contain marker files such as "_SUCCESS"; skip them.
            System.out.println("Skip \"_\"-file '" + file.getName() + "'");
            continue;
        }

        SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(file));
        Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
        while (reader.next(key, value)) {
            writer.append(key, value);
        }
        reader.close();
    }
    writer.close();
}
From source file:edu.stolaf.cs.wmrserver.JobServiceHandler.java
License:Apache License
public static FileStatus[] listInputFiles(FileSystem fs, Path path) throws IOException {
    if (!fs.isDirectory(path))
        return new FileStatus[] { fs.getFileStatus(path) };
    else {
        // Get all files in directory that are not directories or hidden files
        final FileSystem fsFinal = fs;
        PathFilter filter = new PathFilter() {
            public boolean accept(Path p) {
                try {
                    return !(fsFinal.isDirectory(p) || p.getName().startsWith(".") || p.getName().startsWith("_"));
                } catch (IOException ex) {
                    throw new RuntimeException("Error filtering files.", ex);
                }
            }
        };
        return fs.listStatus(path, filter);
    }
}
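A hedged usage fragment for the helper above; the output directory path is invented for illustration:

// Illustrative fragment only: "/user/me/job-output" is a made-up path.
FileSystem fs = FileSystem.get(new Configuration());
for (FileStatus status : JobServiceHandler.listInputFiles(fs, new Path("/user/me/job-output"))) {
    // getName() yields just the file name, e.g. "part-00000"; hidden and "_"-prefixed files were filtered out.
    System.out.println(status.getPath().getName());
}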
From source file:edu.uci.ics.asterix.aoya.AsterixYARNClient.java
License:Apache License
/**
 * Upload external libraries and functions to HDFS for an instance to use when started.
 * @throws IllegalStateException
 * @throws IOException
 */
private void installExtLibs() throws IllegalStateException, IOException {
    FileSystem fs = FileSystem.get(conf);
    if (!instanceExists()) {
        throw new IllegalStateException("No instance by name " + instanceName + " found.");
    }
    if (isRunning()) {
        throw new IllegalStateException(
                "Instance " + instanceName + " is running. Please stop it before installing any libraries.");
    }
    String libPathSuffix = CONF_DIR_REL + instanceFolder + "library" + Path.SEPARATOR + libDataverse
            + Path.SEPARATOR;
    Path src = new Path(extLibs);
    String fullLibPath = libPathSuffix + src.getName();
    Path libFilePath = new Path(fs.getHomeDirectory(), fullLibPath);
    LOG.info("Copying Asterix external library to DFS");
    fs.copyFromLocalFile(false, true, src, libFilePath);
}
From source file:edu.uci.ics.pregelix.dataflow.HDFSFileWriteOperatorDescriptor.java
License:Apache License
@SuppressWarnings("rawtypes") @Override//ww w . j a va2 s .c o m public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx, final IRecordDescriptorProvider recordDescProvider, final int partition, int nPartitions) throws HyracksDataException { return new AbstractUnaryInputSinkOperatorNodePushable() { private RecordDescriptor rd0; private FrameDeserializer frameDeserializer; private Configuration conf; private VertexWriter vertexWriter; private TaskAttemptContext context; private String TEMP_DIR = "_temporary"; private ClassLoader ctxCL; private ContextFactory ctxFactory = new ContextFactory(); @Override public void open() throws HyracksDataException { rd0 = inputRdFactory == null ? recordDescProvider.getInputRecordDescriptor(getActivityId(), 0) : inputRdFactory.createRecordDescriptor(); frameDeserializer = new FrameDeserializer(ctx.getFrameSize(), rd0); ctxCL = Thread.currentThread().getContextClassLoader(); Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader()); conf = confFactory.createConfiguration(); VertexOutputFormat outputFormat = BspUtils.createVertexOutputFormat(conf); context = ctxFactory.createContext(conf, partition); try { vertexWriter = outputFormat.createVertexWriter(context); } catch (InterruptedException e) { throw new HyracksDataException(e); } catch (IOException e) { throw new HyracksDataException(e); } } @SuppressWarnings("unchecked") @Override public void nextFrame(ByteBuffer frame) throws HyracksDataException { frameDeserializer.reset(frame); try { while (!frameDeserializer.done()) { Object[] tuple = frameDeserializer.deserializeRecord(); Vertex value = (Vertex) tuple[1]; vertexWriter.writeVertex(value); } } catch (InterruptedException e) { throw new HyracksDataException(e); } catch (IOException e) { throw new HyracksDataException(e); } } @Override public void fail() throws HyracksDataException { Thread.currentThread().setContextClassLoader(ctxCL); } @Override public void close() throws HyracksDataException { try { vertexWriter.close(context); moveFilesToFinalPath(); } catch (InterruptedException e) { throw new HyracksDataException(e); } catch (IOException e) { throw new HyracksDataException(e); } } private void moveFilesToFinalPath() throws HyracksDataException { try { JobContext job = ctxFactory.createJobContext(conf); Path outputPath = FileOutputFormat.getOutputPath(job); FileSystem dfs = FileSystem.get(conf); Path filePath = new Path(outputPath, "part-" + new Integer(partition).toString()); FileStatus[] results = findPartitionPaths(outputPath, dfs); if (results.length >= 1) { /** * for Hadoop-0.20.2 */ renameFile(dfs, filePath, results); } else { /** * for Hadoop-0.23.1 */ int jobId = job.getJobID().getId(); outputPath = new Path( outputPath.toString() + File.separator + TEMP_DIR + File.separator + jobId); results = findPartitionPaths(outputPath, dfs); renameFile(dfs, filePath, results); } } catch (IOException e) { throw new HyracksDataException(e); } finally { Thread.currentThread().setContextClassLoader(ctxCL); } } private FileStatus[] findPartitionPaths(Path outputPath, FileSystem dfs) throws FileNotFoundException, IOException { FileStatus[] tempPaths = dfs.listStatus(outputPath, new PathFilter() { @Override public boolean accept(Path dir) { return dir.getName().endsWith(TEMP_DIR); } }); Path tempDir = tempPaths[0].getPath(); FileStatus[] results = dfs.listStatus(tempDir, new PathFilter() { @Override public boolean accept(Path dir) { return dir.getName().indexOf(context.getTaskAttemptID().toString()) >= 0; } }); return 
results; } private void renameFile(FileSystem dfs, Path filePath, FileStatus[] results) throws IOException, HyracksDataException, FileNotFoundException { Path srcDir = results[0].getPath(); if (!dfs.exists(srcDir)) throw new HyracksDataException("file " + srcDir.toString() + " does not exist!"); FileStatus[] srcFiles = dfs.listStatus(srcDir); Path srcFile = srcFiles[0].getPath(); dfs.delete(filePath, true); dfs.rename(srcFile, filePath); } }; }
From source file:edu.uci.ics.pregelix.dataflow.VertexFileWriteOperatorDescriptor.java
License:Apache License
@SuppressWarnings("rawtypes") @Override// ww w . j a v a2 s . com public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx, final IRecordDescriptorProvider recordDescProvider, final int partition, int nPartitions) throws HyracksDataException { return new AbstractUnaryInputSinkOperatorNodePushable() { private RecordDescriptor rd0; private FrameDeserializer frameDeserializer; private Configuration conf; private VertexWriter vertexWriter; private TaskAttemptContext context; private String TEMP_DIR = "_temporary"; private ClassLoader ctxCL; private ContextFactory ctxFactory = new ContextFactory(); @Override public void open() throws HyracksDataException { rd0 = inputRdFactory == null ? recordDescProvider.getInputRecordDescriptor(getActivityId(), 0) : inputRdFactory.createRecordDescriptor(ctx); frameDeserializer = new FrameDeserializer(rd0); ctxCL = Thread.currentThread().getContextClassLoader(); Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader()); conf = confFactory.createConfiguration(ctx); VertexOutputFormat outputFormat = BspUtils.createVertexOutputFormat(conf); context = ctxFactory.createContext(conf, partition); context.getConfiguration().setClassLoader(ctx.getJobletContext().getClassLoader()); try { if (preHookFactory != null) { preHookFactory.createRuntimeHook().configure(ctx); } vertexWriter = outputFormat.createVertexWriter(context); } catch (InterruptedException e) { throw new HyracksDataException(e); } catch (IOException e) { throw new HyracksDataException(e); } } @SuppressWarnings("unchecked") @Override public void nextFrame(ByteBuffer frame) throws HyracksDataException { frameDeserializer.reset(frame); try { while (!frameDeserializer.done()) { Object[] tuple = frameDeserializer.deserializeRecord(); Vertex value = (Vertex) tuple[1]; vertexWriter.writeVertex(value); } } catch (InterruptedException e) { throw new HyracksDataException(e); } catch (IOException e) { throw new HyracksDataException(e); } } @Override public void fail() throws HyracksDataException { Thread.currentThread().setContextClassLoader(ctxCL); } @Override public void close() throws HyracksDataException { try { vertexWriter.close(context); moveFilesToFinalPath(); } catch (InterruptedException e) { throw new HyracksDataException(e); } catch (IOException e) { throw new HyracksDataException(e); } } private void moveFilesToFinalPath() throws HyracksDataException { try { JobContext job = ctxFactory.createJobContext(conf); Path outputPath = FileOutputFormat.getOutputPath(job); FileSystem dfs = FileSystem.get(conf); Path filePath = new Path(outputPath, "part-" + new Integer(partition).toString()); FileStatus[] results = findPartitionPaths(outputPath, dfs); if (results.length >= 1) { /** * for Hadoop-0.20.2 */ renameFile(dfs, filePath, results); } else { /** * for Hadoop-0.23.1 */ int jobId = job.getJobID().getId(); outputPath = new Path( outputPath.toString() + File.separator + TEMP_DIR + File.separator + jobId); results = findPartitionPaths(outputPath, dfs); renameFile(dfs, filePath, results); } } catch (IOException e) { throw new HyracksDataException(e); } finally { Thread.currentThread().setContextClassLoader(ctxCL); } } private FileStatus[] findPartitionPaths(Path outputPath, FileSystem dfs) throws FileNotFoundException, IOException { FileStatus[] tempPaths = dfs.listStatus(outputPath, new PathFilter() { @Override public boolean accept(Path dir) { return dir.getName().endsWith(TEMP_DIR) && dir.getName().indexOf(".crc") < 0; } }); Path tempDir = tempPaths[0].getPath(); 
FileStatus[] results = dfs.listStatus(tempDir, new PathFilter() { @Override public boolean accept(Path dir) { return dir.getName().indexOf(context.getTaskAttemptID().toString()) >= 0 && dir.getName().indexOf(".crc") < 0; } }); return results; } private void renameFile(FileSystem dfs, Path filePath, FileStatus[] results) throws IOException, HyracksDataException, FileNotFoundException { Path srcDir = results[0].getPath(); if (!dfs.exists(srcDir)) { throw new HyracksDataException("file " + srcDir.toString() + " does not exist!"); } FileStatus[] srcFiles = dfs.listStatus(srcDir); Path srcFile = srcFiles[0].getPath(); dfs.delete(filePath, true); dfs.rename(srcFile, filePath); } }; }
From source file:edu.ucsb.cs.hybrid.io.Splitter.java
License:Apache License
/**
 * Checks input files and picks one with the requested S_size.
 * @param job : job configuration.
 * @param inputPath : path to contain the one map file.
 * @param othersPath : other path that contains the whole input.
 * @param S_size : s vectors put into one map file.
 */
public static void createOneMapFile(JobConf job, Path inputPath, Path othersPath, long S_size)
        throws IOException {
    FileStatus[] files = hdfs.listStatus(othersPath);
    for (int i = 0; i < files.length; i++) {
        if (Collector.countFileVectors(hdfs, files[i].getPath(), job) >= S_size) {
            SequenceFile.Reader reader = new SequenceFile.Reader(hdfs, files[i].getPath(), job);
            SequenceFile.Writer writer = SequenceFile.createWriter(hdfs, job,
                    new Path(inputPath.getName() + "/" + files[i].getPath().getName()), LongWritable.class,
                    FeatureWeightArrayWritable.class, SequenceFile.CompressionType.NONE);
            long vCount = -1;
            while (reader.next(key, value) && (++vCount) < S_size)
                writer.append(key, value);
            writer.close();
            return;
        }
    }
    throw new UnsupportedEncodingException("S_size requested is larger than each file !");
}
From source file:edu.ucsb.cs.hybrid.io.Splitter.java
License:Apache License
/**
 * Splits the files in the input directory into at most S_size vectors each.
 * It does not combine the vectors from two different partitions.
 * @param job : configurations.
 * @param S_size : split files into at most this number of vectors.
 * @param inputPath : path of the directory of the input files.
 * @return path of the split files, each with at most S_size vectors.
 */
public static Path splitAll(JobConf job, long S_size, Path inputPath) throws IOException {
    System.out.println("Splitter.splitAll() from " + inputPath.getName() + " into partitions of size at most "
            + S_size);
    LongWritable key = new LongWritable();
    FeatureWeightArrayWritable value = new FeatureWeightArrayWritable();
    SequenceFile.Writer writer = null;
    String tmpDir = "splits-tmp";

    hdfs.delete(new Path(tmpDir), true);
    hdfs.mkdirs(new Path(tmpDir));

    FileStatus[] files = Partitioner.setFiles(hdfs, inputPath);
    for (int i = 0; i < files.length; i++) {
        if ((hdfs.isDirectory(files[i].getPath()) || files[i].getPath().getName().startsWith("_")))
            continue;
        SequenceFile.Reader reader = new SequenceFile.Reader(hdfs, files[i].getPath(), job);
        long subpartition = 0, vecCount = 0;
        while (reader.next(key, value)) {
            vecCount++;
            if (vecCount == 1) {
                if (writer != null)
                    writer.close();
                subpartition++;
                writer = SequenceFile.createWriter(hdfs, job,
                        new Path(tmpDir + "/" + files[i].getPath().getName() + "-" + subpartition),
                        LongWritable.class, FeatureWeightArrayWritable.class, SequenceFile.CompressionType.NONE);
            }
            writer.append(key, value);
            if (vecCount == S_size)
                vecCount = 0;
        }
    }
    writer.close();
    return new Path(tmpDir);
}
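A hedged fragment showing how the two Splitter helpers above might be combined; the directory names and the S_size value of 1000 are illustrative assumptions, not taken from the project:

// Illustrative fragment only: "input-vectors" and "one-map-input" are invented paths.
JobConf job = new JobConf();
Path splitDir = Splitter.splitAll(job, 1000, new Path("input-vectors"));
Splitter.createOneMapFile(job, new Path("one-map-input"), splitDir, 1000);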