Example usage for org.apache.hadoop.fs Path getName

Introduction

This page lists example usages of org.apache.hadoop.fs.Path.getName(), collected from open-source projects.

Prototype

public String getName() 

Document

Returns the final component of this path.
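
For orientation, here is a minimal, hypothetical sketch of what getName() returns for a few paths (the host, user, and file names are illustrative, not taken from the examples below):

import org.apache.hadoop.fs.Path;

public class GetNameDemo {
    public static void main(String[] args) {
        Path p = new Path("hdfs://namenode:8020/user/alice/output/part-00000");
        System.out.println(p.getName());             // part-00000 (final component only)
        System.out.println(p.getParent().getName()); // output
        System.out.println(new Path("/").getName()); // empty string: the root has no final component
    }
}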

Usage

From source file: edu.nyu.vida.data_polygamy.utils.FrameworkUtils.java

License: BSD License

public static String[] searchDataAttributes(final String name, Configuration conf, boolean s3)
        throws IOException {

    PathFilter filter = new PathFilter() {

        @Override
        public boolean accept(Path arg0) {
            if (arg0.getName().contains("_SUCCESS"))
                return false;
            return true;
        }
    };

    Path path = null;
    FileSystem fs = null;

    if (s3) {
        path = new Path(conf.get("bucket") + dataAttributesDir + "/" + name);
        fs = FileSystem.get(path.toUri(), conf);
    } else {
        fs = FileSystem.get(new Configuration());
        path = new Path(fs.getHomeDirectory() + "/" + dataAttributesDir + "/" + name);
    }

    FileStatus[] status;

    try {
        status = fs.listStatus(path, filter);
    } catch (FileNotFoundException e) {
        return new String[0];
    }

    if (s3)
        fs.close();

    String[] names = new String[status.length];
    String fileName = "";
    for (int i = 0; i < status.length; i++) {
        fileName = status[i].getPath().getName();
        names[i] = fileName;
    }

    return names;
}

From source file: edu.nyu.vida.data_polygamy.utils.FrameworkUtils.java

License: BSD License

public static <K, V> void merge(Path fromDirectory, Path toFile, Class<K> keyClass, Class<V> valueClass)
        throws IOException {

    Configuration conf = new Configuration();

    FileSystem fs = FileSystem.get(conf);

    SequenceFile.Writer writer = SequenceFile.createWriter(conf, SequenceFile.Writer.file(toFile),
            SequenceFile.Writer.keyClass(keyClass), SequenceFile.Writer.valueClass(valueClass));

    for (FileStatus status : fs.listStatus(fromDirectory)) {
        if (status.isDirectory()) {
            System.out.println("Skip directory '" + status.getPath().getName() + "'");
            continue;
        }

        Path file = status.getPath();

        if (file.getName().startsWith("_")) {
            System.out.println("Skip \"_\"-file '" + file.getName() + "'"); // Jobs leave files such as "_SUCCESS" in their output folders
            continue;
        }

        //System.out.println("Merging '" + file.getName() + "'");

        SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(file));
        Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);

        while (reader.next(key, value)) {
            writer.append(key, value);
        }

        reader.close();
    }

    writer.close();
}

From source file: edu.nyu.vida.data_polygamy.utils.GetMergeFiles.java

License: BSD License

public static void main(String[] args) throws IllegalArgumentException, IOException, URISyntaxException {
    String fromDirectory = args[0];
    String toEventsDirectory = args[1];
    String toOutliersDirectory = args[2];
    String metadataFile = args[3];

    // Detecting datasets.

    HashSet<String> datasets = new HashSet<String>();

    FileReader fileReader = new FileReader(metadataFile);
    BufferedReader bufferedReader = new BufferedReader(fileReader);

    String line;
    while ((line = bufferedReader.readLine()) != null) {
        String[] parts = line.split(",");
        datasets.add(parts[0]);
    }
    bufferedReader.close();

    // Downloading relationships.

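    // Relationship folder names look like "xxxx-xxxx-xxxx-xxxx": two dataset ids, each made of two 4-character alphanumeric blocks.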
    String relationshipPatternStr = "([a-zA-Z0-9]{4}\\-[a-zA-Z0-9]{4})\\-([a-zA-Z0-9]{4}\\-[a-zA-Z0-9]{4})";
    Pattern relationshipPattern = Pattern.compile(relationshipPatternStr);

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    FileSystem localFS = FileSystem.getLocal(conf);

    for (FileStatus status : fs.listStatus(new Path(fs.getHomeDirectory() + "/" + fromDirectory))) {
        if (!status.isDirectory()) {
            continue;
        }
        Path file = status.getPath();

        Matcher m = relationshipPattern.matcher(file.getName());
        if (!m.find())
            continue;

        String ds1 = m.group(1);
        String ds2 = m.group(2);

        if (!datasets.contains(ds1))
            continue;
        if (!datasets.contains(ds2))
            continue;

        for (FileStatus statusDir : fs.listStatus(file)) {
            if (!statusDir.isDirectory()) {
                continue;
            }

            Path fromPath = statusDir.getPath();
            String toPathStr;
            if (fromPath.getName().contains("events")) {
                toPathStr = toEventsDirectory + "/" + fromPath.getParent().getName() + "-" + fromPath.getName();
            } else {
                toPathStr = toOutliersDirectory + "/" + fromPath.getParent().getName() + "-"
                        + fromPath.getName();
            }
            Path toPath = new Path(toPathStr);

            System.out.println("Copying:");
            System.out.println("  From: " + fromPath.toString());
            System.out.println("  To: " + toPath.toString());

            FileUtil.copyMerge(fs, // HDFS File System
                    fromPath, // HDFS path
                    localFS, // Local File System
                    toPath, // Local Path
                    false, // Do not delete HDFS path
                    conf, // Configuration
                    null);
        }
    }
}

From source file: edu.nyu.vida.data_polygamy.utils.MergeFiles.java

License: BSD License

public static <K, V> void merge(Path fromDirectory, Path toFile, Class<K> keyClass, Class<V> valueClass)
        throws IOException {

    Configuration conf = new Configuration();

    FileSystem fs = FileSystem.get(conf);

    SequenceFile.Writer writer = SequenceFile.createWriter(conf, SequenceFile.Writer.file(toFile),
            SequenceFile.Writer.keyClass(keyClass), SequenceFile.Writer.valueClass(valueClass));

    for (FileStatus status : fs.listStatus(fromDirectory)) {
        if (status.isDirectory()) {
            System.out.println("Skip directory '" + status.getPath().getName() + "'");
            continue;
        }

        Path file = status.getPath();

        if (file.getName().startsWith("_")) {
            System.out.println("Skip \"_\"-file '" + file.getName() + "'"); // Jobs leave files such as "_SUCCESS" in their output folders
            continue;
        }

        SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(file));
        Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);

        while (reader.next(key, value)) {
            writer.append(key, value);
        }

        reader.close();
    }

    writer.close();
}

From source file: edu.stolaf.cs.wmrserver.JobServiceHandler.java

License: Apache License

public static FileStatus[] listInputFiles(FileSystem fs, Path path) throws IOException {
    if (!fs.isDirectory(path))
        return new FileStatus[] { fs.getFileStatus(path) };
    else {
        // Get all files in directory that are not directories or hidden files

        final FileSystem fsFinal = fs;
        PathFilter filter = new PathFilter() {
            public boolean accept(Path p) {
                try {
                    return !(fsFinal.isDirectory(p) || p.getName().startsWith(".")
                            || p.getName().startsWith("_"));
                } catch (IOException ex) {
                    throw new RuntimeException("Error filtering files.", ex);
                }
            }
        };

        return fs.listStatus(path, filter);
    }
}
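
A hypothetical invocation of this helper (the input path is illustrative):

Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
// List the non-hidden, non-directory inputs under a job's input path.
for (FileStatus input : JobServiceHandler.listInputFiles(fs, new Path("/user/alice/job-input"))) {
    System.out.println(input.getPath().getName()); // final component of each input file
}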

From source file: edu.uci.ics.asterix.aoya.AsterixYARNClient.java

License: Apache License

/**
 * Uploads external libraries and functions to HDFS for an instance to use when started.
 * @throws IllegalStateException
 * @throws IOException
 */

private void installExtLibs() throws IllegalStateException, IOException {
    FileSystem fs = FileSystem.get(conf);
    if (!instanceExists()) {
        throw new IllegalStateException("No instance by name " + instanceName + " found.");
    }
    if (isRunning()) {
        throw new IllegalStateException(
                "Instance " + instanceName + " is running. Please stop it before installing any libraries.");
    }
    String libPathSuffix = CONF_DIR_REL + instanceFolder + "library" + Path.SEPARATOR + libDataverse
            + Path.SEPARATOR;
    Path src = new Path(extLibs);
    String fullLibPath = libPathSuffix + src.getName();
    Path libFilePath = new Path(fs.getHomeDirectory(), fullLibPath);
    LOG.info("Copying Asterix external library to DFS");
    fs.copyFromLocalFile(false, true, src, libFilePath);
}

From source file: edu.uci.ics.pregelix.dataflow.HDFSFileWriteOperatorDescriptor.java

License: Apache License

@SuppressWarnings("rawtypes")
@Override
public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
        final IRecordDescriptorProvider recordDescProvider, final int partition, int nPartitions)
        throws HyracksDataException {
    return new AbstractUnaryInputSinkOperatorNodePushable() {
        private RecordDescriptor rd0;
        private FrameDeserializer frameDeserializer;
        private Configuration conf;
        private VertexWriter vertexWriter;
        private TaskAttemptContext context;
        private String TEMP_DIR = "_temporary";
        private ClassLoader ctxCL;
        private ContextFactory ctxFactory = new ContextFactory();

        @Override
        public void open() throws HyracksDataException {
            rd0 = inputRdFactory == null ? recordDescProvider.getInputRecordDescriptor(getActivityId(), 0)
                    : inputRdFactory.createRecordDescriptor();
            frameDeserializer = new FrameDeserializer(ctx.getFrameSize(), rd0);
            ctxCL = Thread.currentThread().getContextClassLoader();
            Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
            conf = confFactory.createConfiguration();

            VertexOutputFormat outputFormat = BspUtils.createVertexOutputFormat(conf);
            context = ctxFactory.createContext(conf, partition);
            try {
                vertexWriter = outputFormat.createVertexWriter(context);
            } catch (InterruptedException e) {
                throw new HyracksDataException(e);
            } catch (IOException e) {
                throw new HyracksDataException(e);
            }
        }

        @SuppressWarnings("unchecked")
        @Override
        public void nextFrame(ByteBuffer frame) throws HyracksDataException {
            frameDeserializer.reset(frame);
            try {
                while (!frameDeserializer.done()) {
                    Object[] tuple = frameDeserializer.deserializeRecord();
                    Vertex value = (Vertex) tuple[1];
                    vertexWriter.writeVertex(value);
                }
            } catch (InterruptedException e) {
                throw new HyracksDataException(e);
            } catch (IOException e) {
                throw new HyracksDataException(e);
            }
        }

        @Override
        public void fail() throws HyracksDataException {
            Thread.currentThread().setContextClassLoader(ctxCL);
        }

        @Override
        public void close() throws HyracksDataException {
            try {
                vertexWriter.close(context);
                moveFilesToFinalPath();
            } catch (InterruptedException e) {
                throw new HyracksDataException(e);
            } catch (IOException e) {
                throw new HyracksDataException(e);
            }
        }

        private void moveFilesToFinalPath() throws HyracksDataException {
            try {
                JobContext job = ctxFactory.createJobContext(conf);
                Path outputPath = FileOutputFormat.getOutputPath(job);
                FileSystem dfs = FileSystem.get(conf);
                Path filePath = new Path(outputPath, "part-" + Integer.toString(partition));
                FileStatus[] results = findPartitionPaths(outputPath, dfs);
                if (results.length >= 1) {
                    /**
                     * for Hadoop-0.20.2
                     */
                    renameFile(dfs, filePath, results);
                } else {
                    /**
                     * for Hadoop-0.23.1
                     */
                    int jobId = job.getJobID().getId();
                    outputPath = new Path(
                            outputPath.toString() + File.separator + TEMP_DIR + File.separator + jobId);
                    results = findPartitionPaths(outputPath, dfs);
                    renameFile(dfs, filePath, results);
                }
            } catch (IOException e) {
                throw new HyracksDataException(e);
            } finally {
                Thread.currentThread().setContextClassLoader(ctxCL);
            }
        }

        private FileStatus[] findPartitionPaths(Path outputPath, FileSystem dfs)
                throws FileNotFoundException, IOException {
            FileStatus[] tempPaths = dfs.listStatus(outputPath, new PathFilter() {
                @Override
                public boolean accept(Path dir) {
                    return dir.getName().endsWith(TEMP_DIR);
                }
            });
            Path tempDir = tempPaths[0].getPath();
            FileStatus[] results = dfs.listStatus(tempDir, new PathFilter() {
                @Override
                public boolean accept(Path dir) {
                    return dir.getName().indexOf(context.getTaskAttemptID().toString()) >= 0;
                }
            });
            return results;
        }

        private void renameFile(FileSystem dfs, Path filePath, FileStatus[] results)
                throws IOException, HyracksDataException, FileNotFoundException {
            Path srcDir = results[0].getPath();
            if (!dfs.exists(srcDir))
                throw new HyracksDataException("file " + srcDir.toString() + " does not exist!");

            FileStatus[] srcFiles = dfs.listStatus(srcDir);
            Path srcFile = srcFiles[0].getPath();
            dfs.delete(filePath, true);
            dfs.rename(srcFile, filePath);
        }

    };
}

From source file: edu.uci.ics.pregelix.dataflow.VertexFileWriteOperatorDescriptor.java

License: Apache License

@SuppressWarnings("rawtypes")
@Override
public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
        final IRecordDescriptorProvider recordDescProvider, final int partition, int nPartitions)
        throws HyracksDataException {
    return new AbstractUnaryInputSinkOperatorNodePushable() {
        private RecordDescriptor rd0;
        private FrameDeserializer frameDeserializer;
        private Configuration conf;
        private VertexWriter vertexWriter;
        private TaskAttemptContext context;
        private String TEMP_DIR = "_temporary";
        private ClassLoader ctxCL;
        private ContextFactory ctxFactory = new ContextFactory();

        @Override
        public void open() throws HyracksDataException {
            rd0 = inputRdFactory == null ? recordDescProvider.getInputRecordDescriptor(getActivityId(), 0)
                    : inputRdFactory.createRecordDescriptor(ctx);
            frameDeserializer = new FrameDeserializer(rd0);
            ctxCL = Thread.currentThread().getContextClassLoader();
            Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
            conf = confFactory.createConfiguration(ctx);

            VertexOutputFormat outputFormat = BspUtils.createVertexOutputFormat(conf);
            context = ctxFactory.createContext(conf, partition);
            context.getConfiguration().setClassLoader(ctx.getJobletContext().getClassLoader());
            try {
                if (preHookFactory != null) {
                    preHookFactory.createRuntimeHook().configure(ctx);
                }
                vertexWriter = outputFormat.createVertexWriter(context);
            } catch (InterruptedException e) {
                throw new HyracksDataException(e);
            } catch (IOException e) {
                throw new HyracksDataException(e);
            }
        }

        @SuppressWarnings("unchecked")
        @Override
        public void nextFrame(ByteBuffer frame) throws HyracksDataException {
            frameDeserializer.reset(frame);
            try {
                while (!frameDeserializer.done()) {
                    Object[] tuple = frameDeserializer.deserializeRecord();
                    Vertex value = (Vertex) tuple[1];
                    vertexWriter.writeVertex(value);
                }
            } catch (InterruptedException e) {
                throw new HyracksDataException(e);
            } catch (IOException e) {
                throw new HyracksDataException(e);
            }
        }

        @Override
        public void fail() throws HyracksDataException {
            Thread.currentThread().setContextClassLoader(ctxCL);
        }

        @Override
        public void close() throws HyracksDataException {
            try {
                vertexWriter.close(context);
                moveFilesToFinalPath();
            } catch (InterruptedException e) {
                throw new HyracksDataException(e);
            } catch (IOException e) {
                throw new HyracksDataException(e);
            }
        }

        private void moveFilesToFinalPath() throws HyracksDataException {
            try {
                JobContext job = ctxFactory.createJobContext(conf);
                Path outputPath = FileOutputFormat.getOutputPath(job);
                FileSystem dfs = FileSystem.get(conf);
                Path filePath = new Path(outputPath, "part-" + Integer.toString(partition));
                FileStatus[] results = findPartitionPaths(outputPath, dfs);
                if (results.length >= 1) {
                    /**
                     * for Hadoop-0.20.2
                     */
                    renameFile(dfs, filePath, results);
                } else {
                    /**
                     * for Hadoop-0.23.1
                     */
                    int jobId = job.getJobID().getId();
                    outputPath = new Path(
                            outputPath.toString() + File.separator + TEMP_DIR + File.separator + jobId);
                    results = findPartitionPaths(outputPath, dfs);
                    renameFile(dfs, filePath, results);
                }
            } catch (IOException e) {
                throw new HyracksDataException(e);
            } finally {
                Thread.currentThread().setContextClassLoader(ctxCL);
            }
        }

        private FileStatus[] findPartitionPaths(Path outputPath, FileSystem dfs)
                throws FileNotFoundException, IOException {
            FileStatus[] tempPaths = dfs.listStatus(outputPath, new PathFilter() {
                @Override
                public boolean accept(Path dir) {
                    return dir.getName().endsWith(TEMP_DIR) && dir.getName().indexOf(".crc") < 0;
                }
            });
            Path tempDir = tempPaths[0].getPath();
            FileStatus[] results = dfs.listStatus(tempDir, new PathFilter() {
                @Override
                public boolean accept(Path dir) {
                    return dir.getName().indexOf(context.getTaskAttemptID().toString()) >= 0
                            && dir.getName().indexOf(".crc") < 0;
                }
            });
            return results;
        }

        private void renameFile(FileSystem dfs, Path filePath, FileStatus[] results)
                throws IOException, HyracksDataException, FileNotFoundException {
            Path srcDir = results[0].getPath();
            if (!dfs.exists(srcDir)) {
                throw new HyracksDataException("file " + srcDir.toString() + " does not exist!");
            }

            FileStatus[] srcFiles = dfs.listStatus(srcDir);
            Path srcFile = srcFiles[0].getPath();
            dfs.delete(filePath, true);
            dfs.rename(srcFile, filePath);
        }

    };
}

From source file: edu.ucsb.cs.hybrid.io.Splitter.java

License: Apache License

/**
 * Checks the input files and picks one with at least the requested number of vectors.
 * @param job : job configuration.
 * @param inputPath : path to contain the one map file.
 * @param othersPath : other path that contains the whole input.
 * @param S_size : number of vectors to put into the one map file.
 */
public static void createOneMapFile(JobConf job, Path inputPath, Path othersPath, long S_size)
        throws IOException {
    FileStatus[] files = hdfs.listStatus(othersPath);
    for (int i = 0; i < files.length; i++) {
        if (Collector.countFileVectors(hdfs, files[i].getPath(), job) >= S_size) {
            SequenceFile.Reader reader = new SequenceFile.Reader(hdfs, files[i].getPath(), job);
            SequenceFile.Writer writer = SequenceFile.createWriter(hdfs, job,
                    new Path(inputPath.getName() + "/" + files[i].getPath().getName()), LongWritable.class,
                    FeatureWeightArrayWritable.class, SequenceFile.CompressionType.NONE);

            long vCount = -1;
            while (reader.next(key, value) && (++vCount) < S_size)
                writer.append(key, value);
            writer.close();
            reader.close();
            return;
        }
    }
    throw new IOException("Requested S_size is larger than every input file!");
}

From source file: edu.ucsb.cs.hybrid.io.Splitter.java

License: Apache License

/**
 * Splits the files in the input directory into files of at most S_size
 * vectors each. It does not combine vectors from two different partitions.
 * @param job : job configuration.
 * @param S_size : split files into at most this many vectors each.
 * @param inputPath : path of the directory of the input files.
 * @return path of the split files, each with at most S_size vectors.
 */
public static Path splitAll(JobConf job, long S_size, Path inputPath) throws IOException {

    System.out.println(
            "Splitter.splitAll() from " + inputPath.getName() + " into partitions of size at most " + S_size);
    LongWritable key = new LongWritable();
    FeatureWeightArrayWritable value = new FeatureWeightArrayWritable();
    SequenceFile.Writer writer = null;

    String tmpDir = "splits-tmp";
    hdfs.delete(new Path(tmpDir), true);
    hdfs.mkdirs(new Path(tmpDir));

    FileStatus[] files = Partitioner.setFiles(hdfs, inputPath);
    for (int i = 0; i < files.length; i++) {
        if ((hdfs.isDirectory(files[i].getPath()) || files[i].getPath().getName().startsWith("_")))
            continue;
        SequenceFile.Reader reader = new SequenceFile.Reader(hdfs, files[i].getPath(), job);
        long subpartition = 0, vecCount = 0;

        while (reader.next(key, value)) {
            vecCount++;
            if (vecCount == 1) {
                if (writer != null)
                    writer.close();
                subpartition++;
                writer = SequenceFile.createWriter(hdfs, job,
                        new Path(tmpDir + "/" + files[i].getPath().getName() + "-" + subpartition),
                        LongWritable.class, FeatureWeightArrayWritable.class,
                        SequenceFile.CompressionType.NONE);

            }
            writer.append(key, value);
            if (vecCount == S_size)
                vecCount = 0;
        }
        reader.close();
    }
    if (writer != null)
        writer.close();
    return new Path(tmpDir);
}
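
As a usage sketch, assuming Splitter's static hdfs handle has been initialized elsewhere (the path and size below are illustrative):

JobConf job = new JobConf();
// Split every input file into pieces of at most 1000 vectors each.
Path splits = Splitter.splitAll(job, 1000L, new Path("/user/alice/vectors"));
System.out.println(splits.getName()); // "splits-tmp"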