Example usage for org.apache.hadoop.fs FileSystem open

List of usage examples for org.apache.hadoop.fs FileSystem open

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem.open.

Prototype

public FSDataInputStream open(PathHandle fd) throws IOException 

Source Link

Document

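Open an FSDataInputStream matching the PathHandle instance. Note that the usage examples below all call the open(Path) overload, which opens an FSDataInputStream at the given path, rather than this PathHandle variant.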

Usage

From source file:com.moz.fiji.schema.tools.LayoutTool.java

License:Apache License

/**
 * Loads a table layout descriptor from a JSON-encoded file.
 *
 * @param filePath Path to a JSON-encoded table layout descriptor.
 * @return the table layout descriptor decoded from the file.
 * @throws Exception on error./*w w  w  . j  a v  a  2  s .  co  m*/
 */
private TableLayoutDesc loadJsonTableLayoutDesc(String filePath) throws Exception {
    final Path path = new Path(filePath);
    final FileSystem fs = fileSystemSpecified(path) ? path.getFileSystem(getConf())
            : FileSystem.getLocal(getConf());
    final InputStream istream = fs.open(path);
    try {
        return FijiTableLayout.readTableLayoutDescFromJSON(istream);
    } finally {
        ResourceUtils.closeOrLog(istream);
        ResourceUtils.closeOrLog(fs);
    }
}

From source file:com.mozilla.grouperfish.pig.eval.ml.TFIDFVectorizer.java

License:Apache License

/**
 * Populates {@code featureIndex} on first use.
 *
 * <p>Every non-directory entry listed under {@code featureIndexPath} is read line by line;
 * each trimmed line is mapped to the next sequential integer, starting at 0 and continuing
 * across files in the order returned by {@code listStatus}. When {@code featureIndex} is
 * already set, this method does nothing.</p>
 *
 * @param featureIndexPath location whose files list one feature per line.
 * @throws IOException if listing or reading the files fails; in that case
 *         {@code featureIndex} is left unset so a later call can retry.
 */
private void loadFeatureIndex(String featureIndexPath) throws IOException {
    if (featureIndex == null) {
        // Fill a local map and publish it only after a complete load, so a failure midway
        // cannot leave a partially filled index behind the null check above.
        HashMap<String, Integer> loaded = new HashMap<String, Integer>();

        Path p = new Path(featureIndexPath);
        FileSystem fs = FileSystem.get(p.toUri(), new Configuration());
        int index = 0;
        for (FileStatus status : fs.listStatus(p)) {
            if (!status.isDir()) {
                BufferedReader reader = null;
                try {
                    // Decode as UTF-8 explicitly instead of the JVM's platform default charset.
                    reader = new BufferedReader(
                            new InputStreamReader(fs.open(status.getPath()), "UTF-8"));
                    String line;
                    while ((line = reader.readLine()) != null) {
                        loaded.put(line.trim(), index++);
                    }
                } finally {
                    if (reader != null) {
                        reader.close();
                    }
                }
            }
        }

        featureIndex = loaded;
        log.info("Loaded feature index with size: " + loaded.size());
    }
}

From source file:com.mozilla.grouperfish.text.Dictionary.java

License:Apache License

/**
 * Loads a set of words from the files directly under a dictionary path.
 *
 * <p>Each non-directory entry listed under {@code dictionaryPath} is read line by line and
 * every trimmed line is added to the result.</p>
 *
 * @param fs file system used to list and open the dictionary files.
 * @param dictionaryPath location of the dictionary files; may be {@code null}.
 * @return the loaded words, or {@code null} when {@code dictionaryPath} is {@code null}.
 * @throws IOException if listing or reading the files fails.
 */
public static Set<String> loadDictionary(FileSystem fs, Path dictionaryPath) throws IOException {
    Set<String> dictionary = null;
    if (dictionaryPath != null) {
        dictionary = new HashSet<String>();
        for (FileStatus status : fs.listStatus(dictionaryPath)) {
            if (!status.isDir()) {
                BufferedReader reader = null;
                try {
                    // Decode as UTF-8 explicitly instead of the JVM's platform default charset.
                    reader = new BufferedReader(
                            new InputStreamReader(fs.open(status.getPath()), "UTF-8"));
                    String word;
                    while ((word = reader.readLine()) != null) {
                        dictionary.add(word.trim());
                    }
                } finally {
                    if (reader != null) {
                        reader.close();
                    }
                }
            }
        }

        LOG.info("Loaded dictionary with size: " + dictionary.size());
    }

    return dictionary;
}

From source file:com.mozilla.grouperfish.text.Dictionary.java

License:Apache License

/**
 * Loads a word-to-id index from the files directly under a dictionary path.
 *
 * <p>Each non-directory entry listed under {@code dictionaryPath} is read line by line;
 * every trimmed line is mapped to the next sequential integer, starting at 0 and continuing
 * across files in the order returned by {@code listStatus}.</p>
 *
 * @param fs file system used to list and open the dictionary files.
 * @param dictionaryPath location of the dictionary files; may be {@code null}.
 * @return the word-to-id map, or {@code null} when {@code dictionaryPath} is {@code null}.
 * @throws IOException if listing or reading the files fails.
 */
public static Map<String, Integer> loadFeatureIndex(FileSystem fs, Path dictionaryPath) throws IOException {
    Map<String, Integer> featureIndex = null;
    if (dictionaryPath != null) {
        featureIndex = new HashMap<String, Integer>();
        int idx = 0;
        for (FileStatus status : fs.listStatus(dictionaryPath)) {
            if (!status.isDir()) {
                BufferedReader reader = null;
                try {
                    // Decode as UTF-8 explicitly instead of the JVM's platform default charset.
                    reader = new BufferedReader(
                            new InputStreamReader(fs.open(status.getPath()), "UTF-8"));
                    String word;
                    while ((word = reader.readLine()) != null) {
                        featureIndex.put(word.trim(), idx++);
                    }
                } finally {
                    if (reader != null) {
                        reader.close();
                    }
                }
            }
        }

        LOG.info("Loaded dictionary with size: " + featureIndex.size());
    }

    return featureIndex;
}

From source file:com.mozilla.grouperfish.text.Dictionary.java

License:Apache License

/**
 * Loads an id-to-word index from the files directly under a dictionary path.
 *
 * <p>Each non-directory entry listed under {@code dictionaryPath} is read line by line;
 * the next sequential integer (starting at 0 and continuing across files in the order
 * returned by {@code listStatus}) is mapped to each trimmed line.</p>
 *
 * @param fs file system used to list and open the dictionary files.
 * @param dictionaryPath location of the dictionary files; may be {@code null}.
 * @return the id-to-word map, or {@code null} when {@code dictionaryPath} is {@code null}.
 * @throws IOException if listing or reading the files fails.
 */
public static Map<Integer, String> loadInvertedFeatureIndex(FileSystem fs, Path dictionaryPath)
        throws IOException {
    Map<Integer, String> featureIndex = null;
    if (dictionaryPath != null) {
        featureIndex = new HashMap<Integer, String>();
        int idx = 0;
        for (FileStatus status : fs.listStatus(dictionaryPath)) {
            if (!status.isDir()) {
                BufferedReader reader = null;
                try {
                    // Decode as UTF-8 explicitly instead of the JVM's platform default charset.
                    reader = new BufferedReader(
                            new InputStreamReader(fs.open(status.getPath()), "UTF-8"));
                    String word;
                    while ((word = reader.readLine()) != null) {
                        featureIndex.put(idx++, word.trim());
                    }
                } finally {
                    if (reader != null) {
                        reader.close();
                    }
                }
            }
        }

        LOG.info("Loaded dictionary with size: " + featureIndex.size());
    }

    return featureIndex;
}

From source file:com.mozilla.grouperfish.transforms.coclustering.pig.eval.text.ConvertDocumentIDToID.java

License:Apache License

/**
 * Populates {@code documentIndex} on first use.
 *
 * <p>Every entry listed under {@code documentIndexPath} that is neither a directory nor
 * named with a leading underscore is read line by line; each trimmed line is mapped to the
 * next sequential integer, starting at 0 and continuing across files in the order returned
 * by {@code listStatus}. When {@code documentIndex} is already set, this method does
 * nothing.</p>
 *
 * @param documentIndexPath location whose files list one document id per line.
 * @throws IOException if listing or reading the files fails; in that case
 *         {@code documentIndex} is left unset so a later call can retry.
 */
private void loadDocumentIndex(String documentIndexPath) throws IOException {
    if (documentIndex == null) {
        // Fill a local map and publish it only after a complete load, so a failure midway
        // cannot leave a partially filled index behind the null check above.
        HashMap<String, Integer> loaded = new HashMap<String, Integer>();

        Path p = new Path(documentIndexPath);
        FileSystem fs = FileSystem.get(p.toUri(), new Configuration());
        int index = 0;
        for (FileStatus status : fs.listStatus(p)) {
            Path currPath = status.getPath();
            if (!status.isDir() && !currPath.getName().startsWith("_")) {
                BufferedReader reader = null;
                try {
                    // Decode as UTF-8 explicitly instead of the JVM's platform default charset.
                    reader = new BufferedReader(new InputStreamReader(fs.open(currPath), "UTF-8"));
                    String line;
                    while ((line = reader.readLine()) != null) {
                        loaded.put(line.trim(), index++);
                    }
                } finally {
                    if (reader != null) {
                        reader.close();
                    }
                }
            }
        }

        documentIndex = loaded;
        log.info("Loaded document index with size: " + loaded.size());
    }
}

From source file:com.mozilla.grouperfish.transforms.coclustering.pig.eval.text.ConvertFeatureToID.java

License:Apache License

/**
 * Populates {@code featureIndex} on first use.
 *
 * <p>Every entry listed under {@code featureIndexPath} that is neither a directory nor
 * named with a leading underscore is read line by line; each trimmed line is mapped to the
 * next sequential integer, starting at 0 and continuing across files in the order returned
 * by {@code listStatus}. When {@code featureIndex} is already set, this method does
 * nothing.</p>
 *
 * @param featureIndexPath location whose files list one feature per line.
 * @throws IOException if listing or reading the files fails; in that case
 *         {@code featureIndex} is left unset so a later call can retry.
 */
private void loadFeatureIndex(String featureIndexPath) throws IOException {
    if (featureIndex == null) {
        // Fill a local map and publish it only after a complete load, so a failure midway
        // cannot leave a partially filled index behind the null check above.
        HashMap<String, Integer> loaded = new HashMap<String, Integer>();

        Path p = new Path(featureIndexPath);
        FileSystem fs = FileSystem.get(p.toUri(), new Configuration());
        int index = 0;
        for (FileStatus status : fs.listStatus(p)) {
            Path currPath = status.getPath();
            if (!status.isDir() && !currPath.getName().startsWith("_")) {
                BufferedReader reader = null;
                try {
                    // Decode as UTF-8 explicitly instead of the JVM's platform default charset.
                    reader = new BufferedReader(new InputStreamReader(fs.open(currPath), "UTF-8"));
                    String line;
                    while ((line = reader.readLine()) != null) {
                        loaded.put(line.trim(), index++);
                    }
                } finally {
                    if (reader != null) {
                        reader.close();
                    }
                }
            }
        }

        featureIndex = loaded;
        log.info("Loaded feature index with size: " + loaded.size());
    }
}

From source file:com.mozilla.grouperfish.transforms.coclustering.pig.eval.text.UnigramExtractor.java

License:Apache License

/**
 * Loads the stopword set from the files directly under {@code stopwordDictPath}.
 *
 * <p>When {@code stopwordDictPath} is set, {@code stopwords} is replaced by a fresh set and
 * every trimmed line of each non-directory entry listed under that path is added to it.
 * When the path is {@code null}, this method does nothing.</p>
 *
 * @throws IOException if listing or reading the files fails.
 */
private void loadStopwordDict() throws IOException {
    if (stopwordDictPath != null) {
        stopwords = new HashSet<String>();

        Path p = new Path(stopwordDictPath);
        FileSystem hdfs = FileSystem.get(p.toUri(), new Configuration());
        for (FileStatus status : hdfs.listStatus(p)) {
            if (!status.isDir()) {
                BufferedReader reader = null;
                try {
                    // Decode as UTF-8 explicitly instead of the JVM's platform default charset.
                    reader = new BufferedReader(
                            new InputStreamReader(hdfs.open(status.getPath()), "UTF-8"));
                    String line;
                    while ((line = reader.readLine()) != null) {
                        stopwords.add(line.trim());
                    }
                } finally {
                    if (reader != null) {
                        reader.close();
                    }
                }
            }
        }

        log.info("Loaded stopword dictionary with size: " + stopwords.size());
    }
}

From source file:com.mozilla.grouperfish.transforms.coclustering.pig.storage.MahoutVectorStorage.java

License:Apache License

/**
 * Stores the record writer and, when {@code dimensionPath} is set, reads the dimension
 * count from it.
 *
 * <p>The first entry listed under {@code dimensionPath} that is neither a directory nor
 * named with a leading underscore is opened, and its first line is parsed as an integer
 * into {@code dimensions}. Remaining entries are not read.</p>
 *
 * @param writer the record writer to keep for later use.
 * @throws IOException if listing or reading fails, if the file is empty, or if its first
 *         line is not a valid integer.
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@Override
public void prepareToWrite(RecordWriter writer) throws IOException {
    if (dimensionPath != null) {
        Path p = new Path(dimensionPath);
        FileSystem fs = FileSystem.get(p.toUri(), new Configuration());
        for (FileStatus status : fs.listStatus(p)) {
            Path currPath = status.getPath();
            if (status.isDir() || currPath.getName().startsWith("_")) {
                continue;
            }
            BufferedReader reader = null;
            try {
                // Decode as UTF-8 explicitly instead of the JVM's platform default charset.
                reader = new BufferedReader(new InputStreamReader(fs.open(currPath), "UTF-8"));
                String line = reader.readLine();
                if (line == null) {
                    throw new IOException("Dimensions file is empty: " + currPath);
                }
                this.dimensions = Integer.parseInt(line.trim());
            } catch (NumberFormatException nfe) {
                LOG.error("Unexpected input for dimensions", nfe);
                throw new IOException("Unexpected input for dimensions in " + currPath, nfe);
            } finally {
                if (reader != null) {
                    reader.close();
                }
            }
            // Only the first eligible file is consulted. The break lives after the
            // try/finally (not inside finally) so that a pending exception is not discarded.
            break;
        }
    }
    this.writer = writer;
}

From source file:com.mozilla.grouperfish.transforms.coclustering.text.Dictionary.java

License:Apache License

/**
 * Loads an id-to-word index whose ids are stored explicitly in the dictionary files.
 *
 * <p>Each non-directory entry listed under {@code dictionaryPath} is read line by line.
 * Every line is split on tab characters; the first field is parsed as the integer key and
 * the trimmed second field becomes the value. Any further fields are ignored.</p>
 *
 * @param fs file system used to list and open the dictionary files.
 * @param dictionaryPath location of the dictionary files; may be {@code null}.
 * @return the id-to-word map, or {@code null} when {@code dictionaryPath} is {@code null}.
 * @throws IOException if listing or reading the files fails, or if a line does not have
 *         the form {@code <integer key><TAB><value>}.
 */
public static Map<Integer, String> loadInvertedIndexWithKeys(FileSystem fs, Path dictionaryPath)
        throws IOException {
    Map<Integer, String> index = null;
    if (dictionaryPath != null) {
        index = new HashMap<Integer, String>();
        for (FileStatus status : fs.listStatus(dictionaryPath)) {
            if (!status.isDir()) {
                BufferedReader reader = null;
                try {
                    // Decode as UTF-8 explicitly instead of the JVM's platform default charset.
                    reader = new BufferedReader(
                            new InputStreamReader(fs.open(status.getPath()), "UTF-8"));
                    String line;
                    while ((line = reader.readLine()) != null) {
                        String[] pair = line.split("\t");
                        // Report malformed input as an IOException naming the file, rather
                        // than letting an unchecked exception escape without context.
                        if (pair.length < 2) {
                            throw new IOException("Malformed dictionary line in "
                                    + status.getPath() + ": " + line);
                        }
                        try {
                            index.put(Integer.parseInt(pair[0]), pair[1].trim());
                        } catch (NumberFormatException nfe) {
                            throw new IOException("Non-integer dictionary key in "
                                    + status.getPath() + ": " + line, nfe);
                        }
                    }
                } finally {
                    if (reader != null) {
                        reader.close();
                    }
                }
            }
        }

        LOG.info("Loaded dictionary with size: " + index.size());
    }

    return index;
}