List of usage examples for org.apache.hadoop.fs FileSystem open
public FSDataInputStream open(PathHandle fd) throws IOException
From source file:com.moz.fiji.schema.tools.LayoutTool.java
License:Apache License
/** * Loads a table layout descriptor from a JSON-encoded file. * * @param filePath Path to a JSON-encoded table layout descriptor. * @return the table layout descriptor decoded from the file. * @throws Exception on error./*w w w . j a v a 2 s . co m*/ */ private TableLayoutDesc loadJsonTableLayoutDesc(String filePath) throws Exception { final Path path = new Path(filePath); final FileSystem fs = fileSystemSpecified(path) ? path.getFileSystem(getConf()) : FileSystem.getLocal(getConf()); final InputStream istream = fs.open(path); try { return FijiTableLayout.readTableLayoutDescFromJSON(istream); } finally { ResourceUtils.closeOrLog(istream); ResourceUtils.closeOrLog(fs); } }
From source file:com.mozilla.grouperfish.pig.eval.ml.TFIDFVectorizer.java
License:Apache License
/**
 * Populates {@code featureIndex} on first use; does nothing when it is already set.
 *
 * <p>Every non-directory entry listed under {@code featureIndexPath} is read line by
 * line. Each trimmed line becomes a key mapped to a running counter that starts at 0
 * and keeps increasing across files, in the order returned by {@code listStatus}.
 *
 * @param featureIndexPath location whose listing supplies the index files.
 * @throws IOException if listing, opening, or reading a file fails; in that case
 *         {@code featureIndex} is left unset so a later call can retry.
 */
private void loadFeatureIndex(String featureIndexPath) throws IOException {
    if (featureIndex != null) {
        return;
    }

    // Build into a local map and publish only on success, so a failed load does not
    // leave a partial index that later calls would mistake for a complete one.
    final HashMap<String, Integer> loaded = new HashMap<String, Integer>();
    final Path p = new Path(featureIndexPath);
    final FileSystem fs = FileSystem.get(p.toUri(), new Configuration());
    int index = 0;
    for (FileStatus status : fs.listStatus(p)) {
        if (status.isDir()) {
            continue;
        }
        BufferedReader reader = null;
        try {
            // Decode explicitly as UTF-8 instead of relying on the platform default charset.
            reader = new BufferedReader(new InputStreamReader(fs.open(status.getPath()), "UTF-8"));
            String line;
            while ((line = reader.readLine()) != null) {
                loaded.put(line.trim(), index++);
            }
        } finally {
            if (reader != null) {
                reader.close();
            }
        }
    }
    featureIndex = loaded;
    log.info("Loaded feature index with size: " + featureIndex.size());
}
From source file:com.mozilla.grouperfish.text.Dictionary.java
License:Apache License
/**
 * Reads every non-directory entry listed under {@code dictionaryPath} and collects
 * the trimmed lines into a set.
 *
 * @param fs file system used to list and open the dictionary files.
 * @param dictionaryPath path whose listing supplies the files; may be {@code null}.
 * @return the set of trimmed lines, or {@code null} when {@code dictionaryPath} is
 *         {@code null}.
 * @throws IOException if listing, opening, or reading a file fails.
 */
public static Set<String> loadDictionary(FileSystem fs, Path dictionaryPath) throws IOException {
    if (dictionaryPath == null) {
        return null;
    }

    final Set<String> dictionary = new HashSet<String>();
    for (FileStatus status : fs.listStatus(dictionaryPath)) {
        if (status.isDir()) {
            continue;
        }
        BufferedReader reader = null;
        try {
            // Decode explicitly as UTF-8 instead of relying on the platform default charset.
            reader = new BufferedReader(new InputStreamReader(fs.open(status.getPath()), "UTF-8"));
            String word;
            while ((word = reader.readLine()) != null) {
                dictionary.add(word.trim());
            }
        } finally {
            if (reader != null) {
                reader.close();
            }
        }
    }
    LOG.info("Loaded dictionary with size: " + dictionary.size());
    return dictionary;
}
From source file:com.mozilla.grouperfish.text.Dictionary.java
License:Apache License
public static Map<String, Integer> loadFeatureIndex(FileSystem fs, Path dictionaryPath) throws IOException { Map<String, Integer> featureIndex = null; if (dictionaryPath != null) { featureIndex = new HashMap<String, Integer>(); int idx = 0; for (FileStatus status : fs.listStatus(dictionaryPath)) { if (!status.isDir()) { BufferedReader reader = null; try { reader = new BufferedReader(new InputStreamReader(fs.open(status.getPath()))); String word = null; while ((word = reader.readLine()) != null) { featureIndex.put(word.trim(), idx++); }//ww w. j av a2 s .co m } finally { if (reader != null) { reader.close(); } } } } LOG.info("Loaded dictionary with size: " + featureIndex.size()); } return featureIndex; }
From source file:com.mozilla.grouperfish.text.Dictionary.java
License:Apache License
public static Map<Integer, String> loadInvertedFeatureIndex(FileSystem fs, Path dictionaryPath) throws IOException { Map<Integer, String> featureIndex = null; if (dictionaryPath != null) { featureIndex = new HashMap<Integer, String>(); int idx = 0; for (FileStatus status : fs.listStatus(dictionaryPath)) { if (!status.isDir()) { BufferedReader reader = null; try { reader = new BufferedReader(new InputStreamReader(fs.open(status.getPath()))); String word = null; while ((word = reader.readLine()) != null) { featureIndex.put(idx++, word.trim()); }//from w w w . j av a 2s . com } finally { if (reader != null) { reader.close(); } } } } LOG.info("Loaded dictionary with size: " + featureIndex.size()); } return featureIndex; }
From source file:com.mozilla.grouperfish.transforms.coclustering.pig.eval.text.ConvertDocumentIDToID.java
License:Apache License
private void loadDocumentIndex(String documentIndexPath) throws IOException { if (documentIndex == null) { documentIndex = new HashMap<String, Integer>(); Path p = new Path(documentIndexPath); FileSystem fs = FileSystem.get(p.toUri(), new Configuration()); int index = 0; for (FileStatus status : fs.listStatus(p)) { Path currPath = status.getPath(); if (!status.isDir() && !currPath.getName().startsWith("_")) { BufferedReader reader = null; try { reader = new BufferedReader(new InputStreamReader(fs.open(currPath))); String line = null; while ((line = reader.readLine()) != null) { documentIndex.put(line.trim(), index++); }//from www .j a v a2 s .co m } finally { if (reader != null) { reader.close(); } } } } log.info("Loaded document index with size: " + documentIndex.size()); } }
From source file:com.mozilla.grouperfish.transforms.coclustering.pig.eval.text.ConvertFeatureToID.java
License:Apache License
private void loadFeatureIndex(String featureIndexPath) throws IOException { if (featureIndex == null) { featureIndex = new HashMap<String, Integer>(); Path p = new Path(featureIndexPath); FileSystem fs = FileSystem.get(p.toUri(), new Configuration()); int index = 0; for (FileStatus status : fs.listStatus(p)) { Path currPath = status.getPath(); if (!status.isDir() && !currPath.getName().startsWith("_")) { BufferedReader reader = null; try { reader = new BufferedReader(new InputStreamReader(fs.open(currPath))); String line = null; while ((line = reader.readLine()) != null) { featureIndex.put(line.trim(), index++); }//from w w w. j ava 2 s . co m } finally { if (reader != null) { reader.close(); } } } } log.info("Loaded feature index with size: " + featureIndex.size()); } }
From source file:com.mozilla.grouperfish.transforms.coclustering.pig.eval.text.UnigramExtractor.java
License:Apache License
private void loadStopwordDict() throws IOException { if (stopwordDictPath != null) { stopwords = new HashSet<String>(); FileSystem hdfs = null; Path p = new Path(stopwordDictPath); hdfs = FileSystem.get(p.toUri(), new Configuration()); for (FileStatus status : hdfs.listStatus(p)) { if (!status.isDir()) { BufferedReader reader = null; try { reader = new BufferedReader(new InputStreamReader(hdfs.open(status.getPath()))); String line = null; while ((line = reader.readLine()) != null) { stopwords.add(line.trim()); }//from ww w .jav a2s.c o m } finally { if (reader != null) { reader.close(); } } } } log.info("Loaded stopword dictionary with size: " + stopwords.size()); } }
From source file:com.mozilla.grouperfish.transforms.coclustering.pig.storage.MahoutVectorStorage.java
License:Apache License
@SuppressWarnings({ "rawtypes", "unchecked", "finally" }) @Override/*from ww w . j a v a 2s.c o m*/ public void prepareToWrite(RecordWriter writer) throws IOException { if (dimensionPath != null) { Path p = new Path(dimensionPath); FileSystem fs = FileSystem.get(p.toUri(), new Configuration()); for (FileStatus status : fs.listStatus(p)) { Path currPath = status.getPath(); if (!status.isDir() && !currPath.getName().startsWith("_")) { BufferedReader reader = null; try { reader = new BufferedReader(new InputStreamReader(fs.open(currPath))); String line = reader.readLine(); this.dimensions = Integer.parseInt(line); } catch (NumberFormatException nfe) { LOG.error("Unexpected input for dimensions", nfe); throw new IOException(); } finally { if (reader != null) { reader.close(); } // TODO: SMELLY: Why loop if we always cancel after the first file? break; } } } } this.writer = writer; }
From source file:com.mozilla.grouperfish.transforms.coclustering.text.Dictionary.java
License:Apache License
/**
 * Builds an integer-keyed map from tab-separated lines in the non-directory entries
 * listed under {@code dictionaryPath}.
 *
 * <p>Each line is split on tab characters; the first field is parsed as the integer
 * key and the trimmed second field is stored as the value. Any further fields are
 * ignored.
 *
 * @param fs file system used to list and open the dictionary files.
 * @param dictionaryPath path whose listing supplies the files; may be {@code null}.
 * @return the key-to-value map, or {@code null} when {@code dictionaryPath} is
 *         {@code null}.
 * @throws IOException if listing, opening, or reading a file fails.
 */
public static Map<Integer, String> loadInvertedIndexWithKeys(FileSystem fs, Path dictionaryPath)
        throws IOException {
    if (dictionaryPath == null) {
        return null;
    }

    final Map<Integer, String> index = new HashMap<Integer, String>();
    for (FileStatus status : fs.listStatus(dictionaryPath)) {
        if (status.isDir()) {
            continue;
        }
        BufferedReader reader = null;
        try {
            // Decode explicitly as UTF-8 instead of relying on the platform default charset.
            reader = new BufferedReader(new InputStreamReader(fs.open(status.getPath()), "UTF-8"));
            String line;
            while ((line = reader.readLine()) != null) {
                // NOTE(review): a line without a tab or with a non-numeric key fails
                // here with an unchecked exception -- confirm inputs are always
                // well-formed "key<TAB>value" lines.
                final String[] pair = line.split("\t");
                index.put(Integer.parseInt(pair[0]), pair[1].trim());
            }
        } finally {
            if (reader != null) {
                reader.close();
            }
        }
    }
    LOG.info("Loaded dictionary with size: " + index.size());
    return index;
}