List of usage examples for org.apache.hadoop.fs FileSystem open
public FSDataInputStream open(Path f) throws IOException
From source file:com.ibm.bi.dml.hops.recompile.Recompiler.java
License:Open Source License
/** * //from w w w. j av a 2 s . com * @param fname * @return * @throws DMLRuntimeException */ private static void tryReadMetaDataFileMatrixCharacteristics(DataOp dop) throws DMLRuntimeException { try { //get meta data filename String mtdname = DataExpression.getMTDFileName(dop.getFileName()); JobConf job = ConfigurationManager.getCachedJobConf(); FileSystem fs = FileSystem.get(job); Path path = new Path(mtdname); if (fs.exists(path)) { BufferedReader br = null; try { br = new BufferedReader(new InputStreamReader(fs.open(path))); JSONObject mtd = JSONHelper.parse(br); DataType dt = DataType .valueOf(String.valueOf(mtd.get(DataExpression.DATATYPEPARAM)).toUpperCase()); dop.setDataType(dt); dop.setValueType(ValueType .valueOf(String.valueOf(mtd.get(DataExpression.VALUETYPEPARAM)).toUpperCase())); dop.setDim1((dt == DataType.MATRIX) ? Long.parseLong(mtd.get(DataExpression.READROWPARAM).toString()) : 0); dop.setDim2((dt == DataType.MATRIX) ? Long.parseLong(mtd.get(DataExpression.READCOLPARAM).toString()) : 0); } finally { if (br != null) br.close(); } } } catch (Exception ex) { throw new DMLRuntimeException(ex); } }
From source file:com.ibm.bi.dml.parser.antlr4.DMLParserWrapper.java
License:Open Source License
public static String readDMLScript(String script) throws IOException, LanguageException { String dmlScriptStr = null;/* w w w . j av a2 s .c o m*/ //read DML script from file if (script == null) throw new LanguageException("DML script path was not specified!"); StringBuilder sb = new StringBuilder(); BufferedReader in = null; try { //read from hdfs or gpfs file system if (script.startsWith("hdfs:") || script.startsWith("gpfs:")) { if (!LocalFileUtils.validateExternalFilename(script, true)) throw new LanguageException("Invalid (non-trustworthy) hdfs filename."); FileSystem fs = FileSystem.get(ConfigurationManager.getCachedJobConf()); Path scriptPath = new Path(script); in = new BufferedReader(new InputStreamReader(fs.open(scriptPath))); } // from local file system else { if (!LocalFileUtils.validateExternalFilename(script, false)) throw new LanguageException("Invalid (non-trustworthy) local filename."); in = new BufferedReader(new FileReader(script)); } //core script reading String tmp = null; while ((tmp = in.readLine()) != null) { sb.append(tmp); sb.append("\n"); } } catch (IOException ex) { LOG.error("Failed to read the script from the file system", ex); throw ex; } finally { if (in != null) in.close(); } dmlScriptStr = sb.toString(); return dmlScriptStr; }
From source file:com.ibm.bi.dml.parser.DataExpression.java
License:Open Source License
/** * /* w ww . j a v a2s .c o m*/ * @param filename * @return * @throws LanguageException */ public JSONObject readMetadataFile(String filename, boolean conditional) throws LanguageException { JSONObject retVal = null; boolean exists = false; FileSystem fs = null; try { fs = FileSystem.get(ConfigurationManager.getCachedJobConf()); } catch (Exception e) { raiseValidateError("could not read the configuration file: " + e.getMessage(), false); } Path pt = new Path(filename); try { if (fs.exists(pt)) { exists = true; } } catch (Exception e) { exists = false; } boolean isDirBoolean = false; try { if (exists && fs.getFileStatus(pt).isDirectory()) isDirBoolean = true; else isDirBoolean = false; } catch (Exception e) { raiseValidateError( "error validing whether path " + pt.toString() + " is directory or not: " + e.getMessage(), conditional); } // CASE: filename is a directory -- process as a directory if (exists && isDirBoolean) { // read directory contents retVal = new JSONObject(); FileStatus[] stats = null; try { stats = fs.listStatus(pt); } catch (Exception e) { raiseValidateError("for MTD file in directory, error reading directory with MTD file " + pt.toString() + ": " + e.getMessage(), conditional); } for (FileStatus stat : stats) { Path childPath = stat.getPath(); // gives directory name if (childPath.getName().startsWith("part")) { BufferedReader br = null; try { br = new BufferedReader(new InputStreamReader(fs.open(childPath))); } catch (Exception e) { raiseValidateError("for MTD file in directory, error reading part of MTD file with path " + childPath.toString() + ": " + e.getMessage(), conditional); } JSONObject childObj = null; try { childObj = JSONHelper.parse(br); } catch (Exception e) { raiseValidateError("for MTD file in directory, error parsing part of MTD file with path " + childPath.toString() + ": " + e.getMessage(), conditional); } for (Object obj : childObj.entrySet()) { @SuppressWarnings("unchecked") Entry<Object, Object> e = (Entry<Object, Object>) 
obj; Object key = e.getKey(); Object val = e.getValue(); retVal.put(key, val); } } } // end for } // CASE: filename points to a file else if (exists) { BufferedReader br = null; // try reading MTD file try { br = new BufferedReader(new InputStreamReader(fs.open(pt))); } catch (Exception e) { raiseValidateError("error reading MTD file with path " + pt.toString() + ": " + e.getMessage(), conditional); } // try parsing MTD file try { retVal = JSONHelper.parse(br); } catch (Exception e) { raiseValidateError("error parsing MTD file with path " + pt.toString() + ": " + e.getMessage(), conditional); } } return retVal; }
From source file:com.ibm.bi.dml.parser.DataExpression.java
License:Open Source License
public String[] readMatrixMarketFile(String filename, boolean conditional) throws LanguageException { String[] retVal = new String[2]; retVal[0] = new String(""); retVal[1] = new String(""); boolean exists = false; try {/*from w w w . java 2 s . c om*/ FileSystem fs = FileSystem.get(ConfigurationManager.getCachedJobConf()); Path pt = new Path(filename); if (fs.exists(pt)) { exists = true; } boolean getFileStatusIsDir = fs.getFileStatus(pt).isDirectory(); if (exists && getFileStatusIsDir) { raiseValidateError("MatrixMarket files as directories not supported", conditional); } else if (exists) { BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(pt))); try { retVal[0] = in.readLine(); // skip all commented lines do { retVal[1] = in.readLine(); } while (retVal[1].charAt(0) == '%'); if (!retVal[0].startsWith("%%")) { raiseValidateError("MatrixMarket files must begin with a header line.", conditional); } } finally { if (in != null) in.close(); } } else { raiseValidateError("Could not find the file: " + filename, conditional); } } catch (IOException e) { //LOG.error(this.printErrorLocation() + "Error reading MatrixMarket file: " + filename ); //throw new LanguageException(this.printErrorLocation() + "Error reading MatrixMarket file: " + filename ); throw new LanguageException(e); } return retVal; }
From source file:com.ibm.bi.dml.parser.DataExpression.java
License:Open Source License
public boolean checkHasMatrixMarketFormat(String inputFileName, String mtdFileName, boolean conditional) throws LanguageException { // Check the MTD file exists. if there is an MTD file, return false. JSONObject mtdObject = readMetadataFile(mtdFileName, conditional); if (mtdObject != null) return false; boolean exists = false; FileSystem fs = null; try {/*from ww w. ja v a 2 s .co m*/ fs = FileSystem.get(ConfigurationManager.getCachedJobConf()); } catch (Exception e) { LOG.error(this.printErrorLocation() + "could not read the configuration file."); throw new LanguageException(this.printErrorLocation() + "could not read the configuration file.", e); } Path pt = new Path(inputFileName); try { if (fs.exists(pt)) { exists = true; } } catch (Exception e) { LOG.error(this.printErrorLocation() + "file " + inputFileName + " not found"); throw new LanguageException(this.printErrorLocation() + "file " + inputFileName + " not found"); } try { // CASE: filename is a directory -- process as a directory if (exists && fs.getFileStatus(pt).isDirectory()) { // currently, only MM files as files are supported. So, if file is directory, then infer // likely not MM file return false; } // CASE: filename points to a file else if (exists) { //BufferedReader in = new BufferedReader(new FileReader(filename)); BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(pt))); String headerLine = new String(""); if (in.ready()) headerLine = in.readLine(); in.close(); // check that headerline starts with "%%" // will infer malformed if (headerLine != null && headerLine.startsWith("%%")) return true; else return false; } else { return false; } } catch (Exception e) { return false; } }
From source file:com.ibm.bi.dml.runtime.io.ReaderTextCell.java
License:Open Source License
/** * // w w w .j a v a 2 s . c om * @param path * @param job * @param dest * @param rlen * @param clen * @param brlen * @param bclen * @throws IOException * @throws IllegalAccessException * @throws InstantiationException */ private void readRawTextCellMatrixFromHDFS(Path path, JobConf job, FileSystem fs, MatrixBlock dest, long rlen, long clen, int brlen, int bclen, boolean matrixMarket) throws IOException { //create input stream for path InputStream inputStream = fs.open(path); //actual read readRawTextCellMatrixFromInputStream(inputStream, dest, rlen, clen, brlen, bclen, matrixMarket); }
From source file:com.ibm.bi.dml.runtime.io.ReaderTextCSV.java
License:Open Source License
/** * /*from w w w .ja v a2 s .c o m*/ * @param path * @param job * @param fs * @param dest * @param rlen * @param clen * @param brlen * @param bclen * @param hasHeader * @param delim * @param fill * @param fillValue * @return * @throws IOException */ @SuppressWarnings("unchecked") private MatrixBlock readCSVMatrixFromHDFS(Path path, JobConf job, FileSystem fs, MatrixBlock dest, long rlen, long clen, int brlen, int bclen, boolean hasHeader, String delim, boolean fill, double fillValue) throws IOException { ArrayList<Path> files = new ArrayList<Path>(); if (fs.isDirectory(path)) { for (FileStatus stat : fs.listStatus(path, CSVReblockMR.hiddenFileFilter)) files.add(stat.getPath()); Collections.sort(files); } else files.add(path); if (dest == null) { dest = computeCSVSize(files, job, fs, hasHeader, delim, fill, fillValue); clen = dest.getNumColumns(); } boolean sparse = dest.isInSparseFormat(); ///////////////////////////////////////// String value = null; int row = 0; int col = -1; double cellValue = 0; long lnnz = 0; for (int fileNo = 0; fileNo < files.size(); fileNo++) { BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(files.get(fileNo)))); if (fileNo == 0 && hasHeader) br.readLine(); //ignore header // Read the data boolean emptyValuesFound = false; try { if (sparse) //SPARSE<-value { while ((value = br.readLine()) != null) //foreach line { String cellStr = value.toString().trim(); emptyValuesFound = false; String[] parts = IOUtilFunctions.split(cellStr, delim); col = 0; for (String part : parts) //foreach cell { part = part.trim(); if (part.isEmpty()) { emptyValuesFound = true; cellValue = fillValue; } else { cellValue = UtilFunctions.parseToDouble(part); } if (cellValue != 0) { dest.appendValue(row, col, cellValue); lnnz++; } col++; } //sanity checks for empty values and number of columns IOUtilFunctions.checkAndRaiseErrorCSVEmptyField(cellStr, fill, emptyValuesFound); IOUtilFunctions.checkAndRaiseErrorCSVNumColumns(path.toString(), cellStr, 
parts, clen); row++; } } else //DENSE<-value { while ((value = br.readLine()) != null) //foreach line { String cellStr = value.toString().trim(); emptyValuesFound = false; String[] parts = IOUtilFunctions.split(cellStr, delim); col = 0; for (String part : parts) //foreach cell { part = part.trim(); if (part.isEmpty()) { emptyValuesFound = true; cellValue = fillValue; } else { cellValue = UtilFunctions.parseToDouble(part); } if (cellValue != 0) { dest.setValueDenseUnsafe(row, col, cellValue); lnnz++; } col++; } //sanity checks for empty values and number of columns IOUtilFunctions.checkAndRaiseErrorCSVEmptyField(cellStr, fill, emptyValuesFound); IOUtilFunctions.checkAndRaiseErrorCSVNumColumns(path.toString(), cellStr, parts, clen); row++; } } } finally { IOUtilFunctions.closeSilently(br); } } //post processing dest.setNonZeros(lnnz); return dest; }
From source file:com.ibm.bi.dml.runtime.io.ReaderTextCSV.java
License:Open Source License
/** * // w ww . j av a2 s.co m * @param files * @param job * @param fs * @param hasHeader * @param delim * @param fill * @param fillValue * @return * @throws IOException */ private MatrixBlock computeCSVSize(List<Path> files, JobConf job, FileSystem fs, boolean hasHeader, String delim, boolean fill, double fillValue) throws IOException { int nrow = -1; int ncol = -1; String value = null; String cellStr = null; for (int fileNo = 0; fileNo < files.size(); fileNo++) { BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(files.get(fileNo)))); try { // Read the header line, if there is one. if (fileNo == 0) { if (hasHeader) br.readLine(); //ignore header if ((value = br.readLine()) != null) { cellStr = value.toString().trim(); ncol = StringUtils.countMatches(cellStr, delim) + 1; nrow = 1; } } while ((value = br.readLine()) != null) { nrow++; } } finally { IOUtilFunctions.closeSilently(br); } } //create new matrix block (assume sparse for consistency w/ compiler) return new MatrixBlock(nrow, ncol, true); }
From source file:com.ibm.bi.dml.runtime.io.WriterMatrixMarket.java
License:Open Source License
/** * /*from w w w .j av a2 s. c o m*/ * @param srcFileName * @param fileName * @param rlen * @param clen * @param nnz * @throws IOException */ public void mergeTextcellToMatrixMarket(String srcFileName, String fileName, long rlen, long clen, long nnz) throws IOException { Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf()); Path src = new Path(srcFileName); Path merge = new Path(fileName); FileSystem hdfs = FileSystem.get(conf); if (hdfs.exists(merge)) { hdfs.delete(merge, true); } OutputStream out = hdfs.create(merge, true); // write out the header first StringBuilder sb = new StringBuilder(); sb.append("%%MatrixMarket matrix coordinate real general\n"); // output number of rows, number of columns and number of nnz sb.append(rlen + " " + clen + " " + nnz + "\n"); out.write(sb.toString().getBytes()); // if the source is a directory if (hdfs.getFileStatus(src).isDirectory()) { try { FileStatus[] contents = hdfs.listStatus(src); for (int i = 0; i < contents.length; i++) { if (!contents[i].isDirectory()) { InputStream in = hdfs.open(contents[i].getPath()); try { IOUtils.copyBytes(in, out, conf, false); } finally { IOUtilFunctions.closeSilently(in); } } } } finally { IOUtilFunctions.closeSilently(out); } } else if (hdfs.isFile(src)) { InputStream in = null; try { in = hdfs.open(src); IOUtils.copyBytes(in, out, conf, true); } finally { IOUtilFunctions.closeSilently(in); IOUtilFunctions.closeSilently(out); } } else { throw new IOException(src.toString() + ": No such file or directory"); } }
From source file:com.ibm.bi.dml.runtime.io.WriterTextCSV.java
License:Open Source License
/** * Method to merge multiple CSV part files on HDFS into a single CSV file on HDFS. * The part files are created by CSV_WRITE MR job. * /*ww w. j a va 2 s. c o m*/ * This method is invoked from CP-write instruction. * * @param srcFileName * @param destFileName * @param csvprop * @param rlen * @param clen * @throws IOException */ public void mergeCSVPartFiles(String srcFileName, String destFileName, CSVFileFormatProperties csvprop, long rlen, long clen) throws IOException { Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf()); Path srcFilePath = new Path(srcFileName); Path mergedFilePath = new Path(destFileName); FileSystem hdfs = FileSystem.get(conf); if (hdfs.exists(mergedFilePath)) { hdfs.delete(mergedFilePath, true); } OutputStream out = hdfs.create(mergedFilePath, true); // write out the header, if needed if (csvprop.hasHeader()) { StringBuilder sb = new StringBuilder(); for (int i = 0; i < clen; i++) { sb.append("C" + (i + 1)); if (i < clen - 1) sb.append(csvprop.getDelim()); } sb.append('\n'); out.write(sb.toString().getBytes()); sb.setLength(0); } // if the source is a directory if (hdfs.isDirectory(srcFilePath)) { try { FileStatus[] contents = hdfs.listStatus(srcFilePath); Path[] partPaths = new Path[contents.length]; int numPartFiles = 0; for (int i = 0; i < contents.length; i++) { if (!contents[i].isDirectory()) { partPaths[i] = contents[i].getPath(); numPartFiles++; } } Arrays.sort(partPaths); for (int i = 0; i < numPartFiles; i++) { InputStream in = hdfs.open(partPaths[i]); try { IOUtils.copyBytes(in, out, conf, false); if (i < numPartFiles - 1) out.write('\n'); } finally { IOUtilFunctions.closeSilently(in); } } } finally { IOUtilFunctions.closeSilently(out); } } else if (hdfs.isFile(srcFilePath)) { InputStream in = null; try { in = hdfs.open(srcFilePath); IOUtils.copyBytes(in, out, conf, true); } finally { IOUtilFunctions.closeSilently(in); IOUtilFunctions.closeSilently(out); } } else { throw new 
IOException(srcFilePath.toString() + ": No such file or directory"); } }