Example usage for org.apache.hadoop.fs FileSystem open

List of usage examples for org.apache.hadoop.fs FileSystem open

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem.open.

Prototype

public FSDataInputStream open(PathHandle fd) throws IOException 

Source Link

Document

Open an FSDataInputStream matching the PathHandle instance.

Usage

From source file:com.ibm.bi.dml.hops.recompile.Recompiler.java

License:Open Source License

/**
 * Looks up the metadata (MTD) file that belongs to the file of the given data operator and,
 * if that file exists, copies data type, value type and dimensions from it into the operator.
 * Nothing is changed when the metadata file does not exist.
 *
 * @param dop data operator whose file name identifies the metadata file; updated in place
 * @throws DMLRuntimeException wrapping any exception raised while locating, reading or
 *         parsing the metadata file
 */
private static void tryReadMetaDataFileMatrixCharacteristics(DataOp dop) throws DMLRuntimeException {
    try {
        // derive the name of the metadata file from the data file name
        final String metaFileName = DataExpression.getMTDFileName(dop.getFileName());

        final JobConf conf = ConfigurationManager.getCachedJobConf();
        final FileSystem fileSystem = FileSystem.get(conf);
        final Path metaPath = new Path(metaFileName);

        // no metadata file: leave the operator untouched
        if (!fileSystem.exists(metaPath)) {
            return;
        }

        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new InputStreamReader(fileSystem.open(metaPath)));
            final JSONObject meta = JSONHelper.parse(reader);

            final String dataTypeName = String.valueOf(meta.get(DataExpression.DATATYPEPARAM)).toUpperCase();
            final DataType dataType = DataType.valueOf(dataTypeName);
            dop.setDataType(dataType);

            final String valueTypeName = String.valueOf(meta.get(DataExpression.VALUETYPEPARAM)).toUpperCase();
            dop.setValueType(ValueType.valueOf(valueTypeName));

            // dimensions are only read for matrices; everything else gets 0 x 0
            final boolean isMatrix = (dataType == DataType.MATRIX);

            long rows = 0;
            if (isMatrix) {
                rows = Long.parseLong(meta.get(DataExpression.READROWPARAM).toString());
            }
            dop.setDim1(rows);

            long cols = 0;
            if (isMatrix) {
                cols = Long.parseLong(meta.get(DataExpression.READCOLPARAM).toString());
            }
            dop.setDim2(cols);
        } finally {
            if (reader != null) {
                reader.close();
            }
        }
    } catch (Exception ex) {
        throw new DMLRuntimeException(ex);
    }
}

From source file:com.ibm.bi.dml.parser.antlr4.DMLParserWrapper.java

License:Open Source License

/**
 * Reads a DML script into a string. Paths starting with {@code hdfs:} or {@code gpfs:} are
 * opened through the Hadoop {@code FileSystem}; every other path is read from the local
 * file system. Each line read is followed by a single {@code "\n"} in the result.
 *
 * @param script path of the script to read
 * @return the script text
 * @throws IOException if reading fails; the error is logged before it is rethrown
 * @throws LanguageException if {@code script} is null or the file name fails validation
 */
public static String readDMLScript(String script) throws IOException, LanguageException {
    if (script == null) {
        throw new LanguageException("DML script path was not specified!");
    }

    final boolean onDistributedFs = script.startsWith("hdfs:") || script.startsWith("gpfs:");
    final StringBuilder content = new StringBuilder();
    BufferedReader reader = null;

    try {
        if (onDistributedFs) {
            // hdfs or gpfs file system
            if (!LocalFileUtils.validateExternalFilename(script, true)) {
                throw new LanguageException("Invalid (non-trustworthy) hdfs filename.");
            }
            FileSystem fileSystem = FileSystem.get(ConfigurationManager.getCachedJobConf());
            reader = new BufferedReader(new InputStreamReader(fileSystem.open(new Path(script))));
        } else {
            // local file system
            if (!LocalFileUtils.validateExternalFilename(script, false)) {
                throw new LanguageException("Invalid (non-trustworthy) local filename.");
            }
            reader = new BufferedReader(new FileReader(script));
        }

        // copy the script line by line, normalizing line ends to "\n"
        for (String line = reader.readLine(); line != null; line = reader.readLine()) {
            content.append(line).append("\n");
        }
    } catch (IOException ex) {
        LOG.error("Failed to read the script from the file system", ex);
        throw ex;
    } finally {
        if (reader != null) {
            reader.close();
        }
    }

    return content.toString();
}

From source file:com.ibm.bi.dml.parser.DataExpression.java

License:Open Source License

/**
 * /* w ww . j  a  v  a2s .c o  m*/
 * @param filename
 * @return
 * @throws LanguageException
 */
public JSONObject readMetadataFile(String filename, boolean conditional) throws LanguageException {
    JSONObject retVal = null;
    boolean exists = false;
    FileSystem fs = null;

    try {
        fs = FileSystem.get(ConfigurationManager.getCachedJobConf());
    } catch (Exception e) {
        raiseValidateError("could not read the configuration file: " + e.getMessage(), false);
    }

    Path pt = new Path(filename);
    try {
        if (fs.exists(pt)) {
            exists = true;
        }
    } catch (Exception e) {
        exists = false;
    }

    boolean isDirBoolean = false;
    try {
        if (exists && fs.getFileStatus(pt).isDirectory())
            isDirBoolean = true;
        else
            isDirBoolean = false;
    } catch (Exception e) {
        raiseValidateError(
                "error validing whether path " + pt.toString() + " is directory or not: " + e.getMessage(),
                conditional);
    }

    // CASE: filename is a directory -- process as a directory
    if (exists && isDirBoolean) {

        // read directory contents
        retVal = new JSONObject();

        FileStatus[] stats = null;

        try {
            stats = fs.listStatus(pt);
        } catch (Exception e) {
            raiseValidateError("for MTD file in directory, error reading directory with MTD file "
                    + pt.toString() + ": " + e.getMessage(), conditional);
        }

        for (FileStatus stat : stats) {
            Path childPath = stat.getPath(); // gives directory name
            if (childPath.getName().startsWith("part")) {

                BufferedReader br = null;
                try {
                    br = new BufferedReader(new InputStreamReader(fs.open(childPath)));
                } catch (Exception e) {
                    raiseValidateError("for MTD file in directory, error reading part of MTD file with path "
                            + childPath.toString() + ": " + e.getMessage(), conditional);
                }

                JSONObject childObj = null;
                try {
                    childObj = JSONHelper.parse(br);
                } catch (Exception e) {
                    raiseValidateError("for MTD file in directory, error parsing part of MTD file with path "
                            + childPath.toString() + ": " + e.getMessage(), conditional);
                }

                for (Object obj : childObj.entrySet()) {
                    @SuppressWarnings("unchecked")
                    Entry<Object, Object> e = (Entry<Object, Object>) obj;
                    Object key = e.getKey();
                    Object val = e.getValue();
                    retVal.put(key, val);
                }
            }
        } // end for 
    }

    // CASE: filename points to a file
    else if (exists) {

        BufferedReader br = null;

        // try reading MTD file
        try {
            br = new BufferedReader(new InputStreamReader(fs.open(pt)));
        } catch (Exception e) {
            raiseValidateError("error reading MTD file with path " + pt.toString() + ": " + e.getMessage(),
                    conditional);
        }

        // try parsing MTD file
        try {
            retVal = JSONHelper.parse(br);
        } catch (Exception e) {
            raiseValidateError("error parsing MTD file with path " + pt.toString() + ": " + e.getMessage(),
                    conditional);
        }
    }

    return retVal;
}

From source file:com.ibm.bi.dml.parser.DataExpression.java

License:Open Source License

/**
 * Reads the first line and the first non-comment line of a MatrixMarket file.
 * Lines starting with {@code '%'} after the first line are skipped as comments.
 * A validation error is raised if the file is missing, is a directory, does not start
 * with a {@code "%%"} header line, or ends before a non-comment line is found.
 *
 * @param filename path of the MatrixMarket file
 * @param conditional forwarded to {@code raiseValidateError}
 * @return a two-element array: index 0 holds the first line, index 1 the first line that
 *         is not a comment; an element is the empty string when the line is unavailable
 * @throws LanguageException if raised by {@code raiseValidateError}, or wrapping an
 *         {@link IOException} that occurred while accessing the file
 */
public String[] readMatrixMarketFile(String filename, boolean conditional) throws LanguageException {
    String[] retVal = new String[] { "", "" };

    try {
        FileSystem fs = FileSystem.get(ConfigurationManager.getCachedJobConf());
        Path pt = new Path(filename);

        // check existence first: asking for the status of a missing path fails with an
        // IOException and would hide the dedicated "Could not find the file" error
        if (!fs.exists(pt)) {
            raiseValidateError("Could not find the file: " + filename, conditional);
        } else if (fs.getFileStatus(pt).isDirectory()) {
            raiseValidateError("MatrixMarket files as directories not supported", conditional);
        } else {
            BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(pt)));
            try {
                String header = in.readLine();

                // skip all commented lines; stop at end of file instead of dereferencing null,
                // and use startsWith so that an empty line does not raise an index error
                String sizeLine;
                do {
                    sizeLine = in.readLine();
                } while (sizeLine != null && sizeLine.startsWith("%"));

                // never hand null entries to the caller
                retVal[0] = (header != null) ? header : "";
                retVal[1] = (sizeLine != null) ? sizeLine : "";

                if (header == null || !header.startsWith("%%")) {
                    raiseValidateError("MatrixMarket files must begin with a header line.", conditional);
                }
                if (sizeLine == null) {
                    raiseValidateError("MatrixMarket file " + filename
                            + " ends before a non-comment line was found.", conditional);
                }
            } finally {
                in.close();
            }
        }
    } catch (IOException e) {
        throw new LanguageException(e);
    }

    return retVal;
}

From source file:com.ibm.bi.dml.parser.DataExpression.java

License:Open Source License

/**
 * Heuristically decides whether an input file is in MatrixMarket format.
 * The answer is {@code false} if a metadata file can be read for the input, if the input
 * does not exist or is a directory, or if any error occurs while inspecting it. Otherwise
 * the input counts as MatrixMarket exactly when its first line starts with {@code "%%"}.
 *
 * @param inputFileName path of the data file to inspect
 * @param mtdFileName path of the metadata file that would accompany the data file
 * @param conditional forwarded to {@code readMetadataFile}
 * @return {@code true} if the first line of the input starts with {@code "%%"} and no
 *         metadata was found
 * @throws LanguageException if the file system cannot be obtained or the existence check
 *         fails, or if raised by {@code readMetadataFile}
 */
public boolean checkHasMatrixMarketFormat(String inputFileName, String mtdFileName, boolean conditional)
        throws LanguageException {
    // Check the MTD file exists. if there is an MTD file, return false.
    JSONObject mtdObject = readMetadataFile(mtdFileName, conditional);
    if (mtdObject != null)
        return false;

    FileSystem fs = null;
    try {
        fs = FileSystem.get(ConfigurationManager.getCachedJobConf());
    } catch (Exception e) {
        LOG.error(this.printErrorLocation() + "could not read the configuration file.");
        throw new LanguageException(this.printErrorLocation() + "could not read the configuration file.", e);
    }

    Path pt = new Path(inputFileName);
    boolean exists = false;
    try {
        exists = fs.exists(pt);
    } catch (Exception e) {
        LOG.error(this.printErrorLocation() + "file " + inputFileName + " not found");
        // keep the cause so the underlying file system problem stays visible
        throw new LanguageException(this.printErrorLocation() + "file " + inputFileName + " not found", e);
    }

    try {
        // currently, only MM files as files are supported. So, if the path is missing or a
        // directory, infer that it is not an MM file
        if (!exists || fs.getFileStatus(pt).isDirectory()) {
            return false;
        }

        BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(pt)));
        try {
            // read directly instead of consulting ready(): ready() only reports whether a
            // read could block and is not an end-of-file test; readLine() returns null at EOF
            String headerLine = in.readLine();
            return headerLine != null && headerLine.startsWith("%%");
        } finally {
            // close on every path, including when readLine fails
            in.close();
        }
    } catch (Exception e) {
        // deliberate best effort: any problem while inspecting the file means "not MatrixMarket"
        return false;
    }
}

From source file:com.ibm.bi.dml.runtime.io.ReaderTextCell.java

License:Open Source License

/**
 * Opens the given path on the given file system and delegates the actual read to
 * {@code readRawTextCellMatrixFromInputStream}.
 * NOTE(review): the stream is not closed here; presumably the delegate closes it - confirm.
 *
 * @param path file to read
 * @param job job configuration (not used by this method)
 * @param fs file system used to open {@code path}
 * @param dest matrix block passed on to the delegate
 * @param rlen passed on to the delegate
 * @param clen passed on to the delegate
 * @param brlen passed on to the delegate
 * @param bclen passed on to the delegate
 * @param matrixMarket passed on to the delegate
 * @throws IOException if the file cannot be opened or the delegate fails
 */
private void readRawTextCellMatrixFromHDFS(Path path, JobConf job, FileSystem fs, MatrixBlock dest, long rlen,
        long clen, int brlen, int bclen, boolean matrixMarket) throws IOException {
    // open the file, then hand the raw stream to the stream-based reader
    final InputStream rawCells = fs.open(path);
    readRawTextCellMatrixFromInputStream(rawCells, dest, rlen, clen, brlen, bclen, matrixMarket);
}

From source file:com.ibm.bi.dml.runtime.io.ReaderTextCSV.java

License:Open Source License

/**
 * /*from   w w  w .ja  v  a2 s .c o  m*/
 * @param path
 * @param job
 * @param fs
 * @param dest
 * @param rlen
 * @param clen
 * @param brlen
 * @param bclen
 * @param hasHeader
 * @param delim
 * @param fill
 * @param fillValue
 * @return
 * @throws IOException
 */
@SuppressWarnings("unchecked")
private MatrixBlock readCSVMatrixFromHDFS(Path path, JobConf job, FileSystem fs, MatrixBlock dest, long rlen,
        long clen, int brlen, int bclen, boolean hasHeader, String delim, boolean fill, double fillValue)
        throws IOException {
    ArrayList<Path> files = new ArrayList<Path>();
    if (fs.isDirectory(path)) {
        for (FileStatus stat : fs.listStatus(path, CSVReblockMR.hiddenFileFilter))
            files.add(stat.getPath());
        Collections.sort(files);
    } else
        files.add(path);

    if (dest == null) {
        dest = computeCSVSize(files, job, fs, hasHeader, delim, fill, fillValue);
        clen = dest.getNumColumns();
    }

    boolean sparse = dest.isInSparseFormat();

    /////////////////////////////////////////
    String value = null;
    int row = 0;
    int col = -1;
    double cellValue = 0;
    long lnnz = 0;

    for (int fileNo = 0; fileNo < files.size(); fileNo++) {
        BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(files.get(fileNo))));
        if (fileNo == 0 && hasHeader)
            br.readLine(); //ignore header

        // Read the data
        boolean emptyValuesFound = false;
        try {
            if (sparse) //SPARSE<-value
            {
                while ((value = br.readLine()) != null) //foreach line
                {
                    String cellStr = value.toString().trim();
                    emptyValuesFound = false;
                    String[] parts = IOUtilFunctions.split(cellStr, delim);
                    col = 0;

                    for (String part : parts) //foreach cell
                    {
                        part = part.trim();
                        if (part.isEmpty()) {
                            emptyValuesFound = true;
                            cellValue = fillValue;
                        } else {
                            cellValue = UtilFunctions.parseToDouble(part);
                        }
                        if (cellValue != 0) {
                            dest.appendValue(row, col, cellValue);
                            lnnz++;
                        }
                        col++;
                    }

                    //sanity checks for empty values and number of columns
                    IOUtilFunctions.checkAndRaiseErrorCSVEmptyField(cellStr, fill, emptyValuesFound);
                    IOUtilFunctions.checkAndRaiseErrorCSVNumColumns(path.toString(), cellStr, parts, clen);
                    row++;
                }
            } else //DENSE<-value
            {
                while ((value = br.readLine()) != null) //foreach line
                {
                    String cellStr = value.toString().trim();
                    emptyValuesFound = false;
                    String[] parts = IOUtilFunctions.split(cellStr, delim);
                    col = 0;

                    for (String part : parts) //foreach cell
                    {
                        part = part.trim();
                        if (part.isEmpty()) {
                            emptyValuesFound = true;
                            cellValue = fillValue;
                        } else {
                            cellValue = UtilFunctions.parseToDouble(part);
                        }
                        if (cellValue != 0) {
                            dest.setValueDenseUnsafe(row, col, cellValue);
                            lnnz++;
                        }
                        col++;
                    }

                    //sanity checks for empty values and number of columns
                    IOUtilFunctions.checkAndRaiseErrorCSVEmptyField(cellStr, fill, emptyValuesFound);
                    IOUtilFunctions.checkAndRaiseErrorCSVNumColumns(path.toString(), cellStr, parts, clen);
                    row++;
                }
            }
        } finally {
            IOUtilFunctions.closeSilently(br);
        }
    }

    //post processing
    dest.setNonZeros(lnnz);

    return dest;
}

From source file:com.ibm.bi.dml.runtime.io.ReaderTextCSV.java

License:Open Source License

/**
 * Determines the dimensions of a CSV matrix spread over the given files and creates an
 * empty matrix block of that size.
 * The row count is the number of lines over all files, not counting the header of the
 * first file. The column count is the number of delimiters in the first data line plus one.
 * That first data line is taken from whichever file provides it, so an empty or
 * header-only first file no longer distorts the row count. If there is no data line at
 * all, both dimensions are -1, as before.
 *
 * @param files CSV files in read order
 * @param job not used by this method
 * @param fs file system used to open the files
 * @param hasHeader whether the first line of the first file is a header to skip
 * @param delim cell delimiter
 * @param fill not used by this method
 * @param fillValue not used by this method
 * @return a new block created as {@code new MatrixBlock(nrow, ncol, true)}
 * @throws IOException if a file cannot be opened or read
 */
private MatrixBlock computeCSVSize(List<Path> files, JobConf job, FileSystem fs, boolean hasHeader,
        String delim, boolean fill, double fillValue) throws IOException {
    int nrow = 0;
    int ncol = -1; // -1 until the first data line has been seen

    for (int fileNo = 0; fileNo < files.size(); fileNo++) {
        BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(files.get(fileNo))));
        try {
            // only the first file may start with a header line
            if (fileNo == 0 && hasHeader)
                br.readLine(); //ignore header

            String line = null;
            while ((line = br.readLine()) != null) {
                if (ncol < 0) {
                    // the first data line defines the number of columns
                    ncol = StringUtils.countMatches(line.trim(), delim) + 1;
                }
                nrow++;
            }
        } finally {
            IOUtilFunctions.closeSilently(br);
        }
    }

    // keep the previous result for input without any data line
    if (ncol < 0)
        nrow = -1;

    //create new matrix block (assume sparse for consistency w/ compiler)
    return new MatrixBlock(nrow, ncol, true);
}

From source file:com.ibm.bi.dml.runtime.io.WriterMatrixMarket.java

License:Open Source License

/**
 * /*from  w  w w .j  av  a2  s.  c o  m*/
 * @param srcFileName
 * @param fileName
 * @param rlen
 * @param clen
 * @param nnz
 * @throws IOException
 */
public void mergeTextcellToMatrixMarket(String srcFileName, String fileName, long rlen, long clen, long nnz)
        throws IOException {
    Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());

    Path src = new Path(srcFileName);
    Path merge = new Path(fileName);
    FileSystem hdfs = FileSystem.get(conf);

    if (hdfs.exists(merge)) {
        hdfs.delete(merge, true);
    }

    OutputStream out = hdfs.create(merge, true);

    // write out the header first 
    StringBuilder sb = new StringBuilder();
    sb.append("%%MatrixMarket matrix coordinate real general\n");

    // output number of rows, number of columns and number of nnz
    sb.append(rlen + " " + clen + " " + nnz + "\n");
    out.write(sb.toString().getBytes());

    // if the source is a directory
    if (hdfs.getFileStatus(src).isDirectory()) {
        try {
            FileStatus[] contents = hdfs.listStatus(src);
            for (int i = 0; i < contents.length; i++) {
                if (!contents[i].isDirectory()) {
                    InputStream in = hdfs.open(contents[i].getPath());
                    try {
                        IOUtils.copyBytes(in, out, conf, false);
                    } finally {
                        IOUtilFunctions.closeSilently(in);
                    }
                }
            }
        } finally {
            IOUtilFunctions.closeSilently(out);
        }
    } else if (hdfs.isFile(src)) {
        InputStream in = null;
        try {
            in = hdfs.open(src);
            IOUtils.copyBytes(in, out, conf, true);
        } finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }
    } else {
        throw new IOException(src.toString() + ": No such file or directory");
    }
}

From source file:com.ibm.bi.dml.runtime.io.WriterTextCSV.java

License:Open Source License

/**
 * Method to merge multiple CSV part files on HDFS into a single CSV file on HDFS. 
 * The part files are created by CSV_WRITE MR job. 
 * /*ww w.  j  a va 2  s. c  o m*/
 * This method is invoked from CP-write instruction.
 * 
 * @param srcFileName
 * @param destFileName
 * @param csvprop
 * @param rlen
 * @param clen
 * @throws IOException
 */
public void mergeCSVPartFiles(String srcFileName, String destFileName, CSVFileFormatProperties csvprop,
        long rlen, long clen) throws IOException {
    Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());

    Path srcFilePath = new Path(srcFileName);
    Path mergedFilePath = new Path(destFileName);
    FileSystem hdfs = FileSystem.get(conf);

    if (hdfs.exists(mergedFilePath)) {
        hdfs.delete(mergedFilePath, true);
    }
    OutputStream out = hdfs.create(mergedFilePath, true);

    // write out the header, if needed
    if (csvprop.hasHeader()) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < clen; i++) {
            sb.append("C" + (i + 1));
            if (i < clen - 1)
                sb.append(csvprop.getDelim());
        }
        sb.append('\n');
        out.write(sb.toString().getBytes());
        sb.setLength(0);
    }

    // if the source is a directory
    if (hdfs.isDirectory(srcFilePath)) {
        try {
            FileStatus[] contents = hdfs.listStatus(srcFilePath);
            Path[] partPaths = new Path[contents.length];
            int numPartFiles = 0;
            for (int i = 0; i < contents.length; i++) {
                if (!contents[i].isDirectory()) {
                    partPaths[i] = contents[i].getPath();
                    numPartFiles++;
                }
            }
            Arrays.sort(partPaths);

            for (int i = 0; i < numPartFiles; i++) {
                InputStream in = hdfs.open(partPaths[i]);
                try {
                    IOUtils.copyBytes(in, out, conf, false);
                    if (i < numPartFiles - 1)
                        out.write('\n');
                } finally {
                    IOUtilFunctions.closeSilently(in);
                }
            }
        } finally {
            IOUtilFunctions.closeSilently(out);
        }
    } else if (hdfs.isFile(srcFilePath)) {
        InputStream in = null;
        try {
            in = hdfs.open(srcFilePath);
            IOUtils.copyBytes(in, out, conf, true);
        } finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }
    } else {
        throw new IOException(srcFilePath.toString() + ": No such file or directory");
    }
}