Example usage for org.apache.hadoop.fs FileSystem exists

List of usage examples for org.apache.hadoop.fs FileSystem exists

Introduction

On this page you can find example usage of org.apache.hadoop.fs.FileSystem.exists.

Prototype

public boolean exists(Path f) throws IOException 

Document

Check if a path exists.
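
Before the usage examples from real projects, here is a minimal, self-contained sketch of the typical call pattern. It assumes a default Hadoop Configuration and uses a hypothetical path "/tmp/example.txt" purely for illustration; it simply guards further processing with an existence check.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemExistsExample {
    public static void main(String[] args) throws IOException {
        // file system bound to the default scheme configured in core-site.xml (local FS if unset)
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // hypothetical path used only for illustration
        Path p = new Path("/tmp/example.txt");

        if (fs.exists(p)) {
            // the path exists; query its status before reading
            System.out.println(p + " exists, length=" + fs.getFileStatus(p).getLen() + " bytes");
        } else {
            System.out.println(p + " does not exist");
        }
    }
}

Note that an exists() check followed by a separate getFileStatus() or open() call can still race with concurrent deletes, which is one reason the examples below wrap subsequent file-system accesses in try/catch blocks for IOException.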

Usage

From source file:com.ibm.bi.dml.parser.DataExpression.java

License:Open Source License

public String[] readMatrixMarketFile(String filename, boolean conditional) throws LanguageException {
    String[] retVal = new String[2];
    retVal[0] = new String("");
    retVal[1] = new String("");
    boolean exists = false;

    try {
        FileSystem fs = FileSystem.get(ConfigurationManager.getCachedJobConf());
        Path pt = new Path(filename);
        if (fs.exists(pt)) {
            exists = true;
        }

        boolean getFileStatusIsDir = fs.getFileStatus(pt).isDirectory();

        if (exists && getFileStatusIsDir) {
            raiseValidateError("MatrixMarket files as directories not supported", conditional);
        } else if (exists) {
            BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(pt)));
            try {
                retVal[0] = in.readLine();
                // skip all commented lines
                do {
                    retVal[1] = in.readLine();
                } while (retVal[1].charAt(0) == '%');

                if (!retVal[0].startsWith("%%")) {
                    raiseValidateError("MatrixMarket files must begin with a header line.", conditional);
                }
            } finally {
                if (in != null)
                    in.close();
            }
        } else {
            raiseValidateError("Could not find the file: " + filename, conditional);
        }

    } catch (IOException e) {
        //LOG.error(this.printErrorLocation() + "Error reading MatrixMarket file: " + filename );
        //throw new LanguageException(this.printErrorLocation() + "Error reading MatrixMarket file: " + filename );
        throw new LanguageException(e);
    }

    return retVal;
}

From source file:com.ibm.bi.dml.parser.DataExpression.java

License:Open Source License

public boolean checkHasMatrixMarketFormat(String inputFileName, String mtdFileName, boolean conditional)
        throws LanguageException {
    // Check whether the MTD file exists; if there is an MTD file, return false.
    JSONObject mtdObject = readMetadataFile(mtdFileName, conditional);

    if (mtdObject != null)
        return false;

    boolean exists = false;
    FileSystem fs = null;

    try {
        fs = FileSystem.get(ConfigurationManager.getCachedJobConf());
    } catch (Exception e) {
        LOG.error(this.printErrorLocation() + "could not read the configuration file.");
        throw new LanguageException(this.printErrorLocation() + "could not read the configuration file.", e);
    }

    Path pt = new Path(inputFileName);
    try {
        if (fs.exists(pt)) {
            exists = true;
        }
    } catch (Exception e) {
        LOG.error(this.printErrorLocation() + "file " + inputFileName + " not found");
        throw new LanguageException(this.printErrorLocation() + "file " + inputFileName + " not found");
    }

    try {
        // CASE: filename is a directory -- process as a directory
        if (exists && fs.getFileStatus(pt).isDirectory()) {

            // currently, only MM data stored as a single file is supported; so, if the
            // path is a directory, infer that it is likely not an MM file
            return false;
        }
        // CASE: filename points to a file
        else if (exists) {

            //BufferedReader in = new BufferedReader(new FileReader(filename));
            BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(pt)));

            String headerLine = new String("");
            if (in.ready())
                headerLine = in.readLine();
            in.close();

            // check that the header line starts with "%%";
            // otherwise infer a malformed file
            if (headerLine != null && headerLine.startsWith("%%"))
                return true;
            else
                return false;
        } else {
            return false;
        }

    } catch (Exception e) {
        return false;
    }
}

From source file:com.ibm.bi.dml.runtime.controlprogram.parfor.opt.OptimizerRuleBased.java

License:Open Source License

/**
 * Increasing the partition replication factor is beneficial if partitions are
 * read multiple times (e.g., in nested loops) because partitioning (done once)
 * gets slightly slower but there is a higher probability for local access
 *
 * NOTE: this rewrite requires 'set data partitioner' to be executed in order to
 * leverage the partitioning information in the plan tree. 
 *  
 * @param n
 * @throws DMLRuntimeException 
 */
protected void rewriteSetPartitionReplicationFactor(OptNode n,
        HashMap<String, PDataPartitionFormat> partitionedMatrices, LocalVariableMap vars)
        throws DMLRuntimeException {
    boolean apply = false;
    double sizeReplicated = 0;
    int replication = ParForProgramBlock.WRITE_REPLICATION_FACTOR;

    ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping()
            .getMappedProg(n.getID())[1];

    if (n.getExecType() == ExecType.MR
            && n.getParam(ParamType.DATA_PARTITIONER).equals(PDataPartitioner.REMOTE_MR.toString())
            && n.hasNestedParallelism(false) && n.hasNestedPartitionReads(false)) {
        apply = true;

        //account for problem and cluster constraints
        replication = (int) Math.min(_N, _rnk);

        //account for internal max constraint (note hadoop will warn if max > 10)
        replication = (int) Math.min(replication, MAX_REPLICATION_FACTOR_EXPORT);

        //account for remaining hdfs capacity
        try {
            FileSystem fs = FileSystem.get(ConfigurationManager.getCachedJobConf());
            long hdfsCapacityRemain = fs.getStatus().getRemaining();
            long sizeInputs = 0; //sum of all input sizes (w/o replication)
            for (String var : partitionedMatrices.keySet()) {
                MatrixObject mo = (MatrixObject) vars.get(var);
                Path fname = new Path(mo.getFileName());
                if (fs.exists(fname)) //non-existing (e.g., CP) -> small file
                    sizeInputs += fs.getContentSummary(fname).getLength();
            }
            replication = (int) Math.min(replication, Math.floor(0.9 * hdfsCapacityRemain / sizeInputs));

            //ensure at least replication 1
            replication = Math.max(replication, ParForProgramBlock.WRITE_REPLICATION_FACTOR);
            sizeReplicated = replication * sizeInputs;
        } catch (Exception ex) {
            throw new DMLRuntimeException("Failed to analyze remaining hdfs capacity.", ex);
        }
    }

    //modify the runtime plan 
    if (apply)
        pfpb.setPartitionReplicationFactor(replication);

    _numEvaluatedPlans++;
    LOG.debug(getOptMode() + " OPT: rewrite 'set partition replication factor' - result=" + apply
            + ((apply) ? " (" + replication + ", " + toMB(sizeReplicated) + ")" : ""));
}

From source file:com.ibm.bi.dml.runtime.io.MatrixReader.java

License:Open Source License

/**
 * Checks that the given input file exists on HDFS/LFS and is not empty.
 * @param fs
 * @param path
 * @throws IOException 
 */
protected static void checkValidInputFile(FileSystem fs, Path path) throws IOException {
    //check non-existing file
    if (!fs.exists(path))
        throw new IOException("File " + path.toString() + " does not exist on HDFS/LFS.");

    //check for empty file
    if (MapReduceTool.isFileEmpty(fs, path.toString()))
        throw new EOFException("Empty input file " + path.toString() + ".");

}

From source file:com.ibm.bi.dml.runtime.io.WriterMatrixMarket.java

License:Open Source License

/**
 * Merges text-cell data from srcFileName into a single MatrixMarket file at fileName on HDFS.
 * @param srcFileName
 * @param fileName
 * @param rlen
 * @param clen
 * @param nnz
 * @throws IOException
 */
public void mergeTextcellToMatrixMarket(String srcFileName, String fileName, long rlen, long clen, long nnz)
        throws IOException {
    Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());

    Path src = new Path(srcFileName);
    Path merge = new Path(fileName);
    FileSystem hdfs = FileSystem.get(conf);

    if (hdfs.exists(merge)) {
        hdfs.delete(merge, true);
    }

    OutputStream out = hdfs.create(merge, true);

    // write out the header first 
    StringBuilder sb = new StringBuilder();
    sb.append("%%MatrixMarket matrix coordinate real general\n");

    // output number of rows, number of columns and number of nnz
    sb.append(rlen + " " + clen + " " + nnz + "\n");
    out.write(sb.toString().getBytes());

    // if the source is a directory
    if (hdfs.getFileStatus(src).isDirectory()) {
        try {
            FileStatus[] contents = hdfs.listStatus(src);
            for (int i = 0; i < contents.length; i++) {
                if (!contents[i].isDirectory()) {
                    InputStream in = hdfs.open(contents[i].getPath());
                    try {
                        IOUtils.copyBytes(in, out, conf, false);
                    } finally {
                        IOUtilFunctions.closeSilently(in);
                    }
                }
            }
        } finally {
            IOUtilFunctions.closeSilently(out);
        }
    } else if (hdfs.isFile(src)) {
        InputStream in = null;
        try {
            in = hdfs.open(src);
            IOUtils.copyBytes(in, out, conf, true);
        } finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }
    } else {
        throw new IOException(src.toString() + ": No such file or directory");
    }
}

From source file:com.ibm.bi.dml.runtime.io.WriterTextCSV.java

License:Open Source License

/**
 * Method to merge multiple CSV part files on HDFS into a single CSV file on HDFS. 
 * The part files are created by CSV_WRITE MR job. 
 *
 * This method is invoked from CP-write instruction.
 * 
 * @param srcFileName
 * @param destFileName
 * @param csvprop
 * @param rlen
 * @param clen
 * @throws IOException
 */
public void mergeCSVPartFiles(String srcFileName, String destFileName, CSVFileFormatProperties csvprop,
        long rlen, long clen) throws IOException {
    Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());

    Path srcFilePath = new Path(srcFileName);
    Path mergedFilePath = new Path(destFileName);
    FileSystem hdfs = FileSystem.get(conf);

    if (hdfs.exists(mergedFilePath)) {
        hdfs.delete(mergedFilePath, true);
    }
    OutputStream out = hdfs.create(mergedFilePath, true);

    // write out the header, if needed
    if (csvprop.hasHeader()) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < clen; i++) {
            sb.append("C" + (i + 1));
            if (i < clen - 1)
                sb.append(csvprop.getDelim());
        }
        sb.append('\n');
        out.write(sb.toString().getBytes());
        sb.setLength(0);
    }

    // if the source is a directory
    if (hdfs.isDirectory(srcFilePath)) {
        try {
            FileStatus[] contents = hdfs.listStatus(srcFilePath);
            Path[] partPaths = new Path[contents.length];
            int numPartFiles = 0;
            for (int i = 0; i < contents.length; i++) {
                if (!contents[i].isDirectory()) {
                    partPaths[i] = contents[i].getPath();
                    numPartFiles++;
                }
            }
            Arrays.sort(partPaths);

            for (int i = 0; i < numPartFiles; i++) {
                InputStream in = hdfs.open(partPaths[i]);
                try {
                    IOUtils.copyBytes(in, out, conf, false);
                    if (i < numPartFiles - 1)
                        out.write('\n');
                } finally {
                    IOUtilFunctions.closeSilently(in);
                }
            }
        } finally {
            IOUtilFunctions.closeSilently(out);
        }
    } else if (hdfs.isFile(srcFilePath)) {
        InputStream in = null;
        try {
            in = hdfs.open(srcFilePath);
            IOUtils.copyBytes(in, out, conf, true);
        } finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }
    } else {
        throw new IOException(srcFilePath.toString() + ": No such file or directory");
    }
}

From source file:com.ibm.bi.dml.runtime.matrix.data.MultipleOutputCommitter.java

License:Open Source License

@Override
public void cleanupJob(JobContext context) throws IOException {
    JobConf conf = context.getJobConf();
    // do the clean up of temporary directory
    Path outputPath = FileOutputFormat.getOutputPath(conf);
    if (outputPath != null) {
        FileSystem fs = outputPath.getFileSystem(conf);
        context.getProgressible().progress();
        if (fs.exists(outputPath))
            fs.delete(outputPath, true);
    }
}

From source file:com.ibm.bi.dml.runtime.matrix.data.MultipleOutputCommitter.java

License:Open Source License

@Override
public void commitTask(TaskAttemptContext context) throws IOException {
    JobConf conf = context.getJobConf();
    TaskAttemptID attemptId = context.getTaskAttemptID();

    // get the mapping between index to output filename
    outputs = MRJobConfiguration.getOutputs(conf);

    //get temp task output path (compatible with hadoop1 and hadoop2)
    Path taskOutPath = FileOutputFormat.getWorkOutputPath(conf);
    FileSystem fs = taskOutPath.getFileSystem(conf);
    if (!fs.exists(taskOutPath))
        throw new IOException("Task output path " + taskOutPath.toString() + "does not exist.");

    // Move the task outputs to their final places
    context.getProgressible().progress();
    moveFinalTaskOutputs(context, fs, taskOutPath);

    // Delete the temporary task-specific output directory
    if (!fs.delete(taskOutPath, true))
        LOG.debug("Failed to delete the temporary output directory of task: " + attemptId + " - " + taskOutPath);
}

From source file:com.ibm.bi.dml.runtime.matrix.sort.SamplingSortMRInputFormat.java

License:Open Source License

/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 * @param conf the job to sample
 * @param partFile where to write the output file to
 * @throws IOException if something goes wrong
 * @throws IllegalAccessException
 * @throws InstantiationException
 */
@SuppressWarnings({ "unchecked", "unused", "deprecation" })
public static int writePartitionFile(JobConf conf, Path partFile)
        throws IOException, InstantiationException, IllegalAccessException {
    SamplingSortMRInputFormat inFormat = new SamplingSortMRInputFormat();
    Sampler sampler = new Sampler();

    Class<? extends WritableComparable> targetKeyClass;
    targetKeyClass = (Class<? extends WritableComparable>) conf.getClass(TARGET_KEY_CLASS,
            WritableComparable.class);
    //get input converter information
    int brlen = MRJobConfiguration.getNumRowsPerBlock(conf, (byte) 0);
    int bclen = MRJobConfiguration.getNumColumnsPerBlock(conf, (byte) 0);

    //indicate whether the matrix value in this mapper is a matrix cell or a matrix block
    int partitions = conf.getNumReduceTasks();

    long sampleSize = conf.getLong(SAMPLE_SIZE, 1000);
    InputSplit[] splits = inFormat.getSplits(conf, conf.getNumMapTasks());
    int samples = Math.min(10, splits.length);
    long recordsPerSample = sampleSize / samples;
    int sampleStep = splits.length / samples;
    // take N samples from different parts of the input

    int totalcount = 0;
    for (int i = 0; i < samples; ++i) {
        SequenceFileRecordReader reader = (SequenceFileRecordReader) inFormat
                .getRecordReader(splits[sampleStep * i], conf, null);
        int count = 0;
        WritableComparable key = (WritableComparable) reader.createKey();
        Writable value = (Writable) reader.createValue();
        while (reader.next(key, value) && count < recordsPerSample) {
            Converter inputConverter = MRJobConfiguration.getInputConverter(conf, (byte) 0);
            inputConverter.setBlockSize(brlen, bclen);
            inputConverter.convert(key, value);
            while (inputConverter.hasNext()) {
                Pair pair = inputConverter.next();
                if (pair.getKey() instanceof DoubleWritable) {
                    sampler.addValue(new DoubleWritable(((DoubleWritable) pair.getKey()).get()));
                } else if (pair.getValue() instanceof MatrixCell) {
                    sampler.addValue(new DoubleWritable(((MatrixCell) pair.getValue()).getValue()));
                } else
                    throw new IOException("SamplingSortMRInputFormat unsupported key/value class: "
                            + pair.getKey().getClass() + ":" + pair.getValue().getClass());

                count++;
            }
            key = (WritableComparable) reader.createKey();
            value = (Writable) reader.createValue();
        }
        totalcount += count;
    }

    if (totalcount == 0) //empty input files
        sampler.addValue(new DoubleWritable(0));

    FileSystem outFs = partFile.getFileSystem(conf);
    if (outFs.exists(partFile)) {
        outFs.delete(partFile, false);
    }

    //note: key value always double/null as expected by partitioner
    SequenceFile.Writer writer = SequenceFile.createWriter(outFs, conf, partFile, DoubleWritable.class,
            NullWritable.class);
    NullWritable nullValue = NullWritable.get();
    int index0 = -1, i = 0;
    boolean lessthan0 = true;
    for (WritableComparable splitValue : sampler.createPartitions(partitions)) {
        writer.append(splitValue, nullValue);
        if (lessthan0 && ((DoubleWritable) splitValue).get() >= 0) {
            index0 = i;
            lessthan0 = false;
        }
        i++;
    }
    if (lessthan0)
        index0 = partitions - 1;
    writer.close();

    return index0;
}

From source file:com.ibm.bi.dml.runtime.transform.DataTransform.java

License:Open Source License

/**
 * Helper function to move transformation metadata files from a temporary
 * location to permanent location. These files (e.g., header before and
 * after transformation) are generated by a single mapper, while applying
 * data transformations. Note that, these files must be ultimately be placed
 * under the existing metadata directory (txMtdPath), which is
 * simultaneously read by other mappers. If they are not created at a
 * temporary location, then MR tasks fail due to changing timestamps on
 * txMtdPath.
 * 
 * @param fs
 * @param tmpPath
 * @param txMtdPath
 * @throws IllegalArgumentException
 * @throws IOException
 */
private static void moveFilesFromTmp(FileSystem fs, String tmpPath, String txMtdPath)
        throws IllegalArgumentException, IOException {
    // move files from temporary location to txMtdPath
    MapReduceTool.renameFileOnHDFS(tmpPath + "/" + TransformationAgent.OUT_HEADER,
            txMtdPath + "/" + TransformationAgent.OUT_HEADER);
    MapReduceTool.renameFileOnHDFS(tmpPath + "/" + TransformationAgent.OUT_DCD_HEADER,
            txMtdPath + "/" + TransformationAgent.OUT_DCD_HEADER);
    MapReduceTool.renameFileOnHDFS(tmpPath + "/" + TransformationAgent.COLTYPES_FILE_NAME,
            txMtdPath + "/" + TransformationAgent.COLTYPES_FILE_NAME);

    if (fs.exists(new Path(tmpPath + "/Dummycode/" + TransformationAgent.DCD_FILE_NAME))) {
        if (!fs.exists(new Path(txMtdPath + "/Dummycode/")))
            fs.mkdirs(new Path(txMtdPath + "/Dummycode/"));
        MapReduceTool.renameFileOnHDFS(tmpPath + "/Dummycode/" + TransformationAgent.DCD_FILE_NAME,
                txMtdPath + "/Dummycode/" + TransformationAgent.DCD_FILE_NAME);
    }
}