Example usage for org.apache.hadoop.fs FileSystem exists

Introduction

This page collects example usages of org.apache.hadoop.fs.FileSystem#exists, drawn from open-source projects.

Prototype

public boolean exists(Path f) throws IOException 

Document

Check if a path exists.
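
As a quick orientation before the project listings below, here is a minimal, self-contained sketch of the call (the class name and path are illustrative only, not taken from any of the projects cited):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ExistsSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Obtain the default FileSystem (HDFS or local, depending on configuration).
        FileSystem fs = FileSystem.get(conf);

        Path path = new Path("/tmp/example-output"); // illustrative path
        if (fs.exists(path)) {
            System.out.println(path + " already exists");
        } else {
            System.out.println(path + " does not exist yet");
        }
    }
}

Note that exists() only reports the state of the namespace at the moment of the call; the check-then-act patterns in the examples below (exists then delete, exists then create) can still race with concurrent writers.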

Usage

From source file:com.blackberry.logdriver.LockedFs.java

License:Apache License

public void delete(Configuration conf, String toDelete) throws IOException {
    FileSystem fs = FileSystem.get(conf);

    Path path = new Path(toDelete);
    if (fs.exists(path)) {
        fs.delete(path, true);
    } else {
        LOG.warn("File to delete not found:" + toDelete);
    }
}

From source file:com.blackberry.logdriver.util.IndexLogs.java

License:Apache License

private static void updateComponent(Map<String, Map<String, Map<String, Map<String, Component>>>> data,
        List<String> unmergedCSVStrings, FileSystem fs, FileStatus matchedFolder, Path path)
        throws IOException, ParseException {
    // Parse path by splitting it across slashes. To determine service (which might contain slashes) grab
    // everything after the DC name, but before the matched date string.
    String[] pathPieces = matchedFolder.getPath().toString().split("/");
    String[] servicePieces = path.toString().split(pathPieces[4] + "/");
    servicePieces = servicePieces[1].split("/" + pathPieces[pathPieces.length - 5]);
    String DC = pathPieces[4];
    String service = servicePieces[0];
    String component = pathPieces[pathPieces.length - 2];
    String type = pathPieces[pathPieces.length - 5];
    String status = pathPieces[pathPieces.length - 1];
    Date date = inputFormat.parse(pathPieces[pathPieces.length - 4]);

    // If the _READY file doesn't exist, add it to the list
    Path READYPath = new Path(path.toString() + "/_READY");
    // System.out.println("Checking for " + READYPath.toString());
    if (!fs.exists(READYPath)) {
        unmergedCSVStrings.add(DC + "," + service + "," + type + "," + component + ","
                + pathPieces[pathPieces.length - 4] + "," + pathPieces[pathPieces.length - 3] + "\n");
        //System.out.println(unmergedCSVString);
    }

    // Check if there is a matching component, create one if not. 
    if (!componentExists(data, DC, service, type, component)) {
        data.get(DC).get(service).get(type).put(component, new Component(DC, service, type, component, date));
    }

    Component thisComponent = data.get(DC).get(service).get(type).get(component);

    // Update the start or end date if the current date is before or after, respectively. 
    if (date.before(thisComponent.startDate)) {
        thisComponent.startDate = date;
    } else if (date.after(thisComponent.endDate)) {
        thisComponent.endDate = date;
    }

    // Is the current folder an archive? If so and date is later than the current archiveDate, update it. 
    if (status.matches("archive") && date.after(thisComponent.archiveDate)) {
        thisComponent.archiveDate = date;
    }

    // Add size data
    if (status.matches("data")) {
        thisComponent.addDataSize(fs.getContentSummary(matchedFolder.getPath()).getLength());
    } else if (status.matches("incoming")) {
        thisComponent.addIncomingSize(fs.getContentSummary(matchedFolder.getPath()).getLength());
    } else if (status.matches("archive")) {
        thisComponent.addArchiveSize(fs.getContentSummary(matchedFolder.getPath()).getLength());
    }
}

From source file:com.blackberry.logtools.LogTools.java

License:Apache License

public String parseOut(String arg, FileSystem fs) throws Exception {
    arg = arg.replace("--out=", "");
    if (!fs.exists((new Path(arg)).getParent())) {
        logConsole(true, true, error, "Parent of specified path does not exist.");
        System.exit(1);
    }
    if (fs.exists(new Path(arg))) {
        logConsole(true, true, error, "Please specify a non-existing directory to create and output results.");
        System.exit(1);
    }
    return arg;
}

From source file:com.blackberry.logtools.LogTools.java

License:Apache License

public void tmpDirHDFS(boolean quiet, boolean silent, FileSystem fs, Configuration conf, String tmp,
        boolean log) {
    logConsole(quiet, silent, info, "Creating new Temp Directory in HDFS: " + tmp);

    try {
        Path path = new Path(tmp);
        if (!(fs.exists(path))) {
            //Create directory
            fs.mkdirs(path);
            if (!log) {
                fs.deleteOnExit(path);
            }
        }
    } catch (IOException e) {
        if (e.toString().contains("Failed to find any Kerberos")) {
            logConsole(true, true, error, "No/bad Kerberos ticket - please authenticate.");
            System.exit(1);
        } else if (e.toString().contains("quota") && e.toString().contains("exceeded")) {
            logConsole(true, true, error, "Disk quota Exceeded.");
            System.exit(1);
        }
        e.printStackTrace();
        System.exit(1);
    }
}

From source file:com.blm.orc.OrcRawRecordMerger.java

License:Apache License

/**
 * Create a reader that merge sorts the ACID events together.
 * @param conf the configuration
 * @param collapseEvents should the events on the same row be collapsed
 * @param isOriginal is the base file a pre-acid file
 * @param bucket the bucket we are reading
 * @param options the options to read with
 * @param deltaDirectory the list of delta directories to include
 * @throws IOException
 */
OrcRawRecordMerger(Configuration conf, boolean collapseEvents, Reader reader, boolean isOriginal, int bucket,
        ValidTxnList validTxnList, Reader.Options options, Path[] deltaDirectory) throws IOException {
    this.conf = conf;
    this.collapse = collapseEvents;
    this.offset = options.getOffset();
    this.length = options.getLength();
    this.validTxnList = validTxnList;
    // modify the options to reflect the event instead of the base row
    Reader.Options eventOptions = createEventOptions(options);
    if (reader == null) {
        baseReader = null;
    } else {

        // find the min/max based on the offset and length
        if (isOriginal) {
            discoverOriginalKeyBounds(reader, bucket, options);
        } else {
            discoverKeyBounds(reader, options);
        }
        LOG.info("min key = " + minKey + ", max key = " + maxKey);
        // use the min/max instead of the byte range
        ReaderPair pair;
        ReaderKey key = new ReaderKey();
        if (isOriginal) {
            options = options.clone();
            options.range(options.getOffset(), Long.MAX_VALUE);
            pair = new OriginalReaderPair(key, reader, bucket, minKey, maxKey, options);
        } else {
            pair = new ReaderPair(key, reader, bucket, minKey, maxKey, eventOptions);
        }

        // if there is at least one record, put it in the map
        if (pair.nextRecord != null) {
            readers.put(key, pair);
        }
        baseReader = pair.recordReader;
    }

    // we always want to read all of the deltas
    eventOptions.range(0, Long.MAX_VALUE);
    // Turn off the sarg before pushing it to delta.  We never want to push a sarg to a delta as
    // it can produce wrong results (if the latest valid version of the record is filtered out by
    // the sarg) or ArrayOutOfBounds errors (when the sarg is applied to a delete record)
    eventOptions.searchArgument(null, null);
    if (deltaDirectory != null) {
        for (Path delta : deltaDirectory) {
            ReaderKey key = new ReaderKey();
            Path deltaFile = AcidUtils.createBucketFile(delta, bucket);
            FileSystem fs = deltaFile.getFileSystem(conf);
            long length = getLastFlushLength(fs, deltaFile);
            if (fs.exists(deltaFile) && length != -1) {
                Reader deltaReader = OrcFile.createReader(deltaFile,
                        OrcFile.readerOptions(conf).maxLength(length));
                ReaderPair deltaPair = new ReaderPair(key, deltaReader, bucket, minKey, maxKey, eventOptions);
                if (deltaPair.nextRecord != null) {
                    readers.put(key, deltaPair);
                }
            }
        }
    }

    // get the first record
    Map.Entry<ReaderKey, ReaderPair> entry = readers.pollFirstEntry();
    if (entry == null) {
        columns = 0;
        primary = null;
    } else {
        primary = entry.getValue();
        if (readers.isEmpty()) {
            secondaryKey = null;
        } else {
            secondaryKey = readers.firstKey();
        }
        // get the number of columns in the user's rows
        columns = primary.getColumns();
    }
}

From source file:com.chinnu.churndetection.fuzzykmeans.FuzzyKMeansReducer.java

@Override
protected void reduce(IntWritable key, Iterable<Vector> values,
        Reducer<IntWritable, Vector, IntWritable, Text>.Context context)
        throws IOException, InterruptedException {

    double[] sum = new double[DATALENGTH];
    for (int i = 0; i < DATALENGTH; i++) {
        sum[i] = 0;
    }

    int count = 0;
    for (Vector vector : values) {

        for (int i = 0; i < DATALENGTH; i++) {
            sum[i] += vector.getData()[i];
        }
        count++;

        Text text = new Text(vector.toString());
        context.write(key, text);
    }

    double[] newCenter = new double[DATALENGTH];
    for (int i = 0; i < DATALENGTH; i++) {
        newCenter[i] = sum[i] / count;
    }

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    List<double[]> curr_center = new ArrayList<>();

    String[] lineSplit = CURR_CENTER.split("\n");
    for (int j = 0; j < lineSplit.length; j++) {
        String line = lineSplit[j];
        String[] split = line.split(",");
        double[] temp = new double[split.length];
        for (int i = 0; i < split.length; i++) {
            temp[i] = Double.parseDouble(split[i]);
        }
        curr_center.add(temp);
    }

    List<String> appendLine = new ArrayList<>();
    if (fs.exists(new Path(NEW_CENTER))) {
        BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(new Path(NEW_CENTER))));

        String line;
        while ((line = br.readLine()) != null) {
            appendLine.add(line);
        }
        br.close();
    }

    PrintWriter pw = new PrintWriter(new OutputStreamWriter(fs.create(new Path(NEW_CENTER), true)));
    for (String string : appendLine) {
        pw.println(string);
        pw.flush();
    }

    String line = "";
    for (int i = 0; i < DATALENGTH; i++) {
        line += newCenter[i] + ",";
    }
    String substring = line.substring(0, line.length() - 1);

    pw.println(substring);
    pw.flush();
    pw.close();

    MRLogger.Log(context.getJobName());
    MRLogger.Log(Arrays.toString(curr_center.get(key.get())));
    MRLogger.Log(Arrays.toString(newCenter));

    double curr_Distance = DistanceComparator.findDistance(curr_center.get(key.get()), newCenter);
    MRLogger.Log(curr_Distance + "");

    if (curr_Distance < 0.01) {
        PrintWriter pw1 = new PrintWriter(
                new OutputStreamWriter(fs.create(new Path(ChurnDriver.CENTER_CONVERGED), true)));
        pw1.println("converged");
        pw1.flush();
        pw1.close();
    }

}

From source file:com.cip.crane.agent.utils.TaskHelper.java

License:Open Source License

private void writeFileToHdfs(String srcFile, String destFile) throws IOException {
    File file = new File(srcFile);
    if (!file.exists()) {
        throw new FileNotFoundException("File not found");
    }
    byte[] buf = new byte[BUFFER_SIZE];
    FileInputStream input = new FileInputStream(file);
    FileSystem fs = FileSystem.get(URI.create(destFile), conf);
    Path destPath = new Path(destFile);
    if (fs.exists(destPath)) {
        fs.delete(destPath, true);
    }
    FSDataOutputStream hdfsoutput = fs.create(destPath, (short) 2);
    int num = input.read(buf);
    while (num != -1) { // keep copying until EOF
        hdfsoutput.write(buf, 0, num); // write the chunk to HDFS
        hdfsoutput.flush();
        num = input.read(buf); // read the next chunk
    }
    input.close();
    hdfsoutput.close();
    fs.close();
}

From source file:com.citic.zxyjs.zwlscx.mapreduce.lib.input.HFileOutputFormatBase.java

License:Apache License

public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(final TaskAttemptContext context)
        throws IOException, InterruptedException {
    // Get the path of the temporary output file
    final Path outputPath = FileOutputFormat.getOutputPath(context);
    final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath();
    final Path ignoreOutputPath = new Path(outputPath + "_ignore");

    final Configuration conf = context.getConfiguration();
    final FileSystem fs = outputdir.getFileSystem(conf);
    // These configs. are from hbase-*.xml
    final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
    // Invented config. Add to hbase-*.xml if other than default
    // compression.
    final String defaultCompression = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
    final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
            false);

    if (fs.exists(ignoreOutputPath)) {
        fs.delete(ignoreOutputPath, true);
        LOG.info("Deleted " + ignoreOutputPath.toString() + " successfully.");
    }

    // create a map from column family to the compression algorithm
    final Map<byte[], String> compressionMap = createFamilyCompressionMap(conf);
    final Map<byte[], String> bloomTypeMap = createFamilyBloomMap(conf);
    final Map<byte[], String> blockSizeMap = createFamilyBlockSizeMap(conf);

    String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_CONF_KEY);
    final HFileDataBlockEncoder encoder;
    if (dataBlockEncodingStr == null) {
        encoder = NoOpDataBlockEncoder.INSTANCE;
    } else {
        try {
            encoder = new HFileDataBlockEncoderImpl(DataBlockEncoding.valueOf(dataBlockEncodingStr));
        } catch (IllegalArgumentException ex) {
            throw new RuntimeException("Invalid data block encoding type configured for the param "
                    + DATABLOCK_ENCODING_CONF_KEY + " : " + dataBlockEncodingStr);
        }
    }

    return new RecordWriter<ImmutableBytesWritable, KeyValue>() {
        // Map of families to writers and how much has been output on the
        // writer.
        private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
                Bytes.BYTES_COMPARATOR);
        private final FSDataOutputStream dos = fs.create(ignoreOutputPath);
        private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
        private final byte[] now = Bytes.toBytes(System.currentTimeMillis());
        private boolean rollRequested = false;

        public void write(ImmutableBytesWritable row, KeyValue kv) throws IOException {
            // null input == user explicitly wants to flush
            if (row == null && kv == null) {
                rollWriters();
                return;
            }

            byte[] rowKey = kv.getRow();
            long length = kv.getLength();
            byte[] family = kv.getFamily();

            if (ignore(kv)) {
                byte[] readBuf = rowKey;
                dos.write(readBuf, 0, readBuf.length);
                dos.write(Bytes.toBytes("\n"));
                return;
            }
            WriterLength wl = this.writers.get(family);

            // If this is a new column family, verify that the directory
            // exists
            if (wl == null) {
                Path path = null;
                path = new Path(outputdir, Bytes.toString(family));
                fs.mkdirs(path);
            }

            // If any of the HFiles for the column families has reached
            // maxsize, we need to roll all the writers
            if (wl != null && wl.written + length >= maxsize) {
                this.rollRequested = true;
            }

            // This can only happen once a row is finished though
            if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
                rollWriters();
            }

            // create a new HLog writer, if necessary
            if (wl == null || wl.writer == null) {
                wl = getNewWriter(family, conf);
            }

            // we now have the proper HLog writer. full steam ahead
            kv.updateLatestStamp(this.now);
            wl.writer.append(kv);
            wl.written += length;

            // Copy the row so we know when a row transition occurs.
            this.previousRow = rowKey;
        }

        private void rollWriters() throws IOException {
            for (WriterLength wl : this.writers.values()) {
                if (wl.writer != null) {
                    LOG.info("Writer=" + wl.writer.getPath()
                            + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
                    close(wl.writer);
                }
                wl.writer = null;
                wl.written = 0;
            }
            this.rollRequested = false;
        }

        /*
         * Create a new StoreFile.Writer.
         * @param family
         * @return A WriterLength, containing a new StoreFile.Writer.
         * @throws IOException
         */
        private WriterLength getNewWriter(byte[] family, Configuration conf) throws IOException {
            WriterLength wl = new WriterLength();
            Path familydir = new Path(outputdir, Bytes.toString(family));
            String compression = compressionMap.get(family);
            compression = compression == null ? defaultCompression : compression;
            String bloomTypeStr = bloomTypeMap.get(family);
            BloomType bloomType = BloomType.NONE;
            if (bloomTypeStr != null) {
                bloomType = BloomType.valueOf(bloomTypeStr);
            }
            String blockSizeString = blockSizeMap.get(family);
            int blockSize = blockSizeString == null ? HConstants.DEFAULT_BLOCKSIZE
                    : Integer.parseInt(blockSizeString);
            Configuration tempConf = new Configuration(conf);
            tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
            wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs, blockSize)
                    .withOutputDir(familydir)
                    .withCompression(AbstractHFileWriter.compressionByName(compression))
                    .withBloomType(bloomType).withComparator(KeyValue.COMPARATOR).withDataBlockEncoder(encoder)
                    .withChecksumType(HStore.getChecksumType(conf))
                    .withBytesPerChecksum(HStore.getBytesPerChecksum(conf)).build();

            this.writers.put(family, wl);
            return wl;
        }

        private void close(final StoreFile.Writer w) throws IOException {
            if (w != null) {
                w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
                w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
                        Bytes.toBytes(context.getTaskAttemptID().toString()));
                w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
                w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY, Bytes.toBytes(compactionExclude));
                w.appendTrackedTimestampsToMetadata();
                w.close();
            }
        }

        public void close(TaskAttemptContext c) throws IOException, InterruptedException {
            dos.flush();
            dos.close();
            for (WriterLength wl : this.writers.values()) {
                close(wl.writer);
            }
        }
    };
}

From source file:com.cloudera.beeswax.Server.java

License:Apache License

/**
 * Hive won't work unless /tmp and /user/hive/warehouse are usable,
 * so we create them for the user.
 */
private static void createDirectoriesAsNecessary() {
    try {
        LOG.debug("Classpath: " + System.getProperty("java.class.path"));
        HiveConf conf = new HiveConf(Driver.class);
        FileSystem fs = FileSystem.get(conf);
        Path tmpDir = new Path("/tmp");
        Path metaDir = new Path(conf.get(HiveConf.ConfVars.METASTOREWAREHOUSE.varname));
        for (Path dir : new Path[] { tmpDir, metaDir }) {
            if (!fs.exists(dir)) {
                if (fs.mkdirs(dir)) {
                    fs.setPermission(dir, new FsPermission((short) 0777));
                    LOG.info("Created " + dir + " with world-writable permissions.");
                } else {
                    LOG.error("Could not create " + dir);
                }
            }
        }
    } catch (IOException e) {
        HiveConf conf = new HiveConf(Driver.class);
        LOG.error("Error while trying to check/create /tmp and warehouse directory "
                + conf.get(HiveConf.ConfVars.METASTOREWAREHOUSE.varname), e);
    }
}

From source file:com.cloudera.cdk.data.filesystem.FileSystemDatasetRepository.java

License:Apache License

@SuppressWarnings("deprecation")
@Override
public boolean delete(String name) {
    Preconditions.checkArgument(name != null, "Name can not be null");

    logger.debug("Deleting dataset:{}", name);

    final DatasetDescriptor descriptor;
    try {
        descriptor = metadataProvider.load(name);
    } catch (com.cloudera.cdk.data.NoSuchDatasetException ex) {
        return false;
    }

    boolean changed;
    try {
        // don't care about the return value here -- if it already doesn't exist
        // we still need to delete the data directory
        changed = metadataProvider.delete(name);
    } catch (MetadataProviderException ex) {
        throw new DatasetRepositoryException("Failed to delete descriptor for name:" + name, ex);
    }

    final Path dataLocation = new Path(descriptor.getLocation());
    final FileSystem fs = fsForPath(dataLocation, conf);

    try {
        if (fs.exists(dataLocation)) {
            if (fs.delete(dataLocation, true)) {
                changed = true;
            } else {
                throw new DatasetRepositoryException(
                        "Failed to delete dataset name:" + name + " location:" + dataLocation);
            }
        }
    } catch (IOException e) {
        throw new DatasetRepositoryException("Internal failure when removing location:" + dataLocation);
    }

    return changed;
}