Example usage for org.apache.hadoop.fs Path SEPARATOR

List of usage examples for org.apache.hadoop.fs Path SEPARATOR

Introduction

In this page you can find the example usage for org.apache.hadoop.fs Path SEPARATOR.

Prototype

String SEPARATOR

To view the source code for org.apache.hadoop.fs Path SEPARATOR, click the Source Link.

Document

The directory separator, a slash.

Usage

From source file:org.apache.apex.malhar.lib.fs.s3.S3InitiateFileUploadOperator.java

License:Apache License

/**
 * Builds the key name for a file by joining the output directory path and the file path
 * with a single {@link Path#SEPARATOR}. A leading separator on the file path is stripped
 * first so that the join does not introduce a doubled separator at that point.
 *
 * @param filePath file path to upload
 * @return key name for the given file
 */
private String getKeyName(String filePath) {
    String relativeFilePath = StringUtils.removeStart(filePath, Path.SEPARATOR);
    return outputDirectoryPath + Path.SEPARATOR + relativeFilePath;
}

From source file:org.apache.apex.malhar.lib.fs.s3.S3Reconciler.java

License:Apache License

/**
 * Uploads the committed file to Amazon S3 using the putObject API of the S3 client.
 * <p>
 * A path that does not exist is skipped and logged at debug level, on the assumption that the
 * tuple is being replayed. The input stream opened on the file is always closed when the
 * upload attempt finishes, whether it succeeds, is skipped, or fails.
 *
 * @param outputMetaData metadata of the file to upload; its path, file name and size are read
 * @throws RuntimeException if the reported file size is not below {@link Integer#MAX_VALUE}
 */
@Override
protected void processCommittedData(FSRecordCompactionOperator.OutputMetaData outputMetaData) {
    try {
        Path path = new Path(outputMetaData.getPath());
        if (!fs.exists(path)) {
            logger.debug("Ignoring non-existent path assuming replay : {}", path);
            return;
        }
        // try-with-resources: the stream must be released on every exit path, including the
        // size-limit failure below and the case where the file vanishes before the upload.
        try (FSDataInputStream fsinput = fs.open(path)) {
            long size = outputMetaData.getSize();
            ObjectMetadata omd = new ObjectMetadata();
            omd.setContentLength(size);
            String keyName = directoryName + Path.SEPARATOR + outputMetaData.getFileName();
            PutObjectRequest request = new PutObjectRequest(bucketName, keyName, fsinput, omd);
            if (size >= Integer.MAX_VALUE) {
                // the read limit is an int, so larger uploads cannot be configured
                throw new RuntimeException("PutRequestSize greater than Integer.MAX_VALUE");
            }
            request.getRequestClientOptions().setReadLimit((int) size);
            // re-check right before uploading: the file may have been removed since it was opened
            if (fs.exists(path)) {
                PutObjectResult result = s3client.putObject(request);
                logger.debug("File {} Uploaded at {}", keyName, result.getETag());
            }
        }
    } catch (FileNotFoundException e) {
        logger.debug("Ignoring non-existent path assuming replay : {}", outputMetaData.getPath());
    } catch (IOException e) {
        // pass the exception as the last argument so the stack trace is not lost
        logger.error("Unable to create Stream: {}", e.getMessage(), e);
    }
}

From source file:org.apache.apex.malhar.lib.fs.s3.S3Reconciler.java

License:Apache License

/**
 * Drains the queue of completed tuples. For each one, intermediate/temporary files are
 * removed and, when the output port is connected, its metadata is emitted with the path
 * rewritten to the directory name followed by the file name.
 */
@Override
public void endWindow() {
    FSRecordCompactionOperator.OutputMetaData completed;
    while ((completed = doneTuples.poll()) != null) {
        removeIntermediateFiles(completed);
        if (!outputPort.isConnected()) {
            continue;
        }
        // Emit the meta data with S3 path
        String s3Path = getDirectoryName() + Path.SEPARATOR + completed.getFileName();
        completed.setPath(s3Path);
        outputPort.emit(completed);
    }
}

From source file:org.apache.apex.malhar.lib.fs.s3.S3ReconcilerTest.java

License:Apache License

@Test
public void verifyS3ReconclierOutputTuple() throws Exception {
    String fileName = "s3-compaction_1.0";
    String path = testMeta.outputPath + Path.SEPARATOR + fileName;
    long size = 80;

    // Write the final file plus a partial ".tmp" sibling holding the first six records.
    File dataFile = new File(path);
    File partialFile = new File(path + "." + System.currentTimeMillis() + ".tmp");
    StringBuilder records = new StringBuilder();
    for (int recordNo = 0; recordNo < 10; recordNo++) {
        records.append("Record").append(recordNo).append("\n");
        if (recordNo == 5) {
            FileUtils.write(partialFile, records.toString());
        }
    }
    FileUtils.write(dataFile, records.toString());

    // Attach a collecting sink to the output port; emitted tuples are inspected at the end.
    testMeta.sink = new CollectorTestSink<Object>();
    testMeta.underTest.outputPort.setSink(testMeta.sink);

    // Feed the file's meta information to the operator in window 0.
    FSRecordCompactionOperator.OutputMetaData metaTuple =
            new FSRecordCompactionOperator.OutputMetaData(path, fileName, size);
    testMeta.underTest.beginWindow(0);
    testMeta.underTest.input.process(metaTuple);
    testMeta.underTest.endWindow();

    // Run empty windows 1..59 and then commit the last of them.
    for (int windowId = 1; windowId < 60; windowId++) {
        testMeta.underTest.beginWindow(windowId);
        testMeta.underTest.endWindow();
    }
    testMeta.underTest.committed(59);

    // Wait for one tuple to show up on the output port.
    testMeta.sink.waitForResultCount(1, 12000);

    // A few more windows, with a short pause inside each one.
    for (int windowId = 60; windowId < 70; windowId++) {
        testMeta.underTest.beginWindow(windowId);
        Thread.sleep(10);
        testMeta.underTest.endWindow();
    }

    // Exactly one tuple must have been emitted.
    Assert.assertEquals(1, testMeta.sink.getCount(false));
}

From source file:org.apache.apex.malhar.lib.fs.s3.S3ReconcilerTest.java

License:Apache License

@Test
public void testFileClearing() throws Exception {
    String fileName = "s3-compaction_1.0";
    String path = testMeta.outputPath + Path.SEPARATOR + fileName;
    long size = 80;

    // Write the final file plus a partial ".tmp" sibling holding the first six records.
    File dataFile = new File(path);
    File partialFile = new File(path + "." + System.currentTimeMillis() + ".tmp");
    StringBuilder records = new StringBuilder();
    for (int recordNo = 0; recordNo < 10; recordNo++) {
        records.append("Record").append(recordNo).append("\n");
        if (recordNo == 5) {
            FileUtils.write(partialFile, records.toString());
        }
    }
    FileUtils.write(dataFile, records.toString());

    // Feed the file's meta information to the operator in window 0.
    FSRecordCompactionOperator.OutputMetaData metaTuple =
            new FSRecordCompactionOperator.OutputMetaData(path, fileName, size);
    testMeta.underTest.beginWindow(0);
    testMeta.underTest.input.process(metaTuple);
    testMeta.underTest.endWindow();

    // Run empty windows 1..59 and then commit the last of them.
    for (int windowId = 1; windowId < 60; windowId++) {
        testMeta.underTest.beginWindow(windowId);
        testMeta.underTest.endWindow();
    }
    testMeta.underTest.committed(59);

    // A few more windows, with a short pause inside each one.
    for (int windowId = 60; windowId < 70; windowId++) {
        testMeta.underTest.beginWindow(windowId);
        Thread.sleep(10);
        testMeta.underTest.endWindow();
    }

    // Nothing may remain anywhere under the output directory.
    Collection<File> leftovers = FileUtils.listFiles(new File(testMeta.outputPath), null, true);
    Assert.assertEquals(0, leftovers.size());
}

From source file:org.apache.apex.malhar.lib.io.fs.AbstractFileOutputOperator.java

License:Apache License

/**
 * Prepares the operator for writing.
 * <p>
 * In order, this method:
 * <ol>
 * <li>defaults {@code expireStreamAfterAccessMillis} (when unset) to the streaming window size
 * multiplied by the checkpoint window count;</li>
 * <li>decides whether files are rolling (a finite {@code maxLength} or positive
 * {@code rotationWindows});</li>
 * <li>obtains the file system instance and, when {@code replication} is not positive, the
 * default replication for {@code filePath};</li>
 * <li>builds the cache of open streams;</li>
 * <li>calls {@code recoverFile} for every file tracked in {@code endOffsets} whose active
 * file exists;</li>
 * <li>when rolling, deletes part files numbered after the currently open part and rotates the
 * open part if it is already longer than {@code maxLength};</li>
 * <li>stores the context and resets the byte/time counters.</li>
 * </ol>
 *
 * @param context operator context supplying the window size and checkpoint window count
 * @throws RuntimeException wrapping any {@link IOException} or {@link ExecutionException}
 *                          raised while obtaining the file system or recovering files
 */
@Override
public void setup(Context.OperatorContext context) {
    LOG.debug("setup initiated");
    if (expireStreamAfterAccessMillis == null) {
        expireStreamAfterAccessMillis = (long) (context
                .getValue(Context.DAGContext.STREAMING_WINDOW_SIZE_MILLIS)
                * context.getValue(Context.DAGContext.CHECKPOINT_WINDOW_COUNT));
    }
    rollingFile = (maxLength < Long.MAX_VALUE) || (rotationWindows > 0);

    // Obtain the file system instance this operator writes to.
    try {
        fs = getFSInstance();
    } catch (IOException ex) {
        throw new RuntimeException(ex);
    }

    if (replication <= 0) {
        replication = fs.getDefaultReplication(new Path(filePath));
    }

    LOG.debug("FS class {}", fs.getClass());

    // Build the cache of open streams: bounded by maxOpenFiles, entries expire after the idle
    // period computed above, and evictions are reported to the removal listener.
    RemovalListener<String, FSFilterStreamContext> removalListener = createCacheRemoveListener();
    CacheLoader<String, FSFilterStreamContext> loader = createCacheLoader();
    streamsCache = CacheBuilder.newBuilder().maximumSize(maxOpenFiles)
            .expireAfterAccess(expireStreamAfterAccessMillis, TimeUnit.MILLISECONDS)
            .removalListener(removalListener).build(loader);

    LOG.debug("File system class: {}", fs.getClass());
    LOG.debug("end-offsets {}", endOffsets);

    try {
        // Restore the files in case they were corrupted and the operator was re-deployed.
        Path writerPath = new Path(filePath);
        if (fs.exists(writerPath)) {
            for (String seenFileName : endOffsets.keySet()) {
                String seenFileNamePart = getPartFileNamePri(seenFileName);
                LOG.debug("seenFileNamePart: {}", seenFileNamePart);

                // The active file is the tmp file when always writing to tmp, else the part file.
                Path activeFilePath;
                if (alwaysWriteToTmp) {
                    // NOTE(review): unlike the rolling-file section below, the looked-up tmp name
                    // is not null-checked here; a missing mapping would yield a path ending in
                    // "null" — confirm a mapping always exists for seen files.
                    String tmpFileName = fileNameToTmpName.get(seenFileNamePart);
                    activeFilePath = new Path(filePath + Path.SEPARATOR + tmpFileName);
                } else {
                    activeFilePath = new Path(filePath + Path.SEPARATOR + seenFileNamePart);
                }

                if (fs.exists(activeFilePath)) {
                    recoverFile(seenFileName, seenFileNamePart, activeFilePath);
                }
            }
        }

        if (rollingFile) {
            // Delete the left over future rolling files produced from the previous crashed
            // instance of this operator.
            for (String seenFileName : endOffsets.keySet()) {
                try {
                    Integer fileOpenPart = this.openPart.get(seenFileName).getValue();
                    int nextPart = fileOpenPart + 1;
                    String seenPartFileName;
                    // Walk part numbers upwards from the one after the open part, deleting each
                    // existing file, and stop at the first part that has no file (or no tmp name).
                    while (true) {
                        seenPartFileName = getPartFileName(seenFileName, nextPart);
                        Path activePath = null;
                        if (alwaysWriteToTmp) {
                            String tmpFileName = fileNameToTmpName.get(seenPartFileName);
                            if (tmpFileName != null) {
                                activePath = new Path(filePath + Path.SEPARATOR + tmpFileName);
                            }
                        } else {
                            activePath = new Path(filePath + Path.SEPARATOR + seenPartFileName);
                        }
                        if (activePath == null || !fs.exists(activePath)) {
                            break;
                        }

                        fs.delete(activePath, true);
                        nextPart++;
                    }

                    // Now inspect the currently open part itself.
                    seenPartFileName = getPartFileName(seenFileName, fileOpenPart);
                    Path activePath = null;
                    if (alwaysWriteToTmp) {
                        String tmpFileName = fileNameToTmpName.get(seenPartFileName);
                        if (tmpFileName != null) {
                            activePath = new Path(
                                    filePath + Path.SEPARATOR + fileNameToTmpName.get(seenPartFileName));
                        }
                    } else {
                        activePath = new Path(filePath + Path.SEPARATOR + seenPartFileName);
                    }

                    if (activePath != null && fs.exists(activePath)
                            && fs.getFileStatus(activePath).getLen() > maxLength) {
                        // Handle the case when restoring to a checkpoint where the current rolling
                        // file already has a length greater than max length.
                        LOG.debug("rotating file at setup.");
                        rotate(seenFileName);
                    }
                } catch (IOException | ExecutionException e) {
                    throw new RuntimeException(e);
                }
            }
        }
        LOG.debug("setup completed");
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    this.context = context;

    // Start both write counters from fresh MutableLong instances.
    fileCounters.setCounter(Counters.TOTAL_BYTES_WRITTEN, new MutableLong());
    fileCounters.setCounter(Counters.TOTAL_TIME_WRITING_MILLISECONDS, new MutableLong());
}

From source file:org.apache.apex.malhar.lib.io.fs.AbstractFileOutputOperator.java

License:Apache License

/**
 * Recovers a file which exists on the disk. If the length of the file is not same as the
 * length which the operator remembers then the file is truncated. <br/>
 * When always writing to a temporary file, then a file is restored even when the length is same as what the
 * operator remembers however this is done only for files which had open streams that weren't closed before
 * failure./* w w w  . j  ava 2 s  .  c o m*/
 *
 * @param filename     name of the actual file.
 * @param partFileName name of the part file. When not rolling this is same as filename; otherwise this is the
 *                     latest open part file name.
 * @param filepath     path of the file. When always writing to temp file, this is the path of the temp file;
 *                     otherwise path of the actual file.
 * @throws IOException
 */
private void recoverFile(String filename, String partFileName, Path filepath) throws IOException {
    LOG.debug("path exists {}", filepath);
    long offset = endOffsets.get(filename).longValue();
    FSDataInputStream inputStream = fs.open(filepath);
    FileStatus status = fs.getFileStatus(filepath);

    if (status.getLen() != offset) {
        LOG.info("path corrupted {} {} {}", filepath, offset, status.getLen());
        byte[] buffer = new byte[COPY_BUFFER_SIZE];
        String recoveryFileName = partFileName + '.' + System.currentTimeMillis() + TMP_EXTENSION;
        Path recoveryFilePath = new Path(filePath + Path.SEPARATOR + recoveryFileName);
        FSDataOutputStream fsOutput = openStream(recoveryFilePath, false);

        while (inputStream.getPos() < offset) {
            long remainingBytes = offset - inputStream.getPos();
            int bytesToWrite = remainingBytes < COPY_BUFFER_SIZE ? (int) remainingBytes : COPY_BUFFER_SIZE;
            inputStream.read(buffer);
            fsOutput.write(buffer, 0, bytesToWrite);
        }

        flush(fsOutput);
        fsOutput.close();
        inputStream.close();

        LOG.debug("active {} recovery {} ", filepath, recoveryFilePath);

        if (alwaysWriteToTmp) {
            //recovery file is used as the new tmp file and we cannot delete the old tmp file because when the operator
            //is restored to an earlier check-pointed window, it will look for an older tmp.
            fileNameToTmpName.put(partFileName, recoveryFileName);
        } else {
            LOG.debug("recovery path {} actual path {} ", recoveryFilePath, status.getPath());
            rename(recoveryFilePath, status.getPath());
        }
    } else {
        if (alwaysWriteToTmp && filesWithOpenStreams.contains(filename)) {
            String currentTmp = partFileName + '.' + System.currentTimeMillis() + TMP_EXTENSION;
            FSDataOutputStream outputStream = openStream(new Path(filePath + Path.SEPARATOR + currentTmp),
                    false);
            IOUtils.copy(inputStream, outputStream);
            streamsCache.put(filename, new FSFilterStreamContext(outputStream));
            fileNameToTmpName.put(partFileName, currentTmp);
        }
        inputStream.close();
    }
}

From source file:org.apache.apex.malhar.lib.io.fs.AbstractFileOutputOperator.java

License:Apache License

/**
 * Creates the {@link CacheLoader} for loading an output stream when it is not present in the cache.
 * @return cache loader/*from w  w w. j  a va 2s . com*/
 */
private CacheLoader<String, FSFilterStreamContext> createCacheLoader() {
    return new CacheLoader<String, FSFilterStreamContext>() {
        @Override
        public FSFilterStreamContext load(@Nonnull String filename) {
            if (rollingFile) {
                RotationState state = getRotationState(filename);
                if (rollingFile && state.rotated) {
                    openPart.get(filename).add(1);
                    state.rotated = false;
                    MutableLong offset = endOffsets.get(filename);
                    offset.setValue(0);
                }
            }

            String partFileName = getPartFileNamePri(filename);
            Path originalFilePath = new Path(filePath + Path.SEPARATOR + partFileName);

            Path activeFilePath;
            if (!alwaysWriteToTmp) {
                activeFilePath = originalFilePath;
            } else {
                //MLHR-1776 : writing to tmp file
                String tmpFileName = fileNameToTmpName.get(partFileName);
                if (tmpFileName == null) {
                    tmpFileName = partFileName + '.' + System.currentTimeMillis() + TMP_EXTENSION;
                    fileNameToTmpName.put(partFileName, tmpFileName);
                }
                activeFilePath = new Path(filePath + Path.SEPARATOR + tmpFileName);
            }

            FSDataOutputStream fsOutput;

            boolean sawThisFileBefore = endOffsets.containsKey(filename);

            try {
                if (fs.exists(originalFilePath) || (alwaysWriteToTmp && fs.exists(activeFilePath))) {
                    if (sawThisFileBefore) {
                        FileStatus fileStatus = fs.getFileStatus(activeFilePath);
                        MutableLong endOffset = endOffsets.get(filename);

                        if (endOffset != null) {
                            endOffset.setValue(fileStatus.getLen());
                        } else {
                            endOffsets.put(filename, new MutableLong(fileStatus.getLen()));
                        }

                        fsOutput = openStream(activeFilePath, true);
                        LOG.debug("appending to {}", activeFilePath);
                    } else {
                        //We never saw this file before and we don't want to append
                        //If the file is rolling we need to delete all its parts.
                        if (rollingFile) {
                            int part = 0;

                            while (true) {
                                Path seenPartFilePath = new Path(
                                        filePath + Path.SEPARATOR + getPartFileName(filename, part));
                                if (!fs.exists(seenPartFilePath)) {
                                    break;
                                }

                                fs.delete(seenPartFilePath, true);
                                part = part + 1;
                            }

                            fsOutput = openStream(activeFilePath, false);
                        } else {
                            //Not rolling is easy, just delete the file and create it again.
                            fs.delete(activeFilePath, true);
                            if (alwaysWriteToTmp) {
                                //we need to delete original file if that exists
                                if (fs.exists(originalFilePath)) {
                                    fs.delete(originalFilePath, true);
                                }
                            }
                            fsOutput = openStream(activeFilePath, false);
                        }
                    }
                } else {
                    fsOutput = openStream(activeFilePath, false);
                }
                filesWithOpenStreams.add(filename);

                LOG.info("opened {}, active {}", partFileName, activeFilePath);
                return new FSFilterStreamContext(fsOutput);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    };
}

From source file:org.apache.apex.malhar.lib.io.fs.AbstractFileOutputOperator.java

License:Apache License

/**
 * Finalizing a file means that the same file will never be open again.
 * <p>
 * The tmp file mapped to {@code fileName} is renamed to the actual file when the actual file
 * does not exist yet; when both exist the tmp file is deleted. The file's entries are then
 * removed from the offset and tmp-name maps, and stray tmp files belonging to this file
 * (named {@code <fileName>.<timestamp><tmp extension>}) are deleted from the same directory.
 *
 * @param fileName name of the file to finalize
 * @throws IOException if a file system operation fails
 */
protected void finalizeFile(String fileName) throws IOException {
    // NOTE(review): tmpFileName is not null-checked; a missing mapping yields a source path
    // ending in "null" — confirm callers only finalize files that have a tmp mapping.
    String tmpFileName = fileNameToTmpName.get(fileName);
    Path srcPath = new Path(filePath + Path.SEPARATOR + tmpFileName);
    Path destPath = new Path(filePath + Path.SEPARATOR + fileName);

    if (!fs.exists(destPath)) {
        LOG.debug("rename from tmp {} actual {} ", tmpFileName, fileName);
        rename(srcPath, destPath);
    } else if (fs.exists(srcPath)) {
        /*if the destination and src both exists that means there was a failure between file rename and clearing the
        endOffset so we just delete the tmp file*/
        LOG.debug("deleting tmp {}", tmpFileName);
        fs.delete(srcPath, true);
    }
    endOffsets.remove(fileName);
    fileNameToTmpName.remove(fileName);

    //when writing to tmp files there can be vagrant tmp files which we have to clean
    FileStatus[] statuses = fs.listStatus(destPath.getParent());
    for (FileStatus status : statuses) {
        String statusName = status.getPath().getName();
        if (!statusName.endsWith(TMP_EXTENSION) || !statusName.startsWith(destPath.getName())) {
            continue;
        }
        //a tmp file has tmp extension always preceded by timestamp
        int extensionDot = statusName.lastIndexOf('.');
        int timestampDot = statusName.lastIndexOf('.', extensionDot - 1);
        if (timestampDot < 0) {
            // No timestamp segment (e.g. "<name>.tmp"): not one of this operator's tmp files.
            // Skipping it avoids a StringIndexOutOfBoundsException from substring(0, -1).
            continue;
        }
        String actualFileName = statusName.substring(0, timestampDot);
        if (fileName.equals(actualFileName)) {
            LOG.debug("deleting stray file {}", statusName);
            fs.delete(status.getPath(), true);
        }
    }
}

From source file:org.apache.apex.malhar.lib.io.fs.FileSplitterInputTest.java

License:Apache License

@Test
public void testScannerFilterForDuplicates() throws InterruptedException {
    String singleFile = testMeta.dataDirectory + Path.SEPARATOR + "file0.txt";

    // Install a mock scanner restricted to the one .txt file.
    testMeta.scanner = new MockScanner();
    testMeta.fileSplitterInput.setScanner(testMeta.scanner);
    testMeta.fileSplitterInput.getScanner().setScanIntervalMillis(500);
    testMeta.fileSplitterInput.getScanner().setFilePatternRegularExp(".*[.]txt");
    testMeta.fileSplitterInput.getScanner().setFiles(singleFile);

    // Window 1: block on the scanner's semaphore before emitting.
    testMeta.fileSplitterInput.setup(testMeta.context);
    testMeta.fileSplitterInput.beginWindow(1);
    testMeta.scanner.semaphore.acquire();

    testMeta.fileSplitterInput.emitTuples();
    testMeta.fileSplitterInput.endWindow();

    // Window 2: emit again; the file must not be reported a second time.
    testMeta.fileSplitterInput.beginWindow(2);
    testMeta.fileSplitterInput.emitTuples();
    testMeta.fileSplitterInput.endWindow();

    Assert.assertEquals("File metadata", 1, testMeta.fileMetadataSink.collectedTuples.size());
    for (Object collected : testMeta.fileMetadataSink.collectedTuples) {
        FileSplitterInput.FileMetadata fileMeta = (FileSplitterInput.FileMetadata) collected;
        Assert.assertTrue("path: " + fileMeta.getFilePath(),
                testMeta.filePaths.contains(fileMeta.getFilePath()));
        Assert.assertNotNull("name: ", fileMeta.getFileName());
    }

    testMeta.fileMetadataSink.collectedTuples.clear();
    testMeta.fileSplitterInput.teardown();
}