List of usage examples for org.apache.hadoop.fs.Path.SEPARATOR
String SEPARATOR
To view the source code for org.apache.hadoop.fs.Path.SEPARATOR, click the Source Link.
From source file:org.apache.apex.malhar.lib.fs.s3.S3InitiateFileUploadOperator.java
License:Apache License
/**
 * Builds the key name for a file by joining the output directory path and the file path.
 * A leading {@link Path#SEPARATOR} on the file path is stripped with
 * {@code StringUtils.removeStart} before the two parts are joined with a separator.
 *
 * @param filePath file path to upload
 * @return key name for the given file
 */
private String getKeyName(String filePath) {
  final String relativePath = StringUtils.removeStart(filePath, Path.SEPARATOR);
  return outputDirectoryPath + Path.SEPARATOR + relativePath;
}
From source file:org.apache.apex.malhar.lib.fs.s3.S3Reconciler.java
License:Apache License
/**
 * Uploads the file on Amazon S3 using putObject API from S3 client.
 *
 * <p>A path that no longer exists is skipped with a debug message (it is assumed to be a
 * replay). Any other {@link IOException} is logged and not rethrown.
 *
 * @param outputMetaData meta data (path, file name and size) of the file to upload
 * @throws RuntimeException if the reported size is not below {@link Integer#MAX_VALUE}
 */
@Override
protected void processCommittedData(FSRecordCompactionOperator.OutputMetaData outputMetaData) {
  try {
    Path path = new Path(outputMetaData.getPath());
    if (!fs.exists(path)) {
      logger.debug("Ignoring non-existent path assuming replay : {}", path);
      return;
    }
    // try-with-resources guarantees the input stream is released on every exit path,
    // including the size-limit exception and the case where the file vanished before upload.
    try (FSDataInputStream fsinput = fs.open(path)) {
      ObjectMetadata omd = new ObjectMetadata();
      omd.setContentLength(outputMetaData.getSize());
      String keyName = directoryName + Path.SEPARATOR + outputMetaData.getFileName();
      PutObjectRequest request = new PutObjectRequest(bucketName, keyName, fsinput, omd);
      if (outputMetaData.getSize() < Integer.MAX_VALUE) {
        // The read limit is an int, hence the guard above.
        request.getRequestClientOptions().setReadLimit((int) outputMetaData.getSize());
      } else {
        throw new RuntimeException("PutRequestSize greater than Integer.MAX_VALUE");
      }
      // Re-check right before uploading: the file may have been removed in the meantime.
      if (fs.exists(path)) {
        PutObjectResult result = s3client.putObject(request);
        logger.debug("File {} Uploaded at {}", keyName, result.getETag());
      }
    }
  } catch (FileNotFoundException e) {
    logger.debug("Ignoring non-existent path assuming replay : {}", outputMetaData.getPath());
  } catch (IOException e) {
    // Pass the exception itself as the last argument so the stack trace is not lost.
    logger.error("Unable to create Stream: {}", e.getMessage(), e);
  }
}
From source file:org.apache.apex.malhar.lib.fs.s3.S3Reconciler.java
License:Apache License
/** * Clears intermediate/temporary files if any *///w w w . ja va2s .c o m @Override public void endWindow() { while (doneTuples.peek() != null) { FSRecordCompactionOperator.OutputMetaData metaData = doneTuples.poll(); removeIntermediateFiles(metaData); if (outputPort.isConnected()) { // Emit the meta data with S3 path metaData.setPath(getDirectoryName() + Path.SEPARATOR + metaData.getFileName()); outputPort.emit(metaData); } } }
From source file:org.apache.apex.malhar.lib.fs.s3.S3ReconcilerTest.java
License:Apache License
@Test public void verifyS3ReconclierOutputTuple() throws Exception { String fileName = "s3-compaction_1.0"; String path = testMeta.outputPath + Path.SEPARATOR + fileName; long size = 80; File file = new File(path); File tmpFile = new File(path + "." + System.currentTimeMillis() + ".tmp"); StringBuffer sb = new StringBuffer(); for (int i = 0; i < 10; i++) { sb.append("Record" + i + "\n"); if (i == 5) { FileUtils.write(tmpFile, sb.toString()); }/* www. j a v a 2s .c om*/ } FileUtils.write(file, sb.toString()); // Set test sink and later on collect the emitted tuples in this sink. testMeta.sink = new CollectorTestSink<Object>(); testMeta.underTest.outputPort.setSink(testMeta.sink); // Create meta information to be emitted as tuple. FSRecordCompactionOperator.OutputMetaData outputMetaData = new FSRecordCompactionOperator.OutputMetaData( path, fileName, size); testMeta.underTest.beginWindow(0); testMeta.underTest.input.process(outputMetaData); testMeta.underTest.endWindow(); for (int i = 1; i < 60; i++) { testMeta.underTest.beginWindow(i); testMeta.underTest.endWindow(); } testMeta.underTest.committed(59); // retrieve the result count from output port. testMeta.sink.waitForResultCount(1, 12000); for (int i = 60; i < 70; i++) { testMeta.underTest.beginWindow(i); Thread.sleep(10); testMeta.underTest.endWindow(); } // verify the number of tuples emitted. Assert.assertEquals(1, testMeta.sink.getCount(false)); }
From source file:org.apache.apex.malhar.lib.fs.s3.S3ReconcilerTest.java
License:Apache License
@Test public void testFileClearing() throws Exception { String fileName = "s3-compaction_1.0"; String path = testMeta.outputPath + Path.SEPARATOR + fileName; long size = 80; File file = new File(path); File tmpFile = new File(path + "." + System.currentTimeMillis() + ".tmp"); StringBuffer sb = new StringBuffer(); for (int i = 0; i < 10; i++) { sb.append("Record" + i + "\n"); if (i == 5) { FileUtils.write(tmpFile, sb.toString()); }//from ww w . ja v a 2s . c o m } FileUtils.write(file, sb.toString()); FSRecordCompactionOperator.OutputMetaData outputMetaData = new FSRecordCompactionOperator.OutputMetaData( path, fileName, size); testMeta.underTest.beginWindow(0); testMeta.underTest.input.process(outputMetaData); testMeta.underTest.endWindow(); for (int i = 1; i < 60; i++) { testMeta.underTest.beginWindow(i); testMeta.underTest.endWindow(); } testMeta.underTest.committed(59); for (int i = 60; i < 70; i++) { testMeta.underTest.beginWindow(i); Thread.sleep(10); testMeta.underTest.endWindow(); } Collection<File> files = FileUtils.listFiles(new File(testMeta.outputPath), null, true); Assert.assertEquals(0, files.size()); }
From source file:org.apache.apex.malhar.lib.io.fs.AbstractFileOutputOperator.java
License:Apache License
@Override public void setup(Context.OperatorContext context) { LOG.debug("setup initiated"); if (expireStreamAfterAccessMillis == null) { expireStreamAfterAccessMillis = (long) (context .getValue(Context.DAGContext.STREAMING_WINDOW_SIZE_MILLIS) * context.getValue(Context.DAGContext.CHECKPOINT_WINDOW_COUNT)); }//from w w w .j a va 2 s. co m rollingFile = (maxLength < Long.MAX_VALUE) || (rotationWindows > 0); //Getting required file system instance. try { fs = getFSInstance(); } catch (IOException ex) { throw new RuntimeException(ex); } if (replication <= 0) { replication = fs.getDefaultReplication(new Path(filePath)); } LOG.debug("FS class {}", fs.getClass()); //building cache RemovalListener<String, FSFilterStreamContext> removalListener = createCacheRemoveListener(); CacheLoader<String, FSFilterStreamContext> loader = createCacheLoader(); streamsCache = CacheBuilder.newBuilder().maximumSize(maxOpenFiles) .expireAfterAccess(expireStreamAfterAccessMillis, TimeUnit.MILLISECONDS) .removalListener(removalListener).build(loader); LOG.debug("File system class: {}", fs.getClass()); LOG.debug("end-offsets {}", endOffsets); try { //Restore the files in case they were corrupted and the operator was re-deployed. Path writerPath = new Path(filePath); if (fs.exists(writerPath)) { for (String seenFileName : endOffsets.keySet()) { String seenFileNamePart = getPartFileNamePri(seenFileName); LOG.debug("seenFileNamePart: {}", seenFileNamePart); Path activeFilePath; if (alwaysWriteToTmp) { String tmpFileName = fileNameToTmpName.get(seenFileNamePart); activeFilePath = new Path(filePath + Path.SEPARATOR + tmpFileName); } else { activeFilePath = new Path(filePath + Path.SEPARATOR + seenFileNamePart); } if (fs.exists(activeFilePath)) { recoverFile(seenFileName, seenFileNamePart, activeFilePath); } } } if (rollingFile) { //delete the left over future rolling files produced from the previous crashed instance of this operator. 
for (String seenFileName : endOffsets.keySet()) { try { Integer fileOpenPart = this.openPart.get(seenFileName).getValue(); int nextPart = fileOpenPart + 1; String seenPartFileName; while (true) { seenPartFileName = getPartFileName(seenFileName, nextPart); Path activePath = null; if (alwaysWriteToTmp) { String tmpFileName = fileNameToTmpName.get(seenPartFileName); if (tmpFileName != null) { activePath = new Path(filePath + Path.SEPARATOR + tmpFileName); } } else { activePath = new Path(filePath + Path.SEPARATOR + seenPartFileName); } if (activePath == null || !fs.exists(activePath)) { break; } fs.delete(activePath, true); nextPart++; } seenPartFileName = getPartFileName(seenFileName, fileOpenPart); Path activePath = null; if (alwaysWriteToTmp) { String tmpFileName = fileNameToTmpName.get(seenPartFileName); if (tmpFileName != null) { activePath = new Path( filePath + Path.SEPARATOR + fileNameToTmpName.get(seenPartFileName)); } } else { activePath = new Path(filePath + Path.SEPARATOR + seenPartFileName); } if (activePath != null && fs.exists(activePath) && fs.getFileStatus(activePath).getLen() > maxLength) { //Handle the case when restoring to a checkpoint where the current rolling file //already has a length greater than max length. LOG.debug("rotating file at setup."); rotate(seenFileName); } } catch (IOException | ExecutionException e) { throw new RuntimeException(e); } } } LOG.debug("setup completed"); } catch (IOException e) { throw new RuntimeException(e); } this.context = context; fileCounters.setCounter(Counters.TOTAL_BYTES_WRITTEN, new MutableLong()); fileCounters.setCounter(Counters.TOTAL_TIME_WRITING_MILLISECONDS, new MutableLong()); }
From source file:org.apache.apex.malhar.lib.io.fs.AbstractFileOutputOperator.java
License:Apache License
/** * Recovers a file which exists on the disk. If the length of the file is not same as the * length which the operator remembers then the file is truncated. <br/> * When always writing to a temporary file, then a file is restored even when the length is same as what the * operator remembers however this is done only for files which had open streams that weren't closed before * failure./* w w w . j ava 2 s . c o m*/ * * @param filename name of the actual file. * @param partFileName name of the part file. When not rolling this is same as filename; otherwise this is the * latest open part file name. * @param filepath path of the file. When always writing to temp file, this is the path of the temp file; * otherwise path of the actual file. * @throws IOException */ private void recoverFile(String filename, String partFileName, Path filepath) throws IOException { LOG.debug("path exists {}", filepath); long offset = endOffsets.get(filename).longValue(); FSDataInputStream inputStream = fs.open(filepath); FileStatus status = fs.getFileStatus(filepath); if (status.getLen() != offset) { LOG.info("path corrupted {} {} {}", filepath, offset, status.getLen()); byte[] buffer = new byte[COPY_BUFFER_SIZE]; String recoveryFileName = partFileName + '.' + System.currentTimeMillis() + TMP_EXTENSION; Path recoveryFilePath = new Path(filePath + Path.SEPARATOR + recoveryFileName); FSDataOutputStream fsOutput = openStream(recoveryFilePath, false); while (inputStream.getPos() < offset) { long remainingBytes = offset - inputStream.getPos(); int bytesToWrite = remainingBytes < COPY_BUFFER_SIZE ? 
(int) remainingBytes : COPY_BUFFER_SIZE; inputStream.read(buffer); fsOutput.write(buffer, 0, bytesToWrite); } flush(fsOutput); fsOutput.close(); inputStream.close(); LOG.debug("active {} recovery {} ", filepath, recoveryFilePath); if (alwaysWriteToTmp) { //recovery file is used as the new tmp file and we cannot delete the old tmp file because when the operator //is restored to an earlier check-pointed window, it will look for an older tmp. fileNameToTmpName.put(partFileName, recoveryFileName); } else { LOG.debug("recovery path {} actual path {} ", recoveryFilePath, status.getPath()); rename(recoveryFilePath, status.getPath()); } } else { if (alwaysWriteToTmp && filesWithOpenStreams.contains(filename)) { String currentTmp = partFileName + '.' + System.currentTimeMillis() + TMP_EXTENSION; FSDataOutputStream outputStream = openStream(new Path(filePath + Path.SEPARATOR + currentTmp), false); IOUtils.copy(inputStream, outputStream); streamsCache.put(filename, new FSFilterStreamContext(outputStream)); fileNameToTmpName.put(partFileName, currentTmp); } inputStream.close(); } }
From source file:org.apache.apex.malhar.lib.io.fs.AbstractFileOutputOperator.java
License:Apache License
/** * Creates the {@link CacheLoader} for loading an output stream when it is not present in the cache. * @return cache loader/*from w w w. j a va 2s . com*/ */ private CacheLoader<String, FSFilterStreamContext> createCacheLoader() { return new CacheLoader<String, FSFilterStreamContext>() { @Override public FSFilterStreamContext load(@Nonnull String filename) { if (rollingFile) { RotationState state = getRotationState(filename); if (rollingFile && state.rotated) { openPart.get(filename).add(1); state.rotated = false; MutableLong offset = endOffsets.get(filename); offset.setValue(0); } } String partFileName = getPartFileNamePri(filename); Path originalFilePath = new Path(filePath + Path.SEPARATOR + partFileName); Path activeFilePath; if (!alwaysWriteToTmp) { activeFilePath = originalFilePath; } else { //MLHR-1776 : writing to tmp file String tmpFileName = fileNameToTmpName.get(partFileName); if (tmpFileName == null) { tmpFileName = partFileName + '.' + System.currentTimeMillis() + TMP_EXTENSION; fileNameToTmpName.put(partFileName, tmpFileName); } activeFilePath = new Path(filePath + Path.SEPARATOR + tmpFileName); } FSDataOutputStream fsOutput; boolean sawThisFileBefore = endOffsets.containsKey(filename); try { if (fs.exists(originalFilePath) || (alwaysWriteToTmp && fs.exists(activeFilePath))) { if (sawThisFileBefore) { FileStatus fileStatus = fs.getFileStatus(activeFilePath); MutableLong endOffset = endOffsets.get(filename); if (endOffset != null) { endOffset.setValue(fileStatus.getLen()); } else { endOffsets.put(filename, new MutableLong(fileStatus.getLen())); } fsOutput = openStream(activeFilePath, true); LOG.debug("appending to {}", activeFilePath); } else { //We never saw this file before and we don't want to append //If the file is rolling we need to delete all its parts. 
if (rollingFile) { int part = 0; while (true) { Path seenPartFilePath = new Path( filePath + Path.SEPARATOR + getPartFileName(filename, part)); if (!fs.exists(seenPartFilePath)) { break; } fs.delete(seenPartFilePath, true); part = part + 1; } fsOutput = openStream(activeFilePath, false); } else { //Not rolling is easy, just delete the file and create it again. fs.delete(activeFilePath, true); if (alwaysWriteToTmp) { //we need to delete original file if that exists if (fs.exists(originalFilePath)) { fs.delete(originalFilePath, true); } } fsOutput = openStream(activeFilePath, false); } } } else { fsOutput = openStream(activeFilePath, false); } filesWithOpenStreams.add(filename); LOG.info("opened {}, active {}", partFileName, activeFilePath); return new FSFilterStreamContext(fsOutput); } catch (IOException e) { throw new RuntimeException(e); } } }; }
From source file:org.apache.apex.malhar.lib.io.fs.AbstractFileOutputOperator.java
License:Apache License
/** * Finalizing a file means that the same file will never be open again. * * @param fileName name of the file to finalize *///from w w w . jav a 2 s . c o m protected void finalizeFile(String fileName) throws IOException { String tmpFileName = fileNameToTmpName.get(fileName); Path srcPath = new Path(filePath + Path.SEPARATOR + tmpFileName); Path destPath = new Path(filePath + Path.SEPARATOR + fileName); if (!fs.exists(destPath)) { LOG.debug("rename from tmp {} actual {} ", tmpFileName, fileName); rename(srcPath, destPath); } else if (fs.exists(srcPath)) { /*if the destination and src both exists that means there was a failure between file rename and clearing the endOffset so we just delete the tmp file*/ LOG.debug("deleting tmp {}", tmpFileName); fs.delete(srcPath, true); } endOffsets.remove(fileName); fileNameToTmpName.remove(fileName); //when writing to tmp files there can be vagrant tmp files which we have to clean FileStatus[] statuses = fs.listStatus(destPath.getParent()); for (FileStatus status : statuses) { String statusName = status.getPath().getName(); if (statusName.endsWith(TMP_EXTENSION) && statusName.startsWith(destPath.getName())) { //a tmp file has tmp extension always preceded by timestamp String actualFileName = statusName.substring(0, statusName.lastIndexOf('.', statusName.lastIndexOf('.') - 1)); if (fileName.equals(actualFileName)) { LOG.debug("deleting stray file {}", statusName); fs.delete(status.getPath(), true); } } } }
From source file:org.apache.apex.malhar.lib.io.fs.FileSplitterInputTest.java
License:Apache License
/**
 * Points the scanner at a single file, runs two windows and checks that the file's
 * metadata is emitted exactly once.
 */
@Test
public void testScannerFilterForDuplicates() throws InterruptedException {
  final String filePath = testMeta.dataDirectory + Path.SEPARATOR + "file0.txt";

  // Install the mock scanner and configure it through the operator.
  testMeta.scanner = new MockScanner();
  testMeta.fileSplitterInput.setScanner(testMeta.scanner);
  testMeta.fileSplitterInput.getScanner().setScanIntervalMillis(500);
  testMeta.fileSplitterInput.getScanner().setFilePatternRegularExp(".*[.]txt");
  testMeta.fileSplitterInput.getScanner().setFiles(filePath);

  testMeta.fileSplitterInput.setup(testMeta.context);

  // First window: acquire the scanner's semaphore before emitting.
  testMeta.fileSplitterInput.beginWindow(1);
  testMeta.scanner.semaphore.acquire();
  testMeta.fileSplitterInput.emitTuples();
  testMeta.fileSplitterInput.endWindow();

  // Second window.
  testMeta.fileSplitterInput.beginWindow(2);
  testMeta.fileSplitterInput.emitTuples();
  testMeta.fileSplitterInput.endWindow();

  Assert.assertEquals("File metadata", 1, testMeta.fileMetadataSink.collectedTuples.size());
  for (Object tuple : testMeta.fileMetadataSink.collectedTuples) {
    final FileSplitterInput.FileMetadata metadata = (FileSplitterInput.FileMetadata) tuple;
    final String emittedPath = metadata.getFilePath();
    Assert.assertTrue("path: " + emittedPath, testMeta.filePaths.contains(emittedPath));
    Assert.assertNotNull("name: ", metadata.getFileName());
  }

  testMeta.fileMetadataSink.collectedTuples.clear();
  testMeta.fileSplitterInput.teardown();
}