List of usage examples for org.apache.hadoop.fs.Path.SEPARATOR

Field: public static final String SEPARATOR, the path separator string ("/") used by Hadoop's Path class.
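A minimal sketch of the typical pattern, joining path components with Path.SEPARATOR before wrapping them in a Path (the directory and file names below are hypothetical):

import org.apache.hadoop.fs.Path;

public class SeparatorExample {
    public static void main(String[] args) {
        // Join components with the HDFS path separator rather than File.separator
        String dir = "/logs" + Path.SEPARATOR + "my-app";               // hypothetical directory
        Path partFile = new Path(dir + Path.SEPARATOR + "part-00000");  // hypothetical file name
        System.out.println(partFile); // prints /logs/my-app/part-00000
    }
}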
From source file:org.deeplearning4j.iterativereduce.runtime.Utils.java
License:Apache License
private static StringBuffer getCommandsBase(Configuration conf, Properties props, String command, String args) {
    StringBuffer sb = new StringBuffer();
    sb.append("java ");
    sb.append("-Xmx").append(props.getProperty(ConfigFields.YARN_MEMORY, "512")).append("m ");

    if (args != null)
        sb.append(" ").append(args).append(" ");

    // Actual command
    sb.append(command);

    sb.append(" 1> ").append(ApplicationConstants.LOG_DIR_EXPANSION_VAR).append(Path.SEPARATOR)
            .append(ApplicationConstants.STDOUT);
    sb.append(" 2> ").append(ApplicationConstants.LOG_DIR_EXPANSION_VAR).append(Path.SEPARATOR)
            .append(ApplicationConstants.STDERR);

    return sb;
}
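With the default memory setting and no extra args, the returned buffer would contain something along the lines of the following (the worker class name is hypothetical; ApplicationConstants.LOG_DIR_EXPANSION_VAR is the <LOG_DIR> token that the NodeManager expands at container launch):

java -Xmx512m com.example.Worker 1> <LOG_DIR>/stdout 2> <LOG_DIR>/stderr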
From source file:org.gbif.occurrence.download.oozie.ArchiveBuilder.java
License:Creative Commons License
/**
 * Rewrites the zip file by opening the original and appending the pre-compressed content on the fly.
 */
private void appendPreCompressedFiles(File zipFile) throws IOException {
    LOG.info("Appending pre-compressed occurrence content to the Zip: " + zipFile.getAbsolutePath());

    File tempZip = new File(archiveDir, zipFile.getName() + ".part");
    boolean renameOk = zipFile.renameTo(tempZip);
    if (renameOk) {
        try (ZipInputStream zin = new ZipInputStream(new FileInputStream(tempZip));
                ModalZipOutputStream out = new ModalZipOutputStream(
                        new BufferedOutputStream(new FileOutputStream(zipFile)))) {

            // copy existing entries
            ZipEntry entry = zin.getNextEntry();
            while (entry != null) {
                out.putNextEntry(new org.gbif.hadoop.compress.d2.zip.ZipEntry(entry.getName()),
                        ModalZipOutputStream.MODE.DEFAULT);
                ByteStreams.copy(zin, out);
                entry = zin.getNextEntry();
            }

            // NOTE: hive lowercases all the paths
            appendPreCompressedFile(out,
                    new Path((hdfsPath + Path.SEPARATOR + interpretedDataTable).toLowerCase()),
                    INTERPRETED_FILENAME, HeadersFileUtil.getIntepretedTableHeader());
            appendPreCompressedFile(out,
                    new Path((hdfsPath + Path.SEPARATOR + verbatimDataTable).toLowerCase()),
                    VERBATIM_FILENAME, HeadersFileUtil.getVerbatimTableHeader());
            appendPreCompressedFile(out,
                    new Path((hdfsPath + Path.SEPARATOR + multimediaDataTable).toLowerCase()),
                    MULTIMEDIA_FILENAME, HeadersFileUtil.getMultimediaTableHeader());

        } finally {
            // we've rewritten so remove the original
            if (tempZip != null) {
                tempZip.delete();
            }
        }
    } else {
        throw new IllegalStateException("Unable to rename existing zip, to allow appending occurrence data");
    }
}
From source file:org.gbif.occurrence.download.oozie.ArchiveBuilder.java
License:Creative Commons License
/**
 * Adds an eml file per dataset involved into a subfolder "dataset" which is supported by our dwc archive reader.
 * Creates a rights.txt and citation.txt file targeted at humans to quickly yield an overview about rights and
 * datasets involved.
 */
private void addConstituentMetadata() throws IOException {
    Path citationSrc = new Path(hdfsPath + Path.SEPARATOR + citationTable);

    LOG.info("Adding constituent dataset metadata to archive, based on: " + citationSrc);

    // now read the dataset citation table and create an EML file per datasetId
    // first copy from HDFS to local file
    if (!hdfs.exists(citationSrc)) {
        LOG.warn("No citation file directory existing on HDFS, skip creating of dataset metadata {}", citationSrc);
        return;
    }

    final Map<UUID, Integer> srcDatasets = readDatasetCounts(citationSrc);

    File emlDir = new File(archiveDir, "dataset");
    if (!srcDatasets.isEmpty()) {
        emlDir.mkdir();
    }

    Closer closer = Closer.create();
    Writer rightsWriter = closer.register(FileUtils.startNewUtf8File(new File(archiveDir, RIGHTS_FILENAME)));
    Writer citationWriter = closer.register(FileUtils.startNewUtf8File(new File(archiveDir, CITATIONS_FILENAME)));

    // write fixed citations header
    citationWriter.write(CITATION_HEADER);

    // now iterate over constituent UUIDs
    for (Entry<UUID, Integer> dsEntry : srcDatasets.entrySet()) {
        final UUID constituentId = dsEntry.getKey();
        LOG.info("Processing constituent dataset: {}", constituentId);
        // catch errors for each uuid to make sure one broken dataset does not bring down the entire process
        try {
            Dataset srcDataset = datasetService.get(constituentId);

            // citation
            String citationLink = writeCitation(citationWriter, srcDataset, constituentId);
            // rights
            writeRights(rightsWriter, srcDataset, citationLink);
            // eml file
            createEmlFile(constituentId, emlDir);

            // add as constituent for later
            constituents.add(new Constituent(srcDataset.getTitle(), dsEntry.getValue()));

            // add original author as content provider to main dataset description
            Contact provider = getContentProviderContact(srcDataset);
            if (provider != null) {
                dataset.getContacts().add(provider);
            }
        } catch (UniformInterfaceException e) {
            LOG.error(String.format("Registry client http exception: %d \n %s", e.getResponse().getStatus(),
                    e.getResponse().getEntity(String.class)), e);
        } catch (Exception e) {
            LOG.error("Error creating download file", e);
        }
    }
    closer.close();
}
From source file:org.gbif.occurrence.download.oozie.ArchiveBuilder.java
License:Creative Commons License
/**
 * Copies and merges the hive query results files into a single, local occurrence data file.
 */
private void addOccurrenceDataFile(String dataTable, String headerFileName, String destFileName) throws IOException {
    LOG.info("Copy-merge occurrence data hdfs file {} to local filesystem", dataTable);
    final Path dataSrc = new Path(hdfsPath + Path.SEPARATOR + dataTable);
    boolean hasRecords = hdfs.exists(dataSrc);
    if (!hasRecords) {
        hdfs.create(dataSrc);
    }
    if (!isSmallDownload && hasRecords) { // small downloads already include the headers
        FileUtil.copy(new File(headerFileName), hdfs, new Path(dataSrc + Path.SEPARATOR + HEADERS_FILENAME),
                false, conf);
    }
    File rawDataResult = new File(archiveDir, destFileName);
    Path dataDest = new Path(rawDataResult.toURI());
    FileUtil.copyMerge(hdfs, dataSrc, localfs, dataDest, false, conf, null);
    // remove the CRC file created by copyMerge method
    removeDataCRCFile(destFileName);
}
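FileUtil.copyMerge writes the merged result through the checksummed local file system, which typically leaves a hidden .<name>.crc sidecar next to the output file; the removeDataCRCFile call cleans that up. Its implementation is not shown in this listing, but a plausible sketch would be:

// Hypothetical sketch only; the real removeDataCRCFile may differ
private void removeDataCRCFile(String destFileName) {
    File crcFile = new File(archiveDir, "." + destFileName + ".crc");
    if (crcFile.exists()) {
        crcFile.delete();
    }
}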
From source file:org.hdl.tensorflow.yarn.util.Utils.java
License:Apache License
public static Path copyLocalFileToDfs(FileSystem fs, String appId, Path srcPath, String dstFileName)
        throws IOException {
    Path dstPath = new Path(fs.getHomeDirectory(),
            Constants.DEFAULT_APP_NAME + Path.SEPARATOR + appId + Path.SEPARATOR + dstFileName);
    LOG.info("Copying " + srcPath + " to " + dstPath);
    fs.copyFromLocalFile(srcPath, dstPath);
    return dstPath;
}
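A typical call site, assuming a Configuration conf and an ApplicationId appId are in scope (the jar name is made up), stages a local file under the user's home directory on the DFS:

Path jarOnDfs = Utils.copyLocalFileToDfs(FileSystem.get(conf), appId.toString(),
        new Path("tensorflow-on-yarn.jar"), "tensorflow-on-yarn.jar");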
From source file:org.hortonworks.dovetail.am.AppMaster.java
License:Apache License
/**
 * Parse command line options
 *
 * @param args
 *            Command line args
 * @return Whether init successful and run should be invoked
 * @throws IOException
 */
public boolean init(String[] args) throws IOException {

    Map<String, String> envs = System.getenv();

    ContainerId containerId = ConverterUtils.toContainerId(envs.get(Environment.CONTAINER_ID.name()));
    appAttemptID = containerId.getApplicationAttemptId();

    if (!envs.containsKey(ApplicationConstants.APP_SUBMIT_TIME_ENV)) {
        throw new RuntimeException(ApplicationConstants.APP_SUBMIT_TIME_ENV + " not set in the environment");
    }
    if (!envs.containsKey(Environment.NM_HOST.name())) {
        throw new RuntimeException(Environment.NM_HOST.name() + " not set in the environment");
    }
    if (!envs.containsKey(Environment.NM_HTTP_PORT.name())) {
        throw new RuntimeException(Environment.NM_HTTP_PORT.name() + " not set in the environment");
    }
    if (!envs.containsKey(Environment.NM_PORT.name())) {
        throw new RuntimeException(Environment.NM_PORT.name() + " not set in the environment");
    }

    LOG.info("Application master for app" + ", appId=" + appAttemptID.getApplicationId().getId()
            + ", clustertimestamp=" + appAttemptID.getApplicationId().getClusterTimestamp() + ", attemptId="
            + appAttemptID.getAttemptId());

    numContainers = conf.getInt(DovetailConfiguration.DOVETAIL_CONTAINER_COUNT,
            DovetailConfiguration.DEFAULT_CONTAINER_COUNT);

    containerMemory = conf.getInt(DovetailConfiguration.DOVETAIL_CONTAINER_MEMORY,
            DovetailConfiguration.DEFAULT_CONTAINER_MEMORY);

    containerPriority = conf.getInt(DovetailConfiguration.DOVETAIL_CONTAINER_PRIORITY,
            DovetailConfiguration.DEFAULT_DOVETAIL_CONTAINER_PRIORITY);

    amJar = conf.get(DovetailConfiguration.DOVETAIL_AM_HDFS_DIR) + Path.SEPARATOR
            + System.getProperty(DovetailConfiguration.DOVETAIL_AM_JAR);

    return true;
}
From source file:org.jahia.modules.crawl.CrawlJob.java
License:Open Source License
protected void executeInternal(JobExecutionContext context) throws JobExecutionException {
    try {
        JobDataMap mergedJobDataMap = context.getMergedJobDataMap();
        if (conf == null) {
            String baseDirPath = (String) mergedJobDataMap.get("baseDir");
            if (StringUtils.isEmpty(baseDirPath)) {
                baseDirPath = System.getProperty("user.dir");
            }
            String folderName = (String) mergedJobDataMap.get("folderName");
            if (folderName == null) {
                folderName = "jahia-crawler";
            }
            baseDir = new Path(baseDirPath
                    + (StringUtils.isEmpty(folderName) ? "" : System.getProperty("file.separator")) + folderName);
            init();
        }
        List<String> urls = (List<String>) mergedJobDataMap.get("urls");

        JobConf job = new NutchJob(conf);
        Path tmpDir = job.getLocalPath("crawl" + Path.SEPARATOR + getDate());

        CrawlDBUtil.generateSeedList(fs, urlPath, urls);

        // inject
        Injector injector = new Injector(conf);
        injector.inject(crawldbPath, urlPath);

        // generate
        Generator g = new Generator(conf);

        // fetch
        conf.setBoolean("fetcher.parse", true);
        Fetcher fetcher = new Fetcher(conf);
        ParseSegment parseSegment = new ParseSegment(conf);
        CrawlDb crawlDbTool = new CrawlDb(conf);

        int depth = 5;
        int threads = 4;
        int i;
        for (i = 0; i < depth; i++) {
            // generate new segment
            Path generatedSegment = g.generate(crawldbPath, segmentsPath, 1, Long.MAX_VALUE, Long.MAX_VALUE,
                    false, false);

            if (generatedSegment == null) {
                logger.info("Stopping at depth=" + i + " - no more URLs to fetch.");
                break;
            }
            fetcher.fetch(generatedSegment, threads, true);
            if (!Fetcher.isParsing(job)) {
                parseSegment.parse(generatedSegment); // parse it, if needed
            }
            crawlDbTool.update(crawldbPath, new Path[] { generatedSegment }, true, true);
        }
        if (i > 0) {
            LinkDb linkDbTool = new LinkDb(conf);
            Indexer indexer = new Indexer(conf);
            DeleteDuplicates dedup = new DeleteDuplicates(conf);
            IndexMerger merger = new IndexMerger(conf);

            linkDbTool.invert(linkDb, segments, true, true, false); // invert links

            if (indexes != null) {
                // Delete old indexes
                if (fs.exists(indexes)) {
                    logger.info("Deleting old indexes: " + indexes);
                    fs.delete(indexes, true);
                }

                // Delete old index
                if (fs.exists(index)) {
                    logger.info("Deleting old merged index: " + index);
                    fs.delete(index, true);
                }
            }

            // index, dedup & merge
            FileStatus[] fstats = fs.listStatus(segments, HadoopFSUtil.getPassDirectoriesFilter(fs));
            indexer.index(indexes, crawldbPath, linkDb, Arrays.asList(HadoopFSUtil.getPaths(fstats)));
            if (indexes != null) {
                dedup.dedup(new Path[] { indexes });
                fstats = fs.listStatus(indexes, HadoopFSUtil.getPassDirectoriesFilter(fs));
                merger.merge(HadoopFSUtil.getPaths(fstats), index, tmpDir);
            }
        } else {
            logger.warn("No URLs to fetch - check your seed list and URL filters.");
        }
    } catch (IOException e) {
        logger.error("Exception while crawling", e);
    }
}
From source file:org.kaaproject.kaa.server.flume.sink.hdfs.HdfsSinkKey.java
License:Apache License
public String getPath() {
    return rootPath + Path.SEPARATOR + kaaSinkKey.getPath();
}
From source file:org.kaaproject.kaa.server.flume.sink.hdfs.KaaSinkKey.java
License:Apache License
public String getPath() {
    return applicationToken + Path.SEPARATOR + schemaVersion;
}
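Taken together, the two getters above yield sink paths of the form rootPath/applicationToken/schemaVersion, which is the same layout the test below reads back with a glob on /logs/<applicationToken>/<logSchemaVersion>/data*.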
From source file:org.kaaproject.kaa.server.flume.TestKaaHdfsSink.java
License:Apache License
private void readAndCheckResultsFromHdfs(RecordHeader header, List<TestLogData> testLogs) throws IOException {
    Path logsPath = new Path("/logs" + Path.SEPARATOR + applicationToken + Path.SEPARATOR + logSchemaVersion
            + Path.SEPARATOR + "data*");
    FileStatus[] statuses = fileSystem.globStatus(logsPath);
    List<TestLogData> resultTestLogs = new ArrayList<>();
    Schema wrapperSchema = RecordWrapperSchemaGenerator
            .generateRecordWrapperSchema(TestLogData.getClassSchema().toString());
    for (FileStatus status : statuses) {
        FileReader<GenericRecord> fileReader = null;
        try {
            SeekableInput input = new FsInput(status.getPath(), fileSystem.getConf());
            DatumReader<GenericRecord> datumReader = new SpecificDatumReader<>(wrapperSchema);
            fileReader = DataFileReader.openReader(input, datumReader);
            for (GenericRecord record : fileReader) {
                RecordHeader recordHeader = (RecordHeader) record
                        .get(RecordWrapperSchemaGenerator.RECORD_HEADER_FIELD);
                Assert.assertEquals(header, recordHeader);
                TestLogData recordData = (TestLogData) record
                        .get(RecordWrapperSchemaGenerator.RECORD_DATA_FIELD);
                resultTestLogs.add(recordData);
            }
        } finally {
            IOUtils.closeQuietly(fileReader);
        }
    }
    Assert.assertEquals(testLogs, resultTestLogs);
}