List of usage examples for org.apache.hadoop.fs Path getFileSystem
public FileSystem getFileSystem(Configuration conf) throws IOException
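Before the examples from real projects, here is a minimal sketch of the basic call pattern (not taken from the source files below; the path name and configuration are illustrative assumptions):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileSystemSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();    // loads core-site.xml etc. from the classpath, if present
        Path path = new Path("/tmp/example.txt");    // hypothetical path; its scheme determines the FileSystem
        FileSystem fs = path.getFileSystem(conf);    // e.g. LocalFileSystem or DistributedFileSystem
        if (fs.exists(path)) {
            try (FSDataInputStream in = fs.open(path)) {
                System.out.println("First byte: " + in.read());
            }
        }
    }
}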
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputCommitter.java
License:Apache License
@Override
public void commitJob(JobContext context) throws IOException {
    Configuration configuration = context.getConfiguration();
    MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(configuration);
    BasicMapReduceTaskContext taskContext = classLoader.getTaskContextProvider().get(this.taskContext);

    String outputDatasetName = configuration.get(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_DATASET);
    PartitionedFileSet outputDataset = taskContext.getDataset(outputDatasetName);
    Partitioning partitioning = outputDataset.getPartitioning();

    Set<PartitionKey> partitionsToAdd = new HashSet<>();
    Set<String> relativePaths = new HashSet<>();
    // Go over all files in the temporary directory and keep track of partitions to add for them
    FileStatus[] allCommittedTaskPaths = getAllCommittedTaskPaths(context);
    for (FileStatus committedTaskPath : allCommittedTaskPaths) {
        FileSystem fs = committedTaskPath.getPath().getFileSystem(configuration);
        RemoteIterator<LocatedFileStatus> fileIter = fs.listFiles(committedTaskPath.getPath(), true);
        while (fileIter.hasNext()) {
            Path path = fileIter.next().getPath();
            String relativePath = getRelative(committedTaskPath.getPath(), path);

            int lastPathSepIdx = relativePath.lastIndexOf(Path.SEPARATOR);
            if (lastPathSepIdx == -1) {
                // this shouldn't happen because each relative path should consist of at least one partition key
                // and the output file name
                LOG.warn("Skipping path '{}'. Its relative path '{}' has fewer than two parts", path, relativePath);
                continue;
            }
            // relativePath = "../key1/key2/part-m-00000"
            // relativeDir  = "../key1/key2"
            // fileName     = "part-m-00000"
            String relativeDir = relativePath.substring(0, lastPathSepIdx);
            String fileName = relativePath.substring(lastPathSepIdx + 1);

            Path finalDir = new Path(FileOutputFormat.getOutputPath(context), relativeDir);
            Path finalPath = new Path(finalDir, fileName);
            if (fs.exists(finalPath)) {
                throw new FileAlreadyExistsException("Final output path " + finalPath + " already exists");
            }
            PartitionKey partitionKey = getPartitionKey(partitioning, relativeDir);
            partitionsToAdd.add(partitionKey);
            relativePaths.add(relativeDir);
        }
    }

    // We need to copy to the parent of the FileOutputFormat's outputDir, since we added a _temporary_jobId
    // suffix to the original outputDir.
    Path finalOutput = FileOutputFormat.getOutputPath(context);
    FileSystem fs = finalOutput.getFileSystem(configuration);
    for (FileStatus stat : getAllCommittedTaskPaths(context)) {
        mergePaths(fs, stat, finalOutput);
    }

    // compute the metadata to be written to every output partition
    Map<String, String> metadata = ConfigurationUtil.getNamedConfigurations(this.taskContext.getConfiguration(),
            PartitionedFileSetArguments.OUTPUT_PARTITION_METADATA_PREFIX);

    // create all the necessary partitions
    for (PartitionKey partitionKey : partitionsToAdd) {
        PartitionOutput partitionOutput = outputDataset.getPartitionOutput(partitionKey);
        partitionOutput.setMetadata(metadata);
        partitionOutput.addPartition();
    }

    // close the TaskContext, which flushes dataset operations
    try {
        taskContext.flushOperations();
    } catch (Exception e) {
        Throwables.propagateIfPossible(e, IOException.class);
        throw new IOException(e);
    }

    // delete the job-specific _temporary folder and create a _done file in the o/p folder
    cleanupJob(context);

    // mark all the final output paths with a _SUCCESS file, if configured to do so (default = true)
    if (configuration.getBoolean(SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, true)) {
        for (String relativePath : relativePaths) {
            Path pathToMark = new Path(finalOutput, relativePath);
            Path markerPath = new Path(pathToMark, SUCCEEDED_FILE_NAME);
            fs.createNewFile(markerPath);
        }
    }
}
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputCommitter.java
License:Apache License
/**
 * Get a list of all paths where output from committed tasks is stored.
 *
 * @param context the context of the current job
 * @return the list of these Paths/FileStatuses.
 * @throws IOException
 */
private FileStatus[] getAllCommittedTaskPaths(JobContext context) throws IOException {
    Path jobAttemptPath = getJobAttemptPath(context);
    FileSystem fs = jobAttemptPath.getFileSystem(context.getConfiguration());
    return fs.listStatus(jobAttemptPath, new CommittedTaskFilter());
}
From source file:co.nubetech.hiho.dedup.DelimitedLineRecordReader.java
License:Apache License
/**
 * Initializes the reader for the given split, reading the delimiter and
 * key column from the job configuration.
 *
 * @param genericSplit the file split to read
 * @param context the task attempt context
 */
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.delimiter = job.get(DelimitedTextInputFormat.DELIMITER_CONF);
    this.column = job.getInt(DelimitedTextInputFormat.COLUMN_CONF, 0);
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    if (codec != null) {
        in = new LineReader(codec.createInputStream(fileIn), job);
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        in = new LineReader(fileIn, job);
    }
    if (skipFirstLine) {
        // skip first line and re-establish "start".
        start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}
From source file:co.nubetech.hiho.job.ExportToOracleDb.java
License:Apache License
public static String getAlterTableDML(Path inputPath, Configuration conf) throws IOException, HIHOException {
    // after running the job, we need to alter the external table to take
    // care of the files we have added
    FileStatus[] contents = inputPath.getFileSystem(conf).listStatus(inputPath);
    if (contents != null) {
        StringBuilder dml = new StringBuilder();
        dml.append(" ALTER TABLE ");
        dml.append(getTableName(conf.get(HIHOConf.EXTERNAL_TABLE_DML)));
        dml.append(" LOCATION (");
        int i = 0;
        for (FileStatus content : contents) {
            String fileName = content.getPath().getName();
            dml.append("\'");
            dml.append(fileName);
            dml.append("\'");
            // append a comma after every file name except the last one
            if (i < contents.length - 1) {
                dml.append(",");
            }
            i++;
        }
        // close the LOCATION clause; the caller executes this DML to alter the table location
        dml.append(")");
        return dml.toString();
    }
    return null;
}
From source file:co.nubetech.hiho.job.TestExportToOracleDb.java
License:Apache License
@Test
public void testAlterTableDMl() throws HIHOException, IOException {
    Configuration conf = mock(Configuration.class);
    Path path = mock(Path.class);
    FileStatus status1 = mock(FileStatus.class);
    Path path1 = mock(Path.class);
    when(path1.getName()).thenReturn("part-xxxxx");
    when(status1.getPath()).thenReturn(path1);
    FileStatus status2 = mock(FileStatus.class);
    Path path2 = mock(Path.class);
    when(path2.getName()).thenReturn("part-yyyyy");
    when(status2.getPath()).thenReturn(path2);
    FileSystem fs = mock(FileSystem.class);
    when(fs.listStatus(path)).thenReturn(new FileStatus[] { status1, status2 });
    when(path.getFileSystem(conf)).thenReturn(fs);
    when(conf.get(HIHOConf.EXTERNAL_TABLE_DML)).thenReturn(
            "create table age( i Number, n Varchar(20), a Number)organization external ( type oracle_loader default directory ext_dir access parameters (records delimited by newlinefields terminated by ','missing field values are null )location (/home/nube/:file.txt) reject' limit unlimited;");
    String dml = ExportToOracleDb.getAlterTableDML(path, conf);
    assertEquals(" ALTER TABLE age LOCATION ('part-xxxxx','part-yyyyy')", dml);
}
From source file:co.nubetech.hiho.mapred.input.FileStreamRecordReader.java
License:Apache License
@Override
public FSDataInputStream createValue() {
    logger.debug("Creating value");
    FSDataInputStream stream = null;
    Path file = split.getPath();
    logger.debug("Path is " + file);
    fileName = file.getName();
    try {
        FileSystem fs = file.getFileSystem(configuration);
        stream = new FSDataInputStream(fs.open(file));
    } catch (IOException e) {
        e.printStackTrace();
    }
    logger.debug("Opened stream");
    return stream;
}
From source file:co.nubetech.hiho.mapreduce.lib.input.FileStreamRecordReader.java
License:Apache License
@Override
public boolean nextKeyValue() throws IOException {
    logger.debug("Inside nextKeyValue");
    if (!isRead) {
        Path file = split.getPath();
        logger.debug("Path is " + file);
        fileName = file.getName();
        FileSystem fs = file.getFileSystem(context.getConfiguration());
        stream = fs.open(file);
        logger.debug("Opened stream");
        isRead = true;
        return true;
    }
    return false;
}
From source file:co.nubetech.hiho.mapreduce.lib.output.AppendSequenceFileOutputFormat.java
License:Apache License
@Override
public Path getDefaultWorkFile(TaskAttemptContext context, String extension) throws IOException {
    Path p1;
    isAppend = context.getConfiguration().get(HIHOConf.IS_APPEND, "false");
    if (isAppend.equalsIgnoreCase("false")) {
        p1 = super.getDefaultWorkFile(context, extension);
    } else {
        FileOutputCommitter committer = (FileOutputCommitter) getOutputCommitter(context);
        Path p = committer.getWorkPath();
        fileCount = p.getFileSystem(context.getConfiguration()).getContentSummary(getOutputPath(context))
                .getFileCount();
        if (fileCount > 1) {
            fileCount = fileCount - 1;
        }
        p1 = new Path(committer.getWorkPath(), getUniqueFile(context, "part", extension));
    }
    return p1;
}
From source file:co.nubetech.hiho.mapreduce.lib.output.AppendTextOutputFormat.java
License:Apache License
@Override
public Path getDefaultWorkFile(TaskAttemptContext context, String extension) throws IOException {
    Path p1;
    isAppend = context.getConfiguration().get(HIHOConf.IS_APPEND);
    if (isAppend.equalsIgnoreCase("false")) {
        p1 = super.getDefaultWorkFile(context, extension);
    } else {
        FileOutputCommitter committer = (FileOutputCommitter) getOutputCommitter(context);
        Path p = committer.getWorkPath();
        fileCount = p.getFileSystem(context.getConfiguration()).getContentSummary(getOutputPath(context))
                .getFileCount();
        if (fileCount > 1) {
            fileCount = fileCount - 1;
        }
        p1 = new Path(committer.getWorkPath(), getUniqueFile(context, "part", extension));
    }
    return p1;
}
From source file:co.nubetech.hiho.mapreduce.lib.output.FTPTextOutputFormat.java
License:Apache License
@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    String ip = conf.get(HIHOConf.FTP_ADDRESS);
    String portno = conf.get(HIHOConf.FTP_PORT);
    String usr = conf.get(HIHOConf.FTP_USER);
    String pwd = conf.get(HIHOConf.FTP_PASSWORD);
    String dir = getOutputPath(job).toString();
    System.out.println("\n\ninside ftpoutputformat" + ip + " " + portno + " " + usr + " " + pwd + " " + dir);
    String keyValueSeparator = conf.get("mapred.textoutputformat.separator", "\t");
    FTPClient f = new FTPClient();
    f.connect(ip, Integer.parseInt(portno));
    f.login(usr, pwd);
    f.changeWorkingDirectory(dir);
    f.setFileType(FTP.BINARY_FILE_TYPE);

    boolean isCompressed = getCompressOutput(job);
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);
    FileSystem fs = file.getFileSystem(conf);
    String filename = file.getName();
    if (!isCompressed) {
        // FSDataOutputStream fileOut = fs.create(file, false);
        OutputStream os = f.appendFileStream(filename);
        DataOutputStream fileOut = new DataOutputStream(os);
        return new FTPLineRecordWriter<K, V>(fileOut, new String(keyValueSeparator), f);
    } else {
        // FSDataOutputStream fileOut = fs.create(file, false);
        OutputStream os = f.appendFileStream(filename);
        DataOutputStream fileOut = new DataOutputStream(os);
        return new FTPLineRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),
                keyValueSeparator, f);
    }
}