List of usage examples for org.apache.hadoop.fs.FileSystem.createNewFile
public boolean createNewFile(Path f) throws IOException
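Before the project examples below, a minimal sketch of the call itself may help. It assumes a default Configuration (so whatever filesystem that resolves to), and the path name demo.txt is made up for illustration. createNewFile creates a zero-length file and returns false, rather than throwing, when the path already exists:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CreateNewFileDemo {
    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());
        Path p = new Path("demo.txt"); // hypothetical path, resolved against the working/home directory
        System.out.println(fs.createNewFile(p)); // true: the zero-length file was created
        System.out.println(fs.createNewFile(p)); // false: the path already exists, nothing is changed
        fs.delete(p, false); // clean up
    }
}

That boolean return is why several of the examples below use createNewFile for _SUCCESS markers and task-done flags: the first writer wins and every later caller sees false.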
From source file:CountJob.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String msgs = "";
    doJob("1", args, msgs);
    doJob("2", args, msgs);
    FileSystem hdfs = FileSystem.get(conf);

    // Compare the outputs of the two jobs line by line.
    BufferedReader bfr = new BufferedReader(
            new InputStreamReader(hdfs.open(new Path("/data/output/temp/12/part-r-00000"))));
    BufferedReader bfr2 = new BufferedReader(
            new InputStreamReader(hdfs.open(new Path("/data/output/temp/22/part-r-00000"))));
    boolean same = true;
    String line1 = bfr.readLine();
    String line2 = bfr2.readLine();
    while (same) {
        if ((line1 == null && line2 != null) || (line1 != null && line2 == null)) {
            same = false;
            break;
        } else if (line1 == null && line2 == null) {
            break;
        } else if (line1.equals(line2)) {
            line1 = bfr.readLine();
            line2 = bfr2.readLine();
        } else {
            same = false;
            break;
        }
    }

    if (same) {
        System.out.print("same " + same + "\n");
        Path localP = new Path("/tmp/output.txt");
        hdfs.copyToLocalFile(new Path("/data/output/temp/12/part-r-00000"), localP);
        hdfs.copyFromLocalFile(localP, new Path(args[1] + "/part-r-00000"));
        // Mark the published output as complete.
        hdfs.createNewFile(new Path(args[1] + "/_SUCCESS"));
        System.out.print("created result");
    } else {
        System.out.print("Different");
        // Outputs disagree: run a third job and publish its result instead.
        doJob("3", args, msgs);
        Path localP = new Path("/tmp/output.txt");
        hdfs.copyToLocalFile(new Path("/data/output/temp/32/part-r-00000"), localP);
        hdfs.copyFromLocalFile(localP, new Path(args[1] + "/part-r-00000"));
        hdfs.createNewFile(new Path(args[1] + "/_SUCCESS"));
        System.out.print("created result");
    }
    hdfs.delete(new Path("/data/output/temp/12/part-r-00000"), true);
    hdfs.delete(new Path("/data/output/temp/22/part-r-00000"), true);
}
From source file:be.ugent.intec.halvade.utils.HalvadeConf.java
License:Open Source License
public static boolean addTaskRunning(Configuration conf, String val) throws IOException, URISyntaxException {
    // Drop the attempt suffix from the task id, so a second attempt rewrites the same file.
    val = val.substring(0, val.lastIndexOf("_"));
    String filepath = conf.get(outdir) + tasksDone + val;
    FileSystem fs = FileSystem.get(new URI(filepath), conf);
    // Returns false if the marker already exists, i.e. the task was already recorded.
    return fs.createNewFile(new Path(filepath));
}
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputCommitter.java
License:Apache License
@Override
public void commitJob(JobContext context) throws IOException {
    Configuration configuration = context.getConfiguration();
    MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(configuration);
    BasicMapReduceTaskContext taskContext = classLoader.getTaskContextProvider().get(this.taskContext);

    String outputDatasetName = configuration.get(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_DATASET);
    PartitionedFileSet outputDataset = taskContext.getDataset(outputDatasetName);
    Partitioning partitioning = outputDataset.getPartitioning();

    Set<PartitionKey> partitionsToAdd = new HashSet<>();
    Set<String> relativePaths = new HashSet<>();
    // Go over all files in the temporary directory and keep track of partitions to add for them
    FileStatus[] allCommittedTaskPaths = getAllCommittedTaskPaths(context);
    for (FileStatus committedTaskPath : allCommittedTaskPaths) {
        FileSystem fs = committedTaskPath.getPath().getFileSystem(configuration);
        RemoteIterator<LocatedFileStatus> fileIter = fs.listFiles(committedTaskPath.getPath(), true);
        while (fileIter.hasNext()) {
            Path path = fileIter.next().getPath();
            String relativePath = getRelative(committedTaskPath.getPath(), path);

            int lastPathSepIdx = relativePath.lastIndexOf(Path.SEPARATOR);
            if (lastPathSepIdx == -1) {
                // this shouldn't happen because each relative path should consist of at least one partition key
                // and the output file name
                LOG.warn("Skipping path '{}'. Its relative path '{}' has fewer than two parts", path, relativePath);
                continue;
            }
            // relativePath = "../key1/key2/part-m-00000"
            // relativeDir = "../key1/key2"
            // fileName = "part-m-00000"
            String relativeDir = relativePath.substring(0, lastPathSepIdx);
            String fileName = relativePath.substring(lastPathSepIdx + 1);

            Path finalDir = new Path(FileOutputFormat.getOutputPath(context), relativeDir);
            Path finalPath = new Path(finalDir, fileName);
            if (fs.exists(finalPath)) {
                throw new FileAlreadyExistsException("Final output path " + finalPath + " already exists");
            }
            PartitionKey partitionKey = getPartitionKey(partitioning, relativeDir);
            partitionsToAdd.add(partitionKey);
            relativePaths.add(relativeDir);
        }
    }

    // We need to copy to the parent of the FileOutputFormat's outputDir, since we added a _temporary_jobId
    // suffix to the original outputDir.
    Path finalOutput = FileOutputFormat.getOutputPath(context);
    FileSystem fs = finalOutput.getFileSystem(configuration);
    for (FileStatus stat : getAllCommittedTaskPaths(context)) {
        mergePaths(fs, stat, finalOutput);
    }

    // compute the metadata to be written to every output partition
    Map<String, String> metadata = ConfigurationUtil.getNamedConfigurations(this.taskContext.getConfiguration(),
            PartitionedFileSetArguments.OUTPUT_PARTITION_METADATA_PREFIX);

    // create all the necessary partitions
    for (PartitionKey partitionKey : partitionsToAdd) {
        PartitionOutput partitionOutput = outputDataset.getPartitionOutput(partitionKey);
        partitionOutput.setMetadata(metadata);
        partitionOutput.addPartition();
    }

    // close the TaskContext, which flushes dataset operations
    try {
        taskContext.flushOperations();
    } catch (Exception e) {
        Throwables.propagateIfPossible(e, IOException.class);
        throw new IOException(e);
    }

    // delete the job-specific _temporary folder and create a _done file in the o/p folder
    cleanupJob(context);

    // mark all the final output paths with a _SUCCESS file, if configured to do so (default = true)
    if (configuration.getBoolean(SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, true)) {
        for (String relativePath : relativePaths) {
            Path pathToMark = new Path(finalOutput, relativePath);
            Path markerPath = new Path(pathToMark, SUCCEEDED_FILE_NAME);
            fs.createNewFile(markerPath);
        }
    }
}
From source file:com.blackberry.logdriver.LockedFs.java
License:Apache License
public void touch(Configuration conf, String file) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    long now = System.currentTimeMillis();
    Path path = new Path(file);
    // createNewFile is a no-op (returning false) when the file already exists,
    // so update the modification and access times explicitly.
    fs.createNewFile(path);
    fs.setTimes(path, now, now);
}
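Because createNewFile leaves an existing file untouched, the touch above must call setTimes explicitly. A minimal sketch demonstrating that behavior, assuming a default Configuration; the path /tmp/marker is made up for illustration:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class TouchSemanticsDemo {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        Path p = new Path("/tmp/marker"); // hypothetical path
        fs.createNewFile(p); // first call creates the file
        long before = fs.getFileStatus(p).getModificationTime();
        Thread.sleep(2000);
        fs.createNewFile(p); // file exists: returns false, timestamps are not updated
        long after = fs.getFileStatus(p).getModificationTime();
        System.out.println(before == after); // true, hence the explicit setTimes() in touch()
        fs.setTimes(p, System.currentTimeMillis(), -1); // -1 leaves the access time unchanged
        fs.delete(p, false); // clean up
    }
}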
From source file:com.blackberry.logtools.logmultisearch.java
License:Apache License
@SuppressWarnings("static-access") public int run(String[] argv) throws Exception { //Configuring configuration and filesystem to work on HDFS final Configuration conf = getConf(); //Configuration processed by ToolRunner FileSystem fs = FileSystem.get(conf); //Initiate tools used for running search LogTools tools = new LogTools(); //Other options String date_format = "RFC5424"; String field_separator = ""; ArrayList<String> D_options = new ArrayList<String>(); boolean quiet = true; boolean silent = false; boolean log = false; boolean forcelocal = false; boolean forceremote = false; //The arguments are // - strings/*from w w w. ja va 2 s . c o m*/ // - dc number // - service // - component // - startTime (Something 'date' can parse, or just a time in ms from epoch) // - endTime (Same as start) // - outputDir //Indexing for arguments to be passed for Mapreduce int stringsNum = 0; int dcNum = 1; int svcNum = 2; int compNum = 3; int startNum = 4; int endNum = 5; int outNum = 6; //Parsing through user arguments String[] args = new String[7]; int count = 0; //Count created to track the parse of all arguments int argcount = 0; //Count created to track number of arguments to be passed on while (count < argv.length) { String arg = argv[count]; count++; if (arg.equals("--")) { break; } else if (arg.startsWith("-")) { if (arg.equals("--v")) { quiet = tools.parseV(silent); } else if (arg.equals("--i")) { conf.set("logdriver.search.case.insensitive", "true"); } else if (arg.equals("--a")) { LogTools.logConsole(quiet, silent, warn, "AND searching selected"); conf.set("logdriver.search.and", "true"); } else if (arg.startsWith("--dateFormat=")) { arg = arg.replace("--dateFormat=", ""); date_format = arg; } else if (arg.startsWith("--fieldSeparator=")) { arg = arg.replace("--fieldSeparator=", ""); field_separator = arg; } else if (arg.startsWith("-strings=")) { arg = arg.replace("-strings=", ""); args[stringsNum] = arg; argcount++; } else if (arg.startsWith("-dc=")) { arg = arg.replace("-dc=", ""); args[dcNum] = arg; argcount++; } else if (arg.startsWith("-svc=")) { arg = arg.replace("-svc=", ""); args[svcNum] = arg; argcount++; } else if (arg.startsWith("-comp=")) { arg = arg.replace("-comp=", ""); args[compNum] = arg; argcount++; } else if (arg.startsWith("-start=")) { arg = arg.replace("-start=", ""); args[startNum] = arg; argcount++; } else if (arg.startsWith("-end=")) { arg = arg.replace("-end=", ""); args[endNum] = arg; argcount++; } //User inputs output directory that is to be created //Check to see if parent directory exists && output directory does not exist else if (arg.startsWith("--out=")) { args[outNum] = tools.parseOut(arg, fs); argcount++; } else if (arg.startsWith("-D")) { D_options.add(arg); } else if (arg.equals("--silent")) { silent = tools.parseSilent(quiet); } else if (arg.equals("--log")) { log = true; } else if (arg.equals("--l")) { forcelocal = tools.parsePigMode(forceremote); } else if (arg.equals("--r")) { forceremote = tools.parsePigMode(forcelocal); } else { LogTools.logConsole(quiet, silent, error, "Unrecognized option: " + arg); System.exit(1); } } else { LogTools.logConsole(quiet, silent, error, "Unrecognized option: " + arg); System.exit(1); } } //Default output should be stdout represented by "-" if (args[outNum] == null) { args[outNum] = "-"; argcount++; LogTools.logConsole(quiet, silent, info, "Output set to default stdout."); } if (argcount < 7) { System.err.println(";****************************************" + "\n\t\t\t NOT ENOUGH ARGUMENTS\n" + "\n\tUSAGE: 
logmultisearch [REQUIRED ARGUMENTS] [OPTIONS] (Order does not matter)" + "\n\tREQUIRED ARGUMENTS:" + "\n\t\t-strings=[STRINGS_DIR|STRINGS_FILE|STRING] String/file/directory of strings to search." + "\n\t\t-dc=[DATACENTER] Data Center." + "\n\t\t-svc=[SERVICE] Service." + "\n\t\t-comp=[COMPONENT] Component." + "\n\t\t-start=[START] Start time." + "\n\t\t-end=[END] End time." + "\n\tOptions:" + "\n\t\t--out=[DIRECTORY] Desired output directory. If not defined, output to stdout." + "\n\t\t--v Verbose output." + "\n\t\t--r Force remote sort." + "\n\t\t--l Force local sort." + "\n\t\t--dateFormat=[FORMAT] Valid formats are RFC822, RFC3164 (zero padded day)," + "\n\t RFC5424 (default), or any valid format string for FastDateFormat." + "\n\t\t--fieldSeparator=X The separator to use to separate fields in intermediate" + "\n\t files. Defaults to 'INFORMATION SEPARATOR ONE' (U+001F)." + "\n\t\t--silent Output only the data." + "\n\t\t--i Make search case insensitive." + "\n\t\t--a Enable AND searching." + "\n\t\t--log Save all the logs.\n" + ";****************************************"); System.exit(1); } //Parse time inputs for start and end of search args[startNum] = tools.parseDate(args[startNum]); args[endNum] = tools.parseDate(args[endNum]); tools.checkTime(args[startNum], args[endNum]); //Retrieve 'strings' argument to be able to pass search strings to HDFS //Retrieve 'out' argument to determine where output of results should be sent String strings = args[stringsNum]; String out = args[outNum]; //Generate files to temporarily store output of mapreduce jobs and pig logs locally File local_output = File.createTempFile("tmp.", RandomStringUtils.randomAlphanumeric(10)); if (log != true) { local_output.deleteOnExit(); } File pig_tmp = File.createTempFile("tmp.", RandomStringUtils.randomAlphanumeric(10)); if (log != true) { pig_tmp.deleteOnExit(); } //Name the temp directory for storing results in HDFS String tmp = "tmp/logmultisearch-" + RandomStringUtils.randomAlphanumeric(10); //Set args[stringsNum] to be location of search strings to be used for the Multisearch args[stringsNum] = (StringEscapeUtils.escapeJava(tmp) + "/strings"); //Set args[outNum] to be temp output directory to be passed onto MultiSearchByTime instead of UserInput argument args[outNum] = (StringEscapeUtils.escapeJava(tmp) + "/rawlines"); //Managing console output - deal with --v/--silent Logger LOG = LoggerFactory.getLogger(logmultisearch.class); tools.setConsoleOutput(local_output, quiet, silent); //Create temp directory in HDFS to store logsearch logs before sorting tools.tmpDirHDFS(quiet, silent, fs, conf, tmp, log); //If the strings argument is the path of a file, copy the file to HDFS. //If the strings argument is the path of a directory, copy all files in the directory to HDFS. //If the strings argument is not a path to a file/directory, write to a newly created file in HDFS. 
try { File f = new File(strings); if (f.isFile()) { LogTools.logConsole(quiet, silent, warn, "Strings input is a File..."); //dos2unix file conversion File dos2unix = File.createTempFile("tmp.", RandomStringUtils.randomAlphanumeric(10)); dos2unix.deleteOnExit(); tools.dosTounix(f, dos2unix); //Copy over temp directory into a new directory in HDFS to be used for logmultisearch fs.copyFromLocalFile(new Path(dos2unix.getAbsolutePath()), new Path(tmp + "/strings")); } else if (f.isDirectory()) { LogTools.logConsole(quiet, silent, warn, "Strings input is a Directory..."); //Get list of all files in directory to convert from dos2unix String[] fileList = f.list(); //Create temp directory to store all converted files File tempDir = Files.createTempDir(); tempDir.deleteOnExit(); //Convert all files from dos2unix and write to temp directory for (int i = 0; i < fileList.length; i++) { File dos2unix = File.createTempFile("unix", fileList[i], tempDir); dos2unix.deleteOnExit(); tools.dosTounix(new File(f.getAbsolutePath() + "/" + fileList[i]), dos2unix); } //Copy over temp directory into a new directory in HDFS to be used for logmultisearch fs.copyFromLocalFile(new Path(tempDir.getAbsolutePath()), new Path(tmp + "/strings")); } else { LogTools.logConsole(quiet, silent, warn, "Strings input is a search string..."); //Make directory and file for strings fs.mkdirs(new Path(tmp + "/strings")); fs.createNewFile(new Path(tmp + "/strings/strings")); //Write search strings to file FSDataOutputStream hdfsOut = fs.create(new Path(tmp + "/strings/strings")); hdfsOut.writeUTF(strings); hdfsOut.close(); } } catch (Exception e) { e.printStackTrace(); System.exit(1); } LogTools.logConsole(quiet, silent, warn, "Searching..."); LogTools.logConsole(quiet, silent, warn, "Passing Arguments: SearchStrings=" + strings + " DC=" + args[dcNum] + " Service=" + args[svcNum] + " Component=" + args[compNum] + " StartTime=" + args[startNum] + " EndTime=" + args[endNum] + " Output=" + out); //Set standard configuration for running Mapreduce and PIG String queue_name = "logsearch"; //Start Mapreduce job tools.runMRJob(quiet, silent, conf, D_options, out, LOG, field_separator, queue_name, args, "MultiSearchByTime", new MultiSearchByTime()); //Before sorting, determine the number of records and size of the results found long foundresults = tools.getResults(local_output); long size = tools.getSize(foundresults, tmp, fs); //Run PIG job if results found tools.runPig(silent, quiet, foundresults, size, tmp, out, D_options, queue_name, date_format, field_separator, pig_tmp, fs, conf, forcelocal, forceremote); //Display location of tmp files if log enabled tools.logs(log, local_output, pig_tmp, tmp); return 0; }
From source file:com.datatorrent.contrib.parser.CSVParserTest.java
License:Apache License
public void createFieldMappingFile() {
    FileSystem hdfs = null;
    // Creating a file in HDFS
    Path newFilePath = new Path(testMeta.getDir() + "/" + filename);
    try {
        hdfs = FileSystem.get(new Configuration());
        hdfs.createNewFile(newFilePath);
    } catch (IOException ex) {
        DTThrowable.rethrow(ex);
    }

    // Writing data to a HDFS file
    StringBuilder sb = new StringBuilder();
    sb.append("Eid");
    sb.append(":");
    sb.append("INTEGER");
    sb.append("\n");
    sb.append("Name");
    sb.append(":");
    sb.append("STRING");
    sb.append("\n");
    sb.append("Salary");
    sb.append(":");
    sb.append("LONG");
    sb.append("\n");
    byte[] byt = sb.toString().getBytes();
    try {
        FSDataOutputStream fsOutStream = hdfs.create(newFilePath);
        fsOutStream.write(byt);
        fsOutStream.close();
    } catch (IOException ex) {
        DTThrowable.rethrow(ex);
    }
    logger.debug("Written data to HDFS file.");
}
From source file:com.digitalpebble.behemoth.util.ContentExtractor.java
License:Apache License
private int generateDocs(String inputf, String outputf) throws IOException, ArchiveException {
    Path input = new Path(inputf);
    Path dirPath = new Path(outputf);
    FileSystem fsout = FileSystem.get(dirPath.toUri(), getConf());
    if (!fsout.exists(dirPath)) {
        fsout.mkdirs(dirPath);
    } else {
        System.err.println("Output " + outputf + " already exists");
        return -1;
    }

    // index file
    Path indexPath = new Path(dirPath, "index");
    if (!fsout.exists(indexPath)) {
        fsout.createNewFile(indexPath);
    }

    maxNumEntriesInArchive = getConf().getInt(numEntriesPerArchiveParamName, 10000);
    index = fsout.create(indexPath);
    createArchive(dirPath);

    FileSystem fs = input.getFileSystem(getConf());
    FileStatus[] statuses = fs.listStatus(input);
    int count[] = { 0 };
    for (int i = 0; i < statuses.length; i++) {
        FileStatus status = statuses[i];
        Path suPath = status.getPath();
        if (suPath.getName().equals("_SUCCESS"))
            continue;
        generateDocs(suPath, dirPath, count);
    }

    if (index != null)
        index.close();
    if (currentArchive != null) {
        currentArchive.finish();
        currentArchive.close();
    }
    return 0;
}
From source file:com.facebook.presto.hive.AbstractTestHiveFileSystem.java
License:Apache License
@Test
public void testRename() throws Exception {
    Path basePath = new Path(getBasePath(), UUID.randomUUID().toString());
    FileSystem fs = hdfsEnvironment.getFileSystem(TESTING_CONTEXT, basePath);
    assertFalse(fs.exists(basePath));

    // create file foo.txt
    Path path = new Path(basePath, "foo.txt");
    assertTrue(fs.createNewFile(path));
    assertTrue(fs.exists(path));

    // rename foo.txt to bar.txt when bar does not exist
    Path newPath = new Path(basePath, "bar.txt");
    assertFalse(fs.exists(newPath));
    assertTrue(fs.rename(path, newPath));
    assertFalse(fs.exists(path));
    assertTrue(fs.exists(newPath));

    // rename foo.txt to foo.txt when foo.txt does not exist
    assertFalse(fs.rename(path, path));

    // create file foo.txt and rename to existing bar.txt
    assertTrue(fs.createNewFile(path));
    assertFalse(fs.rename(path, newPath));

    // rename foo.txt to foo.txt when foo.txt exists
    assertFalse(fs.rename(path, path));

    // delete foo.txt
    assertTrue(fs.delete(path, false));
    assertFalse(fs.exists(path));

    // create directory source with file
    Path source = new Path(basePath, "source");
    assertTrue(fs.createNewFile(new Path(source, "test.txt")));

    // rename source to non-existing target
    Path target = new Path(basePath, "target");
    assertFalse(fs.exists(target));
    assertTrue(fs.rename(source, target));
    assertFalse(fs.exists(source));
    assertTrue(fs.exists(target));

    // create directory source with file
    assertTrue(fs.createNewFile(new Path(source, "test.txt")));

    // rename source to existing target
    assertTrue(fs.rename(source, target));
    assertFalse(fs.exists(source));
    target = new Path(target, "source");
    assertTrue(fs.exists(target));
    assertTrue(fs.exists(new Path(target, "test.txt")));

    // delete target
    target = new Path(basePath, "target");
    assertTrue(fs.exists(target));
    assertTrue(fs.delete(target, true));
    assertFalse(fs.exists(target));

    // cleanup
    fs.delete(basePath, true);
}
From source file:com.github.dryangkun.hbase.tidx.hive.HiveHFileOutputFormat.java
License:Apache License
@Override
public RecordWriter getHiveRecordWriter(final JobConf jc, final Path finalOutPath,
        Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties,
        final Progressable progressable) throws IOException {

    // Read configuration for the target path, first from jobconf, then from table properties
    String hfilePath = getFamilyPath(jc, tableProperties);
    if (hfilePath == null) {
        throw new RuntimeException("Please set " + HFILE_FAMILY_PATH + " to target location for HFiles");
    }

    // Target path's last component is also the column family name.
    final Path columnFamilyPath = new Path(hfilePath);
    final String columnFamilyName = columnFamilyPath.getName();
    final byte[] columnFamilyNameBytes = Bytes.toBytes(columnFamilyName);
    final Job job = new Job(jc);
    setCompressOutput(job, isCompressed);
    setOutputPath(job, finalOutPath);

    // Create the HFile writer
    final org.apache.hadoop.mapreduce.TaskAttemptContext tac = ShimLoader.getHadoopShims()
            .newTaskAttemptContext(job.getConfiguration(), progressable);

    final Path outputdir = FileOutputFormat.getOutputPath(tac);
    final org.apache.hadoop.mapreduce.RecordWriter<ImmutableBytesWritable, KeyValue> fileWriter =
            getFileWriter(tac);

    // Individual columns are going to be pivoted to HBase cells,
    // and for each row, they need to be written out in order
    // of column name, so sort the column names now, creating a
    // mapping to their column position. However, the first
    // column is interpreted as the row key.
    String columnList = tableProperties.getProperty("columns");
    String[] columnArray = columnList.split(",");
    final SortedMap<byte[], Integer> columnMap = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
    int i = 0;
    for (String columnName : columnArray) {
        if (i != 0) {
            columnMap.put(Bytes.toBytes(columnName), i);
        }
        ++i;
    }

    return new RecordWriter() {
        @Override
        public void close(boolean abort) throws IOException {
            try {
                fileWriter.close(null);
                if (abort) {
                    return;
                }
                // Move the hfiles file(s) from the task output directory to the
                // location specified by the user.
                FileSystem fs = outputdir.getFileSystem(jc);
                fs.mkdirs(columnFamilyPath);
                Path srcDir = outputdir;
                for (;;) {
                    FileStatus[] files = fs.listStatus(srcDir, FileUtils.STAGING_DIR_PATH_FILTER);
                    if ((files == null) || (files.length == 0)) {
                        throw new IOException("No family directories found in " + srcDir);
                    }
                    if (files.length != 1) {
                        throw new IOException("Multiple family directories found in " + srcDir);
                    }
                    srcDir = files[0].getPath();
                    if (srcDir.getName().equals(columnFamilyName)) {
                        break;
                    }
                }
                for (FileStatus regionFile : fs.listStatus(srcDir, FileUtils.STAGING_DIR_PATH_FILTER)) {
                    fs.rename(regionFile.getPath(), new Path(columnFamilyPath, regionFile.getPath().getName()));
                }
                // Hive actually wants a file as task output (not a directory), so
                // replace the empty directory with an empty file to keep it happy.
                fs.delete(outputdir, true);
                fs.createNewFile(outputdir);
            } catch (InterruptedException ex) {
                throw new IOException(ex);
            }
        }

        private void writeText(Text text) throws IOException {
            // Decompose the incoming text row into fields.
            String s = text.toString();
            String[] fields = s.split("\u0001");
            assert (fields.length <= (columnMap.size() + 1));

            // First field is the row key.
            byte[] rowKeyBytes = Bytes.toBytes(fields[0]);

            // Remaining fields are cells addressed by column name within row.
            for (Map.Entry<byte[], Integer> entry : columnMap.entrySet()) {
                byte[] columnNameBytes = entry.getKey();
                int iColumn = entry.getValue();
                String val;
                if (iColumn >= fields.length) {
                    // trailing blank field
                    val = "";
                } else {
                    val = fields[iColumn];
                    if ("\\N".equals(val)) {
                        // omit nulls
                        continue;
                    }
                }
                byte[] valBytes = Bytes.toBytes(val);
                KeyValue kv = new KeyValue(rowKeyBytes, columnFamilyNameBytes, columnNameBytes, valBytes);
                try {
                    fileWriter.write(null, kv);
                } catch (IOException e) {
                    LOG.error("Failed while writing row: " + s);
                    throw e;
                } catch (InterruptedException ex) {
                    throw new IOException(ex);
                }
            }
        }

        private void writePut(PutWritable put) throws IOException {
            ImmutableBytesWritable row = new ImmutableBytesWritable(put.getPut().getRow());
            SortedMap<byte[], List<Cell>> cells = put.getPut().getFamilyCellMap();
            for (Map.Entry<byte[], List<Cell>> entry : cells.entrySet()) {
                Collections.sort(entry.getValue(), new CellComparator());
                for (Cell c : entry.getValue()) {
                    try {
                        fileWriter.write(row, KeyValueUtil.copyToNewKeyValue(c));
                    } catch (InterruptedException e) {
                        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
                    }
                }
            }
        }

        @Override
        public void write(Writable w) throws IOException {
            if (w instanceof Text) {
                writeText((Text) w);
            } else if (w instanceof PutWritable) {
                writePut((PutWritable) w);
            } else {
                throw new IOException("Unexpected writable " + w);
            }
        }
    };
}
From source file:com.ibm.bi.dml.runtime.io.WriterTextCSV.java
License:Open Source License
/**
 * Prepends a generated header line (C1, C2, ..., separated by the configured delimiter)
 * to the CSV data in srcFileName, writing the result to destFileName.
 *
 * @param srcFileName
 * @param destFileName
 * @param rlen
 * @param clen
 * @throws IOException
 */
@SuppressWarnings("unchecked")
public void addHeaderToCSV(String srcFileName, String destFileName, long rlen, long clen) throws IOException {
    Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());

    Path srcFilePath = new Path(srcFileName);
    Path destFilePath = new Path(destFileName);
    FileSystem hdfs = FileSystem.get(conf);

    if (!_props.hasHeader()) {
        // simply move srcFile to destFile
        /*
         * TODO: Remove this roundabout way!
         * For example: destFilePath = /user/biadmin/csv/temp/out/file.csv
         * & the only path that exists already on HDFS is /user/biadmin/csv/.
         * In this case: the directory structure /user/biadmin/csv/temp/out must be created.
         * Simple hdfs.rename() does not seem to create this directory structure.
         */
        // delete the destination file, if exists already
        //boolean ret1 = hdfs.delete(destFilePath, true);
        // Create /user/biadmin/csv/temp/out/file.csv so that ..../temp/out/ is created.
        //boolean ret2 = hdfs.createNewFile(destFilePath);
        // delete the file "file.csv" but preserve the directory structure /user/biadmin/csv/temp/out/
        //boolean ret3 = hdfs.delete(destFilePath, true);
        // finally, move the data to destFilePath = /user/biadmin/csv/temp/out/file.csv
        //boolean ret4 = hdfs.rename(srcFilePath, destFilePath);
        //System.out.println("Return values = del:" + ret1 + ", createNew:" + ret2 + ", del:" + ret3 + ", rename:" + ret4);
        return;
    }

    // construct the header line
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < clen; i++) {
        sb.append("C" + (i + 1));
        if (i < clen - 1)
            sb.append(_props.getDelim());
    }
    sb.append('\n');

    if (hdfs.isDirectory(srcFilePath)) {
        // compute sorted order among part files
        ArrayList<Path> files = new ArrayList<Path>();
        for (FileStatus stat : hdfs.listStatus(srcFilePath, CSVReblockMR.hiddenFileFilter))
            files.add(stat.getPath());
        Collections.sort(files);

        // first part file path
        Path firstpart = files.get(0);

        // create a temp file, and add header and contents of first part
        Path tmp = new Path(firstpart.toString() + ".tmp");
        OutputStream out = hdfs.create(tmp, true);
        out.write(sb.toString().getBytes());
        sb.setLength(0);

        // copy rest of the data from firstpart
        InputStream in = null;
        try {
            in = hdfs.open(firstpart);
            IOUtils.copyBytes(in, out, conf, true);
        } finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }

        // rename tmp to firstpart
        hdfs.delete(firstpart, true);
        hdfs.rename(tmp, firstpart);

        // rename srcfile to destFile
        hdfs.delete(destFilePath, true);
        hdfs.createNewFile(destFilePath); // force the creation of directory structure
        hdfs.delete(destFilePath, true); // delete the file, but preserve the directory structure
        hdfs.rename(srcFilePath, destFilePath); // move the data
    } else if (hdfs.isFile(srcFilePath)) {
        // create destination file
        OutputStream out = hdfs.create(destFilePath, true);

        // write header
        out.write(sb.toString().getBytes());
        sb.setLength(0);

        // copy the data from srcFile
        InputStream in = null;
        try {
            in = hdfs.open(srcFilePath);
            IOUtils.copyBytes(in, out, conf, true);
        } finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }
    } else {
        throw new IOException(srcFilePath.toString() + ": No such file or directory");
    }
}
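The create/delete/rename sequence in the example above (live in the directory branch, commented out in the no-header branch) works around rename not creating missing parent directories for the destination: createNewFile materializes the parents as a side effect, and deleting the placeholder leaves them behind. A minimal sketch of the idiom in isolation, with made-up paths; fs.mkdirs(dest.getParent()) would be a simpler way to get the same effect:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class RenameIntoNewDirectoryDemo {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        Path src = new Path("/tmp/in/file.csv");       // hypothetical existing file
        Path dest = new Path("/tmp/out/a/b/file.csv"); // hypothetical target; /tmp/out/a/b does not exist yet
        fs.createNewFile(dest); // side effect: materializes the parent directories /tmp/out/a/b
        fs.delete(dest, true);  // remove the placeholder file; the directories remain
        fs.rename(src, dest);   // the rename can now succeed
    }
}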