Example usage for org.apache.hadoop.fs FileSystem createNewFile

Introduction

This page presents usage examples for org.apache.hadoop.fs.FileSystem.createNewFile.

Prototype

public boolean createNewFile(Path f) throws IOException 

Document

Creates the given Path as a brand-new zero-length file.
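
The method returns true if the file was created and false if it already existed, so the return value doubles as a create-if-absent check. A minimal, self-contained sketch (the marker path below is illustrative, not taken from the examples that follow):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CreateNewFileExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path marker = new Path("/tmp/example/_MARKER"); // illustrative path
        // The first call creates a zero-length file and returns true;
        // a second call would return false because the file now exists.
        boolean created = fs.createNewFile(marker);
        System.out.println("created = " + created);
        fs.delete(marker, false); // clean up
    }
}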

Usage

From source file:CountJob.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String msgs = "";
    doJob("1", args, msgs);
    doJob("2", args, msgs);
    FileSystem hdfs = FileSystem.get(conf);

    BufferedReader bfr = new BufferedReader(
            new InputStreamReader(hdfs.open(new Path("/data/output/temp/12/part-r-00000"))));
    BufferedReader bfr2 = new BufferedReader(
            new InputStreamReader(hdfs.open(new Path("/data/output/temp/22/part-r-00000"))));
    boolean same = true;
    String line1;
    String line2;
    line1 = bfr.readLine();
    line2 = bfr2.readLine();
    while (same) {
        if ((line1 == null && line2 != null) || (line1 != null && line2 == null)) {
            same = false;
            break;
        } else if (line1 == null && line2 == null) {
            break;
        } else {
            if (line1.equals(line2)) {
                line1 = bfr.readLine();
                line2 = bfr2.readLine();
            } else {
                same = false;
                break;
            }
        }
    }
    bfr.close();
    bfr2.close();
    if (same) {
        System.out.print("same " + same + "\n");
        Path localP = new Path("/tmp/output.txt");
        hdfs.copyToLocalFile(new Path("/data/output/temp/12/part-r-00000"), localP);
        hdfs.copyFromLocalFile(localP, new Path(args[1] + "/part-r-00000"));
        hdfs.createNewFile(new Path(args[1] + "/_SUCCESS"));
        System.out.print("created result");

    } else {

        System.out.print("Different");
        doJob("3", args, msgs);
        Path localP = new Path("/tmp/output.txt");
        hdfs.copyToLocalFile(new Path("/data/output/temp/32/part-r-00000"), localP);
        hdfs.copyFromLocalFile(localP, new Path(args[1] + "/part-r-00000"));
        hdfs.createNewFile(new Path(args[1] + "/_SUCCESS"));
        System.out.print("created result");

    }
    hdfs.delete(new Path("/data/output/temp/12/part-r-00000"), true);
    hdfs.delete(new Path("/data/output/temp/22/part-r-00000"), true);

}

From source file:be.ugent.intec.halvade.utils.HalvadeConf.java

License:Open Source License

public static boolean addTaskRunning(Configuration conf, String val) throws IOException, URISyntaxException {
    val = val.substring(0, val.lastIndexOf("_")); // rewrite file if second attempt
    String filepath = conf.get(outdir) + tasksDone + val;
    FileSystem fs = FileSystem.get(new URI(filepath), conf);
    return fs.createNewFile(new Path(filepath));
}
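
Note the design choice here: because createNewFile() returns false rather than throwing when the file already exists, the boolean returned to the caller also indicates whether this task had already been marked as done.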

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputCommitter.java

License:Apache License

@Override
public void commitJob(JobContext context) throws IOException {
    Configuration configuration = context.getConfiguration();
    MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(configuration);
    BasicMapReduceTaskContext taskContext = classLoader.getTaskContextProvider().get(this.taskContext);

    String outputDatasetName = configuration.get(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_DATASET);
    PartitionedFileSet outputDataset = taskContext.getDataset(outputDatasetName);
    Partitioning partitioning = outputDataset.getPartitioning();

    Set<PartitionKey> partitionsToAdd = new HashSet<>();
    Set<String> relativePaths = new HashSet<>();
    // Go over all files in the temporary directory and keep track of partitions to add for them
    FileStatus[] allCommittedTaskPaths = getAllCommittedTaskPaths(context);
    for (FileStatus committedTaskPath : allCommittedTaskPaths) {
        FileSystem fs = committedTaskPath.getPath().getFileSystem(configuration);
        RemoteIterator<LocatedFileStatus> fileIter = fs.listFiles(committedTaskPath.getPath(), true);
        while (fileIter.hasNext()) {
            Path path = fileIter.next().getPath();
            String relativePath = getRelative(committedTaskPath.getPath(), path);

            int lastPathSepIdx = relativePath.lastIndexOf(Path.SEPARATOR);
            if (lastPathSepIdx == -1) {
                // this shouldn't happen because each relative path should consist of at least one partition key and
                // the output file name
                LOG.warn("Skipping path '{}'. Its relative path '{}' has fewer than two parts", path,
                        relativePath);
                continue;
            }
            // relativePath = "../key1/key2/part-m-00000"
            // relativeDir = "../key1/key2"
            // fileName = "part-m-00000"
            String relativeDir = relativePath.substring(0, lastPathSepIdx);
            String fileName = relativePath.substring(lastPathSepIdx + 1);

            Path finalDir = new Path(FileOutputFormat.getOutputPath(context), relativeDir);
            Path finalPath = new Path(finalDir, fileName);
            if (fs.exists(finalPath)) {
                throw new FileAlreadyExistsException("Final output path " + finalPath + " already exists");
            }
            PartitionKey partitionKey = getPartitionKey(partitioning, relativeDir);
            partitionsToAdd.add(partitionKey);
            relativePaths.add(relativeDir);
        }
    }

    // We need to copy to the parent of the FileOutputFormat's outputDir, since we added a _temporary_jobId suffix to
    // the original outputDir.
    Path finalOutput = FileOutputFormat.getOutputPath(context);
    FileSystem fs = finalOutput.getFileSystem(configuration);
    for (FileStatus stat : getAllCommittedTaskPaths(context)) {
        mergePaths(fs, stat, finalOutput);
    }

    // compute the metadata to be written to every output partition
    Map<String, String> metadata = ConfigurationUtil.getNamedConfigurations(this.taskContext.getConfiguration(),
            PartitionedFileSetArguments.OUTPUT_PARTITION_METADATA_PREFIX);

    // create all the necessary partitions
    for (PartitionKey partitionKey : partitionsToAdd) {
        PartitionOutput partitionOutput = outputDataset.getPartitionOutput(partitionKey);
        partitionOutput.setMetadata(metadata);
        partitionOutput.addPartition();
    }

    // close the TaskContext, which flushes dataset operations
    try {
        taskContext.flushOperations();
    } catch (Exception e) {
        Throwables.propagateIfPossible(e, IOException.class);
        throw new IOException(e);
    }

    // delete the job-specific _temporary folder and create a _done file in the o/p folder
    cleanupJob(context);

    // mark all the final output paths with a _SUCCESS file, if configured to do so (default = true)
    if (configuration.getBoolean(SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, true)) {
        for (String relativePath : relativePaths) {
            Path pathToMark = new Path(finalOutput, relativePath);
            Path markerPath = new Path(pathToMark, SUCCEEDED_FILE_NAME);
            fs.createNewFile(markerPath);
        }
    }
}

From source file:com.blackberry.logdriver.LockedFs.java

License:Apache License

public void touch(Configuration conf, String file) throws IOException {
    FileSystem fs = FileSystem.get(conf);

    long now = System.currentTimeMillis();

    Path path = new Path(file);
    fs.createNewFile(path);
    fs.setTimes(path, now, now);
}
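
This method reproduces the behaviour of the Unix touch command: createNewFile() is a no-op that returns false when the file already exists, and setTimes() then updates the modification and access times either way.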

From source file:com.blackberry.logtools.logmultisearch.java

License:Apache License

@SuppressWarnings("static-access")
public int run(String[] argv) throws Exception {

    //Set up configuration and filesystem to work on HDFS
    final Configuration conf = getConf(); //Configuration processed by ToolRunner
    FileSystem fs = FileSystem.get(conf);
    //Initiate tools used for running search
    LogTools tools = new LogTools();

    //Other options
    String date_format = "RFC5424";
    String field_separator = "";
    ArrayList<String> D_options = new ArrayList<String>();
    boolean quiet = true;
    boolean silent = false;
    boolean log = false;
    boolean forcelocal = false;
    boolean forceremote = false;

    //The arguments are 
    // - strings
    // - dc number
    // - service
    // - component
    // - startTime (Something 'date' can parse, or just a time in ms from epoch)
    // - endTime (Same as start)
    // - outputDir

    //Indexing for arguments to be passed for Mapreduce
    int stringsNum = 0;
    int dcNum = 1;
    int svcNum = 2;
    int compNum = 3;
    int startNum = 4;
    int endNum = 5;
    int outNum = 6;

    //Parsing through user arguments
    String[] args = new String[7];
    int count = 0; //Count created to track the parse of all arguments
    int argcount = 0; //Count created to track number of arguments to be passed on
    while (count < argv.length) {
        String arg = argv[count];
        count++;
        if (arg.equals("--")) {
            break;
        } else if (arg.startsWith("-")) {
            if (arg.equals("--v")) {
                quiet = tools.parseV(silent);
            } else if (arg.equals("--i")) {
                conf.set("logdriver.search.case.insensitive", "true");
            } else if (arg.equals("--a")) {
                LogTools.logConsole(quiet, silent, warn, "AND searching selected");
                conf.set("logdriver.search.and", "true");
            } else if (arg.startsWith("--dateFormat=")) {
                arg = arg.replace("--dateFormat=", "");
                date_format = arg;
            } else if (arg.startsWith("--fieldSeparator=")) {
                arg = arg.replace("--fieldSeparator=", "");
                field_separator = arg;
            } else if (arg.startsWith("-strings=")) {
                arg = arg.replace("-strings=", "");
                args[stringsNum] = arg;
                argcount++;
            } else if (arg.startsWith("-dc=")) {
                arg = arg.replace("-dc=", "");
                args[dcNum] = arg;
                argcount++;
            } else if (arg.startsWith("-svc=")) {
                arg = arg.replace("-svc=", "");
                args[svcNum] = arg;
                argcount++;
            } else if (arg.startsWith("-comp=")) {
                arg = arg.replace("-comp=", "");
                args[compNum] = arg;
                argcount++;
            } else if (arg.startsWith("-start=")) {
                arg = arg.replace("-start=", "");
                args[startNum] = arg;
                argcount++;
            } else if (arg.startsWith("-end=")) {
                arg = arg.replace("-end=", "");
                args[endNum] = arg;
                argcount++;
            }
            //User inputs output directory that is to be created
            //Check to see if parent directory exists && output directory does not exist
            else if (arg.startsWith("--out=")) {
                args[outNum] = tools.parseOut(arg, fs);
                argcount++;
            } else if (arg.startsWith("-D")) {
                D_options.add(arg);
            } else if (arg.equals("--silent")) {
                silent = tools.parseSilent(quiet);
            } else if (arg.equals("--log")) {
                log = true;
            } else if (arg.equals("--l")) {
                forcelocal = tools.parsePigMode(forceremote);
            } else if (arg.equals("--r")) {
                forceremote = tools.parsePigMode(forcelocal);
            } else {
                LogTools.logConsole(quiet, silent, error, "Unrecognized option: " + arg);
                System.exit(1);
            }
        } else {
            LogTools.logConsole(quiet, silent, error, "Unrecognized option: " + arg);
            System.exit(1);
        }
    }

    //Default output should be stdout represented by "-"
    if (args[outNum] == null) {
        args[outNum] = "-";
        argcount++;
        LogTools.logConsole(quiet, silent, info, "Output set to default stdout.");
    }

    if (argcount < 7) {
        System.err.println(";****************************************" + "\n\t\t\t NOT ENOUGH ARGUMENTS\n"
                + "\n\tUSAGE: logmultisearch [REQUIRED ARGUMENTS] [OPTIONS] (Order does not matter)"
                + "\n\tREQUIRED ARGUMENTS:"
                + "\n\t\t-strings=[STRINGS_DIR|STRINGS_FILE|STRING]   String/file/directory of strings to search."
                + "\n\t\t-dc=[DATACENTER]            Data Center."
                + "\n\t\t-svc=[SERVICE]               Service."
                + "\n\t\t-comp=[COMPONENT]                Component."
                + "\n\t\t-start=[START]               Start time." + "\n\t\t-end=[END]               End time."
                + "\n\tOptions:"
                + "\n\t\t--out=[DIRECTORY]                  Desired output directory. If not defined, output to stdout."
                + "\n\t\t--v                              Verbose output."
                + "\n\t\t--r                              Force remote sort."
                + "\n\t\t--l                              Force local sort."
                + "\n\t\t--dateFormat=[FORMAT]              Valid formats are RFC822, RFC3164 (zero padded day),"
                + "\n\t                                   RFC5424 (default), or any valid format string for FastDateFormat."
                + "\n\t\t--fieldSeparator=X                  The separator to use to separate fields in intermediate"
                + "\n\t                                      files.  Defaults to 'INFORMATION SEPARATOR ONE' (U+001F)."
                + "\n\t\t--silent               Output only the data."
                + "\n\t\t--i                              Make search case insensitive."
                + "\n\t\t--a                              Enable AND searching."
                + "\n\t\t--log                  Save all the logs.\n"
                + ";****************************************");
        System.exit(1);
    }

    //Parse time inputs for start and end of search
    args[startNum] = tools.parseDate(args[startNum]);
    args[endNum] = tools.parseDate(args[endNum]);
    tools.checkTime(args[startNum], args[endNum]);

    //Retrieve 'strings' argument to be able to pass search strings to HDFS
    //Retrieve 'out' argument to determine where output of results should be sent
    String strings = args[stringsNum];
    String out = args[outNum];

    //Generate files to temporarily store output of mapreduce jobs and pig logs locally                
    File local_output = File.createTempFile("tmp.", RandomStringUtils.randomAlphanumeric(10));
    if (!log) {
        local_output.deleteOnExit();
    }
    File pig_tmp = File.createTempFile("tmp.", RandomStringUtils.randomAlphanumeric(10));
    if (!log) {
        pig_tmp.deleteOnExit();
    }

    //Name the temp directory for storing results in HDFS
    String tmp = "tmp/logmultisearch-" + RandomStringUtils.randomAlphanumeric(10);

    //Set args[stringsNum] to be location of search strings to be used for the Multisearch
    args[stringsNum] = (StringEscapeUtils.escapeJava(tmp) + "/strings");

    //Set args[outNum] to be temp output directory to be passed onto MultiSearchByTime instead of UserInput argument
    args[outNum] = (StringEscapeUtils.escapeJava(tmp) + "/rawlines");

    //Managing console output - deal with --v/--silent
    Logger LOG = LoggerFactory.getLogger(logmultisearch.class);
    tools.setConsoleOutput(local_output, quiet, silent);

    //Create temp directory in HDFS to store logsearch logs before sorting
    tools.tmpDirHDFS(quiet, silent, fs, conf, tmp, log);

    //If the strings argument is the path of a file, copy the file to HDFS.
    //If the strings argument is the path of a directory, copy all files in the directory to HDFS.
    //If the strings argument is not a path to a file/directory, write to a newly created file in HDFS.
    try {
        File f = new File(strings);
        if (f.isFile()) {
            LogTools.logConsole(quiet, silent, warn, "Strings input is a File...");

            //dos2unix file conversion
            File dos2unix = File.createTempFile("tmp.", RandomStringUtils.randomAlphanumeric(10));
            dos2unix.deleteOnExit();
            tools.dosTounix(f, dos2unix);

            //Copy over temp directory into a new directory in HDFS to be used for logmultisearch
            fs.copyFromLocalFile(new Path(dos2unix.getAbsolutePath()), new Path(tmp + "/strings"));
        } else if (f.isDirectory()) {
            LogTools.logConsole(quiet, silent, warn, "Strings input is a Directory...");

            //Get list of all files in directory to convert from dos2unix
            String[] fileList = f.list();

            //Create temp directory to store all converted files
            File tempDir = Files.createTempDir();
            tempDir.deleteOnExit();

            //Convert all files from dos2unix and write to temp directory
            for (int i = 0; i < fileList.length; i++) {
                File dos2unix = File.createTempFile("unix", fileList[i], tempDir);
                dos2unix.deleteOnExit();
                tools.dosTounix(new File(f.getAbsolutePath() + "/" + fileList[i]), dos2unix);
            }

            //Copy over temp directory into a new directory in HDFS to be used for logmultisearch
            fs.copyFromLocalFile(new Path(tempDir.getAbsolutePath()), new Path(tmp + "/strings"));
        } else {
            LogTools.logConsole(quiet, silent, warn, "Strings input is a search string...");
            //Make directory and file for strings
            fs.mkdirs(new Path(tmp + "/strings"));
            fs.createNewFile(new Path(tmp + "/strings/strings"));
            //Write search strings to file
            FSDataOutputStream hdfsOut = fs.create(new Path(tmp + "/strings/strings"));
            hdfsOut.writeUTF(strings);
            hdfsOut.close();
        }
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(1);
    }

    LogTools.logConsole(quiet, silent, warn, "Searching...");
    LogTools.logConsole(quiet, silent, warn,
            "Passing Arguments: SearchStrings=" + strings + " DC=" + args[dcNum] + " Service=" + args[svcNum]
                    + " Component=" + args[compNum] + " StartTime=" + args[startNum] + " EndTime="
                    + args[endNum] + " Output=" + out);

    //Set standard configuration for running Mapreduce and PIG
    String queue_name = "logsearch";

    //Start Mapreduce job
    tools.runMRJob(quiet, silent, conf, D_options, out, LOG, field_separator, queue_name, args,
            "MultiSearchByTime", new MultiSearchByTime());

    //Before sorting, determine the number of records and size of the results found
    long foundresults = tools.getResults(local_output);
    long size = tools.getSize(foundresults, tmp, fs);

    //Run PIG job if results found
    tools.runPig(silent, quiet, foundresults, size, tmp, out, D_options, queue_name, date_format,
            field_separator, pig_tmp, fs, conf, forcelocal, forceremote);

    //Display location of tmp files if log enabled
    tools.logs(log, local_output, pig_tmp, tmp);

    return 0;
}

From source file:com.datatorrent.contrib.parser.CSVParserTest.java

License:Apache License

public void createFieldMappingFile() {
    FileSystem hdfs = null;
    //Creating a file in HDFS
    Path newFilePath = new Path(testMeta.getDir() + "/" + filename);
    try {
        hdfs = FileSystem.get(new Configuration());
        hdfs.createNewFile(newFilePath);
    } catch (IOException ex) {
        DTThrowable.rethrow(ex);
    }
    //Writing data to a HDFS file
    StringBuilder sb = new StringBuilder();
    sb.append("Eid");
    sb.append(":");
    sb.append("INTEGER");
    sb.append("\n");
    sb.append("Name");
    sb.append(":");
    sb.append("STRING");
    sb.append("\n");
    sb.append("Salary");
    sb.append(":");
    sb.append("LONG");
    sb.append("\n");
    byte[] byt = sb.toString().getBytes();
    try {
        FSDataOutputStream fsOutStream = hdfs.create(newFilePath);
        fsOutStream.write(byt);
        fsOutStream.close();
    } catch (IOException ex) {
        DTThrowable.rethrow(ex);

    }
    logger.debug("Written data to HDFS file.");
}
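
Note that hdfs.create(newFilePath) later in this method overwrites the file anyway, so the earlier createNewFile() call is redundant here and mainly illustrates the API; the same pattern appears in the logmultisearch example above.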

From source file:com.digitalpebble.behemoth.util.ContentExtractor.java

License:Apache License

private int generateDocs(String inputf, String outputf) throws IOException, ArchiveException {

    Path input = new Path(inputf);
    Path dirPath = new Path(outputf);

    FileSystem fsout = FileSystem.get(dirPath.toUri(), getConf());

    if (!fsout.exists(dirPath))
        fsout.mkdirs(dirPath);
    else {
        System.err.println("Output " + outputf + " already exists");
        return -1;
    }

    // index file
    Path indexPath = new Path(dirPath, "index");
    if (!fsout.exists(indexPath)) {
        fsout.createNewFile(indexPath);
    }

    maxNumEntriesInArchive = getConf().getInt(numEntriesPerArchiveParamName, 10000);

    index = fsout.create(indexPath);

    createArchive(dirPath);

    FileSystem fs = input.getFileSystem(getConf());
    FileStatus[] statuses = fs.listStatus(input);
    int[] count = { 0 };
    for (int i = 0; i < statuses.length; i++) {
        FileStatus status = statuses[i];
        Path suPath = status.getPath();
        if (suPath.getName().equals("_SUCCESS"))
            continue;
        generateDocs(suPath, dirPath, count);
    }

    if (index != null)
        index.close();

    if (currentArchive != null) {
        currentArchive.finish();
        currentArchive.close();
    }

    return 0;
}

From source file:com.facebook.presto.hive.AbstractTestHiveFileSystem.java

License:Apache License

@Test
public void testRename() throws Exception {
    Path basePath = new Path(getBasePath(), UUID.randomUUID().toString());
    FileSystem fs = hdfsEnvironment.getFileSystem(TESTING_CONTEXT, basePath);
    assertFalse(fs.exists(basePath));

    // create file foo.txt
    Path path = new Path(basePath, "foo.txt");
    assertTrue(fs.createNewFile(path));
    assertTrue(fs.exists(path));

    // rename foo.txt to bar.txt when bar does not exist
    Path newPath = new Path(basePath, "bar.txt");
    assertFalse(fs.exists(newPath));
    assertTrue(fs.rename(path, newPath));
    assertFalse(fs.exists(path));
    assertTrue(fs.exists(newPath));

    // rename foo.txt to foo.txt when foo.txt does not exist
    assertFalse(fs.rename(path, path));

    // create file foo.txt and rename to existing bar.txt
    assertTrue(fs.createNewFile(path));
    assertFalse(fs.rename(path, newPath));

    // rename foo.txt to foo.txt when foo.txt exists
    assertFalse(fs.rename(path, path));

    // delete foo.txt
    assertTrue(fs.delete(path, false));
    assertFalse(fs.exists(path));

    // create directory source with file
    Path source = new Path(basePath, "source");
    assertTrue(fs.createNewFile(new Path(source, "test.txt")));

    // rename source to non-existing target
    Path target = new Path(basePath, "target");
    assertFalse(fs.exists(target));
    assertTrue(fs.rename(source, target));
    assertFalse(fs.exists(source));
    assertTrue(fs.exists(target));

    // create directory source with file
    assertTrue(fs.createNewFile(new Path(source, "test.txt")));

    // rename source to existing target
    assertTrue(fs.rename(source, target));
    assertFalse(fs.exists(source));
    target = new Path(target, "source");
    assertTrue(fs.exists(target));
    assertTrue(fs.exists(new Path(target, "test.txt")));

    // delete target
    target = new Path(basePath, "target");
    assertTrue(fs.exists(target));
    assertTrue(fs.delete(target, true));
    assertFalse(fs.exists(target));

    // cleanup
    fs.delete(basePath, true);
}
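
Note that fs.createNewFile(new Path(source, "test.txt")) succeeds even though the source directory does not yet exist: the default createNewFile() implementation delegates to create(), which creates any missing parent directories.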

From source file:com.github.dryangkun.hbase.tidx.hive.HiveHFileOutputFormat.java

License:Apache License

@Override
public RecordWriter getHiveRecordWriter(final JobConf jc, final Path finalOutPath,
        Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties,
        final Progressable progressable) throws IOException {

    // Read configuration for the target path, first from jobconf, then from table properties
    String hfilePath = getFamilyPath(jc, tableProperties);
    if (hfilePath == null) {
        throw new RuntimeException("Please set " + HFILE_FAMILY_PATH + " to target location for HFiles");
    }

    // Target path's last component is also the column family name.
    final Path columnFamilyPath = new Path(hfilePath);
    final String columnFamilyName = columnFamilyPath.getName();
    final byte[] columnFamilyNameBytes = Bytes.toBytes(columnFamilyName);
    final Job job = new Job(jc);
    setCompressOutput(job, isCompressed);
    setOutputPath(job, finalOutPath);

    // Create the HFile writer
    final org.apache.hadoop.mapreduce.TaskAttemptContext tac = ShimLoader.getHadoopShims()
            .newTaskAttemptContext(job.getConfiguration(), progressable);

    final Path outputdir = FileOutputFormat.getOutputPath(tac);
    final org.apache.hadoop.mapreduce.RecordWriter<ImmutableBytesWritable, KeyValue> fileWriter = getFileWriter(
            tac);

    // Individual columns are going to be pivoted to HBase cells,
    // and for each row, they need to be written out in order
    // of column name, so sort the column names now, creating a
    // mapping to their column position.  However, the first
    // column is interpreted as the row key.
    String columnList = tableProperties.getProperty("columns");
    String[] columnArray = columnList.split(",");
    final SortedMap<byte[], Integer> columnMap = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
    int i = 0;
    for (String columnName : columnArray) {
        if (i != 0) {
            columnMap.put(Bytes.toBytes(columnName), i);
        }
        ++i;
    }

    return new RecordWriter() {

        @Override
        public void close(boolean abort) throws IOException {
            try {
                fileWriter.close(null);
                if (abort) {
                    return;
                }
                // Move the hfiles file(s) from the task output directory to the
                // location specified by the user.
                FileSystem fs = outputdir.getFileSystem(jc);
                fs.mkdirs(columnFamilyPath);
                Path srcDir = outputdir;
                for (;;) {
                    FileStatus[] files = fs.listStatus(srcDir, FileUtils.STAGING_DIR_PATH_FILTER);
                    if ((files == null) || (files.length == 0)) {
                        throw new IOException("No family directories found in " + srcDir);
                    }
                    if (files.length != 1) {
                        throw new IOException("Multiple family directories found in " + srcDir);
                    }
                    srcDir = files[0].getPath();
                    if (srcDir.getName().equals(columnFamilyName)) {
                        break;
                    }
                }
                for (FileStatus regionFile : fs.listStatus(srcDir, FileUtils.STAGING_DIR_PATH_FILTER)) {
                    fs.rename(regionFile.getPath(), new Path(columnFamilyPath, regionFile.getPath().getName()));
                }
                // Hive actually wants a file as task output (not a directory), so
                // replace the empty directory with an empty file to keep it happy.
                fs.delete(outputdir, true);
                fs.createNewFile(outputdir);
            } catch (InterruptedException ex) {
                throw new IOException(ex);
            }
        }

        private void writeText(Text text) throws IOException {
            // Decompose the incoming text row into fields.
            String s = text.toString();
            String[] fields = s.split("\u0001");
            assert (fields.length <= (columnMap.size() + 1));
            // First field is the row key.
            byte[] rowKeyBytes = Bytes.toBytes(fields[0]);
            // Remaining fields are cells addressed by column name within row.
            for (Map.Entry<byte[], Integer> entry : columnMap.entrySet()) {
                byte[] columnNameBytes = entry.getKey();
                int iColumn = entry.getValue();
                String val;
                if (iColumn >= fields.length) {
                    // trailing blank field
                    val = "";
                } else {
                    val = fields[iColumn];
                    if ("\\N".equals(val)) {
                        // omit nulls
                        continue;
                    }
                }
                byte[] valBytes = Bytes.toBytes(val);
                KeyValue kv = new KeyValue(rowKeyBytes, columnFamilyNameBytes, columnNameBytes, valBytes);
                try {
                    fileWriter.write(null, kv);
                } catch (IOException e) {
                    LOG.error("Failed while writing row: " + s);
                    throw e;
                } catch (InterruptedException ex) {
                    throw new IOException(ex);
                }
            }
        }

        private void writePut(PutWritable put) throws IOException {
            ImmutableBytesWritable row = new ImmutableBytesWritable(put.getPut().getRow());
            SortedMap<byte[], List<Cell>> cells = put.getPut().getFamilyCellMap();
            for (Map.Entry<byte[], List<Cell>> entry : cells.entrySet()) {
                Collections.sort(entry.getValue(), new CellComparator());
                for (Cell c : entry.getValue()) {
                    try {
                        fileWriter.write(row, KeyValueUtil.copyToNewKeyValue(c));
                    } catch (InterruptedException e) {
                        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
                    }
                }
            }
        }

        @Override
        public void write(Writable w) throws IOException {
            if (w instanceof Text) {
                writeText((Text) w);
            } else if (w instanceof PutWritable) {
                writePut((PutWritable) w);
            } else {
                throw new IOException("Unexpected writable " + w);
            }
        }
    };
}

From source file:com.ibm.bi.dml.runtime.io.WriterTextCSV.java

License:Open Source License

/**
 * Prepends a generated header row ("C1", "C2", ...) to the CSV data in srcFileName and writes
 * the result to destFileName.
 * @param srcFileName
 * @param destFileName
 * @param rlen
 * @param clen
 * @throws IOException
 */
@SuppressWarnings("unchecked")
public void addHeaderToCSV(String srcFileName, String destFileName, long rlen, long clen) throws IOException {
    Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());

    Path srcFilePath = new Path(srcFileName);
    Path destFilePath = new Path(destFileName);
    FileSystem hdfs = FileSystem.get(conf);

    if (!_props.hasHeader()) {
        // simply move srcFile to destFile

        /*
         * TODO: Remove this roundabout way! 
         * For example: destFilePath = /user/biadmin/csv/temp/out/file.csv 
         *              & the only path that exists already on HDFS is /user/biadmin/csv/.
         * In this case: the directory structure /user/biadmin/csv/temp/out must be created. 
         * Simple hdfs.rename() does not seem to create this directory structure.
         */

        // delete the destination file, if exists already
        //boolean ret1 = 
        hdfs.delete(destFilePath, true);

        // Create /user/biadmin/csv/temp/out/file.csv so that ..../temp/out/ is created.
        //boolean ret2 = 
        hdfs.createNewFile(destFilePath);

        // delete the file "file.csv" but preserve the directory structure /user/biadmin/csv/temp/out/
        //boolean ret3 = 
        hdfs.delete(destFilePath, true);

        // finally, move the data to destFilePath = /user/biadmin/csv/temp/out/file.csv
        //boolean ret4 = 
        hdfs.rename(srcFilePath, destFilePath);

        //System.out.println("Return values = del:" + ret1 + ", createNew:" + ret2 + ", del:" + ret3 + ", rename:" + ret4);
        return;
    }

    // construct the header line
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < clen; i++) {
        sb.append("C" + (i + 1));
        if (i < clen - 1)
            sb.append(_props.getDelim());
    }
    sb.append('\n');

    if (hdfs.isDirectory(srcFilePath)) {

        // compute sorted order among part files
        ArrayList<Path> files = new ArrayList<Path>();
        for (FileStatus stat : hdfs.listStatus(srcFilePath, CSVReblockMR.hiddenFileFilter))
            files.add(stat.getPath());
        Collections.sort(files);

        // first part file path
        Path firstpart = files.get(0);

        // create a temp file, and add header and contents of first part
        Path tmp = new Path(firstpart.toString() + ".tmp");
        OutputStream out = hdfs.create(tmp, true);
        out.write(sb.toString().getBytes());
        sb.setLength(0);

        // copy rest of the data from firstpart
        InputStream in = null;
        try {
            in = hdfs.open(firstpart);
            IOUtils.copyBytes(in, out, conf, true);
        } finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }

        // rename tmp to firstpart
        hdfs.delete(firstpart, true);
        hdfs.rename(tmp, firstpart);

        // rename srcfile to destFile
        hdfs.delete(destFilePath, true);
        hdfs.createNewFile(destFilePath); // force the creation of directory structure
        hdfs.delete(destFilePath, true); // delete the file, but preserve the directory structure
        hdfs.rename(srcFilePath, destFilePath); // move the data 

    } else if (hdfs.isFile(srcFilePath)) {
        // create destination file
        OutputStream out = hdfs.create(destFilePath, true);

        // write header
        out.write(sb.toString().getBytes());
        sb.setLength(0);

        // copy the data from srcFile
        InputStream in = null;
        try {
            in = hdfs.open(srcFilePath);
            IOUtils.copyBytes(in, out, conf, true);
        } finally {
            IOUtilFunctions.closeSilently(in);
            IOUtilFunctions.closeSilently(out);
        }
    } else {
        throw new IOException(srcFilePath.toString() + ": No such file or directory");
    }
}