Example usage for org.apache.hadoop.fs FileSystem makeQualified

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem#makeQualified.

Prototype

public Path makeQualified(Path path) 

Document

Qualify a path to one which uses this FileSystem and, if relative, make it absolute.
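
For orientation, here is a minimal, self-contained sketch of the call itself (not taken from the usage listings below; the class name and paths are hypothetical, and the printed URI depends on the configured default FileSystem):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MakeQualifiedDemo {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // The default FileSystem, e.g. file:/// locally or hdfs://namenode:8020 on a cluster.
        FileSystem fs = FileSystem.get(conf);

        // A relative path has no scheme or authority; makeQualified resolves it
        // against the FileSystem's working directory and prepends the FS URI.
        Path relative = new Path("data/part-00000");
        Path qualified = fs.makeQualified(relative);

        // Prints something like file:/home/user/data/part-00000 locally, or
        // hdfs://namenode:8020/user/<user>/data/part-00000 on HDFS.
        System.out.println(qualified);
    }
}

As several of the examples below note, the fully qualified form (scheme plus authority) is what makes a path safe to convert to a URI, for example when handing it to DistributedCache.addCacheFile.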

Usage

From source file:org.mitre.ccv.mapred.CalculateCosineDistanceMatrix.java

License:Open Source License

/**
 * Writes out the matrix in row major (packed) order. No labels are output.
 *
 * @param jobConf
 * @param input
 * @param output
 * @param digits
 * @throws IOException
 */
public static void printRowMajorMatrix(JobConf jobConf, String input, String output, int digits)
        throws IOException {
    JobConf conf = new JobConf(jobConf, CalculateCosineDistanceMatrix.class);

    DecimalFormat format = new DecimalFormat();
    format.setDecimalFormatSymbols(new DecimalFormatSymbols(Locale.US));
    format.setMinimumIntegerDigits(1);
    format.setMaximumFractionDigits(digits);
    //format.setMinimumFractionDigits(fractionDigits);
    format.setGroupingUsed(false);

    final Path inputPath = new Path(input);
    final FileSystem fs = inputPath.getFileSystem(conf);
    final Path qInputPath = fs.makeQualified(inputPath);
    final Path outputPath = new Path(output);
    Path[] paths = FileUtils.ls(conf, qInputPath.toString() + Path.SEPARATOR + "part-*");

    FSDataOutputStream fos = fs.create(outputPath, true); // overwrite any existing output file
    final Writer writer = new OutputStreamWriter(fos);
    final Text key = new Text();
    final DenseVectorWritable value = new DenseVectorWritable();
    for (int idx = 0; idx < paths.length; idx++) {
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, paths[idx], conf);
        boolean hasNext = reader.next(key, value);
        while (hasNext) {

            final DenseVector vector = value.get();
            final StringBuilder sb = new StringBuilder();
            for (int i = 0; i < vector.getCardinality(); i++) {
                final String s = format.format(vector.get(i)); // format the number
                sb.append(s);
                sb.append(' ');
            }
            writer.write(sb.toString());
            hasNext = reader.next(key, value);
        }
        try {
            writer.flush();
            reader.close();
        } catch (IOException ioe) {
            // closing the SequenceFile.Reader will throw an exception if the file is over some unknown size
            LOG.debug("Probably caused by closing the SequenceFile.Reader. All is well", ioe);
        }
    }
    try {
        writer.close();
        fos.flush();
        fos.close();
    } catch (IOException ioe) {
        LOG.debug("Caused by distributed cache output stream.", ioe);
    }
}

From source file:org.mitre.ccv.mapred.CalculateCosineDistanceMatrix.java

License:Open Source License

/**
 * Outputs the distance matrix (DenseVectors) in Phylip Square format. Names/labels are limited to 10 characters!
 *
 * @param jobConf
 * @param input             input directory name containing DenseVectors (as generated by this class).
 * @param output            output file name
 * @param fractionDigits    number of digits after decimal point
 * @throws IOException
 */
public static void printPhylipSquare(JobConf jobConf, String input, String output, int fractionDigits)
        throws IOException {
    JobConf conf = new JobConf(jobConf, CalculateCosineDistanceMatrix.class);

    DecimalFormat format = new DecimalFormat();
    format.setDecimalFormatSymbols(new DecimalFormatSymbols(Locale.US));
    format.setMinimumIntegerDigits(1);
    format.setMaximumFractionDigits(fractionDigits);
    //format.setMinimumFractionDigits(fractionDigits);
    format.setGroupingUsed(false);

    final Path inputPath = new Path(input);
    final FileSystem fs = inputPath.getFileSystem(conf);
    final Path qInputPath = fs.makeQualified(inputPath);
    final Path outputPath = new Path(output);
    Path[] paths = FileUtils.ls(conf, qInputPath.toString() + Path.SEPARATOR + "part-*");

    FSDataOutputStream fos = fs.create(outputPath, true); // overwrite any existing output file
    Writer writer = new OutputStreamWriter(fos);
    Text key = new Text();
    DenseVectorWritable value = new DenseVectorWritable();
    boolean wroteHeader = false;
    for (int idx = 0; idx < paths.length; idx++) {
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, paths[idx], conf);
        boolean hasNext = reader.next(key, value);
        while (hasNext) {

            final DenseVector vector = value.get();
            if (!wroteHeader) {
                writer.write(String.format("\t%d\n", vector.getCardinality()));
                wroteHeader = true;
            }

            final StringBuilder sb = new StringBuilder();
            final String name = key.toString();
            sb.append(name.substring(0, Math.min(10, name.length())));
            final int padding = Math.max(1, 10 - name.length());
            for (int k = 0; k < padding; k++) {
                sb.append(' ');
            }
            sb.append(' ');
            for (int i = 0; i < vector.getCardinality(); i++) {
                final String s = format.format(vector.get(i)); // format the number
                sb.append(s);
                sb.append(' ');
            }
            sb.append("\n");
            writer.write(sb.toString());
            hasNext = reader.next(key, value);
        }
        try {
            writer.flush();
            reader.close();
        } catch (IOException ioe) {
            // closing the SequenceFile.Reader will throw an exception if the file is over some unknown size
            LOG.debug("Probably caused by closing the SequenceFile.Reader. All is well", ioe);
        }
    }
    try {
        writer.close();
        fos.flush();
        fos.close();
    } catch (IOException ioe) {
        LOG.debug("Caused by distributed cache output stream.", ioe);
    }
}

From source file:org.mitre.ccv.mapred.CalculateCosineDistanceMatrix.java

License:Open Source License

public int initJob(JobConf jobConf, String input, String output) throws Exception {
    JobConf conf = new JobConf(jobConf, CalculateCosineDistanceMatrix.class);

    final Path inputPath = new Path(input);
    final FileSystem fs = inputPath.getFileSystem(conf);
    final Path qInputPath = fs.makeQualified(inputPath);

    /**
     * Need to get all of the sample names/labels
     */
    JobConf cacheConf = new JobConf(jobConf, CalculateCosineDistanceMatrix.class);
    cacheConf.setJobName("CacheNorm2MapReduce");
    cacheConf.setNumReduceTasks(1); // Want ONE part file

    // Set up IdentityMapper
    SequenceFileInputFormat.setInputPaths(cacheConf, new Path(input));
    cacheConf.setInputFormat(SequenceFileInputFormat.class);
    cacheConf.setMapperClass(Norm2Mapper.class);
    cacheConf.setOutputKeyClass(StringDoublePairWritable.class);
    cacheConf.setOutputValueClass(SparseVectorWritable.class);

    // Set up IdentityReducer
    cacheConf.setReducerClass(IdentityReducer.class);
    cacheConf.setOutputFormat(SequenceFileOutputFormat.class);
    cacheConf.setNumReduceTasks(1);
    Path sfPath = FileUtils.createRemoteTempPath(fs, qInputPath.getParent());
    LOG.info(String.format("Generating feature vector SequenceFile path %s", sfPath.toString()));
    SequenceFileOutputFormat.setOutputPath(cacheConf, sfPath);
    JobClient.runJob(cacheConf);

    Path cachePath = new Path(sfPath.toString() + Path.SEPARATOR + "part-00000");

    // need to know the size (the reducer might be able to send this back via the Reporter, but how do we grab that info?)
    StringDoublePairWritable key = new StringDoublePairWritable();
    int size = 0;
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, cachePath, conf);
    boolean hasNext = reader.next(key);
    while (hasNext) {
        size += 1;
        hasNext = reader.next(key);
    }
    try {
        reader.close();
    } catch (IOException ioe) {
        // closing the SequenceFile.Reader will throw an exception if the file is over some unknown size
        LOG.debug("Probably caused by closing the SequenceFile.Reader. All is well", ioe);
    }

    //LOG.info(String.format("Caching model file %s", qInputPath.toString()));
    URI listURI = new URI(fs.makeQualified(cachePath).toString());
    DistributedCache.addCacheFile(listURI, conf);
    LOG.info(String.format("SequenceFile cache path %s (%s) with %d labels", listURI.toString(),
            cachePath.getName(), size));
    conf.set(CACHE_PATH, cachePath.getName());
    conf.setInt(DISTANCE_MATRIX_SIZE, size);

    /**
     * Main MapReduce Task of generating dot products
     */
    LOG.info("Generating distances");
    JobConf distanceConf = new JobConf(conf, CalculateCosineDistanceMatrix.class);
    distanceConf.setJobName("DistanceMapReduce");
    // Set up distance mapper
    SequenceFileInputFormat.setInputPaths(distanceConf, new Path(input));
    distanceConf.setInputFormat(SequenceFileInputFormat.class);
    distanceConf.setMapperClass(DistanceMap.class);
    distanceConf.setMapOutputKeyClass(Text.class);
    distanceConf.setMapOutputValueClass(SparseVectorWritable.class);

    // Set up reducer to merge lower-triangle results into a single dense distance vector
    distanceConf.setReducerClass(DistanceReducer.class);
    distanceConf.setOutputKeyClass(Text.class);
    distanceConf.setOutputValueClass(DenseVectorWritable.class);
    distanceConf.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(distanceConf, new Path(output));
    JobClient.runJob(distanceConf);

    return 0;
}

From source file:org.mitre.ccv.mapred.CompleteCompositionVectorUtils.java

License:Open Source License

/**
 * Writes out the {@link SequenceFile} feature vectors in row major (packed) order. No labels are output.
 *
 * @param jobConf
 * @param input     top level SequenceFile directory path
 * @param output    path to output the matrix
 * @param digits    the maximum number of fraction digits
 * @throws IOException
 */
public static void featureVectors2RowMajorMatrix(JobConf jobConf, String input, String output, int digits)
        throws IOException {
    JobConf conf = new JobConf(jobConf, CalculateCosineDistanceMatrix.class);

    DecimalFormat format = new DecimalFormat();
    format.setDecimalFormatSymbols(new DecimalFormatSymbols(Locale.US));
    format.setMinimumIntegerDigits(1);
    format.setMaximumFractionDigits(digits);
    //format.setMinimumFractionDigits(fractionDigits);
    format.setGroupingUsed(false);

    final Path inputPath = new Path(input);
    final FileSystem fs = inputPath.getFileSystem(conf);
    final Path qInputPath = fs.makeQualified(inputPath);
    final Path outputPath = new Path(output);
    Path[] paths = FileUtils.ls(conf, qInputPath.toString() + Path.SEPARATOR + "part-*");

    FSDataOutputStream fos = fs.create(outputPath, true); // overwrite any existing output file
    final Writer writer = new OutputStreamWriter(fos);
    final Text key = new Text();
    final SparseVectorWritable value = new SparseVectorWritable();
    for (int idx = 0; idx < paths.length; idx++) {
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, paths[idx], conf);
        boolean hasNext = reader.next(key, value);
        while (hasNext) {

            final SparseVector vector = value.get();
            final StringBuilder sb = new StringBuilder();
            for (int i = 0; i < vector.getCardinality(); i++) {
                final String s = format.format(vector.get(i)); // format the number
                sb.append(s);
                sb.append(' ');
            }
            writer.write(sb.toString());
            hasNext = reader.next(key, value);
        }
        try {
            writer.flush();
            reader.close();
        } catch (IOException ioe) {
            // closing the SequenceFile.Reader will throw an exception if the file is over some unknown size
            LOG.debug("Probably caused by closing the SequenceFile.Reader. All is well", ioe);
        }
    }
    try {
        writer.close();
        fos.flush();
        fos.close();
    } catch (IOException ioe) {
        LOG.debug("Caused by distributed cache output stream.", ioe);
    }
}

From source file:org.mitre.ccv.mapred.GenerateFeatureVectors.java

License:Open Source License

/**
 * Start a new job with the given configuration and parameters.
 *
 * @param jobConf
 * @param listInput         file path containing list of k-mers to use
 * @param cardinality       number of k-mers to use (if the list contains fewer, that number will be used instead).
 * @param input             composition vector {@link SequenceFile} such as generated by {@link CalculateCompositionVectors}
 * @param output
 * @param cleanLogs
 * @return zero if no errors
 * @throws java.lang.Exception
 */
public int initJob(JobConf jobConf, String listInput, Integer cardinality, String input, String output,
        boolean cleanLogs) throws Exception {
    JobConf conf = new JobConf(jobConf, GenerateFeatureVectors.class);
    conf.setJobName("GenerateFeatureVectors");

    if (listInput == null) {
        throw new Exception("GenerateFeatureVectors requires a list of k-mers!");
    }
    Path listPath = new Path(listInput); // e.g., listInput = win32_200902260829/kmer_120811a7fa1_tmp
    FileSystem fs = listPath.getFileSystem(conf);
    // @todo: should check to see if it is there!

    // It doesn't say it, but we need the qualified path with the host name,
    // otherwise URI sticks the host onto it not so nicely
    Path qPath = fs.makeQualified(listPath);
    // listPath = hdfs://rocks5.local:54310/user/mcolosimo/win32_200902260829/kmer_120811a7fa1_tmp
    LOG.info(String.format("Caching k-mer file %s", qPath.toString()));
    // URI:hdfs://rocks5.local:54310/user/mcolosimo/win32_200902260829/kmer_120811a7fa1_tmp
    URI listURI = new URI(qPath.toString());
    DistributedCache.addCacheFile(listURI, conf);
    conf.set(KMER_LIST, listPath.getName());
    //LOG.info("k-mer URI:" + listURI.toString());

    /** We need this. It is okay if the cardinality is larger than the number of k-mers. */
    if (cardinality == null) {
        LOG.info("Scanning k-mer file to determine cardinality");
        FSDataInputStream ins = fs.open(listPath);

        KmerEntropyPairWritable w = new KmerEntropyPairWritable();
        int c = 0;
        while (ins.available() > 0) {
            w.readFields(ins);
            c++;
        }
        ins.close();
        fs.close(); // note: this closes the cached FileSystem instance shared within the JVM
        LOG.info(String.format("Found %d k-mers in the file", c));
        cardinality = c;
    }
    conf.setInt(VECTOR_CARDINALITY, cardinality);

    // Set up mapper
    SequenceFileInputFormat.setInputPaths(conf, new Path(input));
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapperClass(CompositionVectorMap.class);
    conf.setOutputKeyClass(Text.class); // final output key class - sample name
    conf.setOutputValueClass(SparseVectorWritable.class); // final output value class

    // Set up combiner/reducer
    conf.setReducerClass(Features2VectorReducer.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(conf, new Path(output));

    JobClient.runJob(conf);

    return 0;
}

From source file:org.mitre.ccv.weka.mapred.ClassifyInstances.java

License:Open Source License

public int initJob(JobConf jobConf, String modelInput, String input, String output) throws Exception {
    JobConf conf = new JobConf(jobConf, ClassifyInstances.class);
    conf.setJobName("ClassifyInstances");

    if (modelInput == null) {
        throw new Exception("ClassifyInstances requires a model!");
    }
    Path listPath = new Path(modelInput);
    FileSystem fs = listPath.getFileSystem(conf);
    Path qPath = fs.makeQualified(listPath);
    LOG.info(String.format("Caching model file %s", qPath.toString()));
    URI listURI = new URI(qPath.toString());
    DistributedCache.addCacheFile(listURI, conf);
    conf.set(MODEL_PATH, listPath.getName());

    // Set up mapper
    SequenceFileInputFormat.setInputPaths(conf, new Path(input));
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapperClass(CompositionVectorJ48Map.class);
    // Painful way to set job output key class because we can't use WritableComparable
    String sortBy = conf.get(SORT_OUTPUT_BY, null);
    if (sortBy != null && !sortBy.equals(SORTBY_SAMPLE)) {
        LOG.info("Sorting output by class name and/or confidence.");
        conf.setOutputKeyClass(StringDoublePairWritable.class);
    } else {
        LOG.info("Sorting output by sample name.");
        conf.setOutputKeyClass(Text.class);
    }
    conf.setOutputValueClass(Text.class); // job output value class

    // Uses default reducer (IdentityReducer) and save it to a plain text file
    conf.setOutputFormat(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(conf, new Path(output));

    JobClient.runJob(conf);
    return 0;
}

From source file:org.mitre.mapred.fs.FileUtils.java

License:Open Source License

/**
 * Returns a tmp path on the remote FileSystem.
 *
 * @param fs
 * @param basePath
 * @return The path
 * @throws java.io.IOException
 */
public static final Path createRemoteTempPath(FileSystem fs, Path basePath) throws IOException {

    long now = System.currentTimeMillis();
    // @TODO: add constant and look up tmp dir name
    Path tmpDirPath = new Path(basePath.toString() + Path.SEPARATOR + "tmp_" + Long.toHexString(now));
    // check to see if unique?
    return fs.makeQualified(tmpDirPath);
}
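
A hypothetical usage sketch (variable names assumed, not from the source), mirroring how CalculateCosineDistanceMatrix.initJob above combines this helper with a qualified input path:

    FileSystem fs = inputPath.getFileSystem(conf);
    Path qInputPath = fs.makeQualified(inputPath);
    Path tmpPath = FileUtils.createRemoteTempPath(fs, qInputPath.getParent());
    // tmpPath comes back already qualified, e.g. hdfs://host:8020/user/<user>/tmp_<hex-millis>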

From source file:org.mrgeo.format.CsvInputFormatTest.java

License:Apache License

@Test
@Category(UnitTest.class)
public void testBasics() throws Exception {
    // this class and its unit tests are a work in progress.
    FileSystem fs = new RawLocalFileSystem();
    try {
        Job j = new Job(new Configuration());
        Configuration c = j.getConfiguration();
        fs.setConf(c);
        Path testFile = new Path(input, "testBasics.csv");
        testFile = fs.makeQualified(testFile);

        FileInputFormat.addInputPath(j, testFile);
        FileSplit split = new FileSplit(testFile, 0, 500, null);
        CsvInputFormat.CsvRecordReader reader = new CsvInputFormat.CsvRecordReader();
        reader.initialize(split, HadoopUtils.createTaskAttemptContext(c, new TaskAttemptID()));

        String[] base = { "word1:Hello word2:world number:1 ", "word1:foo word2:bar number:2 ",
                "word1:cat word2:dog number:3 ", "word1:rock word2:paper number:4 ",
                "word1:red word2:blue, number:5 ", "word1:,green, word2:,, number:6 ", };

        int index = 0;
        while (reader.nextKeyValue()) {
            Geometry f = reader.getCurrentValue();
            String row = "";
            for (Map.Entry attr : f.getAllAttributes().entrySet()) {
                row += attr.getKey() + ":" + attr.getValue() + " ";
            }
            Assert.assertEquals("Error in row " + index, base[index++], row);
        }

    } catch (Exception e) {
        e.printStackTrace();
        throw e;
    } finally {
        fs.close();
    }
}

From source file:org.mrgeo.format.CsvInputFormatTest.java

License:Apache License

@Test
@Category(UnitTest.class)
public void testNullProcessing() throws Exception {
    // this class and its unit tests are a work in progress.
    FileSystem fs = new RawLocalFileSystem();
    try {
        Job j = new Job(new Configuration());
        Configuration c = j.getConfiguration();
        fs.setConf(c);
        Path testFile = new Path(input, "testNullValues.csv");
        testFile = fs.makeQualified(testFile);

        FileInputFormat.addInputPath(j, testFile);
        FileSplit split = new FileSplit(testFile, 0, 500, null);
        CsvInputFormat.CsvRecordReader reader = new CsvInputFormat.CsvRecordReader();
        reader.initialize(split, HadoopUtils.createTaskAttemptContext(c, new TaskAttemptID()));

        // Test specific rows returned to make sure the values are as expected.
        Assert.assertTrue(reader.nextKeyValue());
        Geometry f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertEquals("test1", f.getAttribute("string1"));
        Assert.assertEquals(1.0, Double.parseDouble(f.getAttribute("int1")), EPSILON);
        Assert.assertEquals(1.5, Double.parseDouble(f.getAttribute("double1")), EPSILON);
        // Row 2 check
        Assert.assertTrue(reader.nextKeyValue());
        f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertEquals("test2", f.getAttribute("string1"));
        Assert.assertEquals(2.0, Double.parseDouble(f.getAttribute("int1")), EPSILON);
        Assert.assertNull("Expected null value instead of: " + f.getAttribute("double1"), f.getAttribute("2"));
        // Row 3 check
        Assert.assertTrue(reader.nextKeyValue());
        f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertEquals("test3", f.getAttribute("string1"));
        Assert.assertEquals(3.0, Double.parseDouble(f.getAttribute("int1")), EPSILON);
        Assert.assertEquals(3.5, Double.parseDouble(f.getAttribute("double1")), EPSILON);
        // Row 4 check
        Assert.assertTrue(reader.nextKeyValue());
        f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertEquals("test4", f.getAttribute("string1"));
        Assert.assertNull("Expected null value instead of: " + f.getAttribute("int1"), f.getAttribute("1"));
        Assert.assertEquals(4.5, Double.parseDouble(f.getAttribute("double1")), EPSILON);
        // Row 5 check
        Assert.assertTrue(reader.nextKeyValue());
        f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertEquals("test5", f.getAttribute("string1"));
        Assert.assertEquals(5.0, Double.parseDouble(f.getAttribute("int1")), EPSILON);
        Assert.assertEquals(5.5, Double.parseDouble(f.getAttribute("double1")), EPSILON);
        // Row 6 check
        Assert.assertTrue(reader.nextKeyValue());
        f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertEquals("test6", f.getAttribute("string1"));
        Assert.assertEquals("", f.getAttribute("int1"));
        Assert.assertEquals("", f.getAttribute("double1"));
        // Row 7 check
        Assert.assertTrue(reader.nextKeyValue());
        f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertEquals("test7", f.getAttribute("string1"));
        Assert.assertNull("Expected null value instead of: " + f.getAttribute("int1"), f.getAttribute("int1"));
        Assert.assertNull("Expected null value instead of: " + f.getAttribute("double1"),
                f.getAttribute("double1"));
        Assert.assertFalse(reader.nextKeyValue());
    } catch (Exception e) {
        e.printStackTrace();
        throw e;
    } finally {
        fs.close();
    }
}

From source file:org.mrgeo.format.CsvInputFormatTest.java

License:Apache License

@Test
@Category(UnitTest.class)
public void testNullIgnore() throws Exception {
    FileSystem fs = new RawLocalFileSystem();
    try {
        int lineCount = 0;

        // Write columns file which defines the columns title and type
        String cstr = "<?xml version='1.0' encoding='UTF-8'?>\n<AllColumns firstLineHeader='false'>\n";
        cstr += "  <Column name='name' type='Nominal'/>\n";
        cstr += "  <Column name='x' type='Numeric'/>\n";
        cstr += "  <Column name='y' type='Numeric'/>\n";
        cstr += "</AllColumns>\n";
        FileOutputStream fos = new FileOutputStream(output + "/nullXY.csv.columns");
        PrintStream ps = new PrintStream(fos);
        ps.print(cstr);
        ps.close();

        // Write csv test data
        fos = new FileOutputStream(output + "/nullXY.csv");
        ps = new PrintStream(fos);
        // populated rows
        for (int ii = 0; ii < 10; ii++) {
            ps.print("ASDF,1.0,1.0\n");
            lineCount++;
        }
        // empty rows
        ps.print("ASDF,,1.0\n");
        ps.print("ASDF,1.0,\n");
        ps.print("ASDF,,\n");
        lineCount += 3;
        // populated rows
        for (int ii = 0; ii < 5; ii++) {
            ps.print("ASDF,1.0,1.0\n");
            lineCount++;
        }
        ps.close();

        System.out.println(output + "/nullXY.csv");

        Job j = new Job(new Configuration());
        Configuration c = j.getConfiguration();
        fs.setConf(c);
        Path testFile = new Path(output, "nullXY.csv");
        testFile = fs.makeQualified(testFile);
        InputSplit split;
        long l;
        long start;

        TextInputFormat format = new TextInputFormat();
        split = new FileSplit(testFile, 0, lineCount * 1000, null);
        RecordReader<LongWritable, Text> reader2 = format.createRecordReader(split,
                HadoopUtils.createTaskAttemptContext(c, new TaskAttemptID()));

        reader2.initialize(split, HadoopUtils.createTaskAttemptContext(c, new TaskAttemptID()));
        l = 0;
        start = System.currentTimeMillis();
        while (reader2.nextKeyValue()) {
            reader2.getCurrentValue().toString();
            l++;
        }
        Assert.assertEquals(lineCount, l);
        System.out.printf("text line reader with null x,y ignore: %d\n", System.currentTimeMillis() - start);

    } catch (Exception e) {
        e.printStackTrace();
        throw e;
    } finally {
        fs.close();
    }
}