Example usage for org.apache.hadoop.fs FileSystem makeQualified

Introduction

This page collects example usages of org.apache.hadoop.fs.FileSystem.makeQualified.

Prototype

public Path makeQualified(Path path)

Source Link

Document

Qualifies a path so that it uses this FileSystem and, if the path is relative, makes it absolute.

Usage

From source file:org.mitre.ccv.mapred.CalculateCosineDistanceMatrix.java

License:Open Source License

/**
 * Writes out the matrix in row major (packed) order. No labels are outputed.
 *
 * @param jobConf/* w w  w .  j a v a 2 s.  c  o  m*/
 * @param input
 * @param output
 * @param digits
 * @throws IOException
 */
public static void printRowMajorMatrix(JobConf jobConf, String input, String output, int digits)
        throws IOException {
    JobConf conf = new JobConf(jobConf, CalculateCosineDistanceMatrix.class);

    DecimalFormat format = new DecimalFormat();
    format.setDecimalFormatSymbols(new DecimalFormatSymbols(Locale.US));
    format.setMinimumIntegerDigits(1);
    format.setMaximumFractionDigits(digits);
    //format.setMinimumFractionDigits(fractionDigits);
    format.setGroupingUsed(false);

    final Path inputPath = new Path(input);
    final FileSystem fs = inputPath.getFileSystem(conf);
    final Path qInputPath = fs.makeQualified(inputPath);
    final Path outputPath = new Path(output);
    Path[] paths = FileUtils.ls(conf, qInputPath.toString() + Path.SEPARATOR + "part-*");

    FSDataOutputStream fos = fs.create(outputPath, true); // throws nothing!
    final Writer writer = new OutputStreamWriter(fos);
    final Text key = new Text();
    final DenseVectorWritable value = new DenseVectorWritable();
    for (int idx = 0; idx < paths.length; idx++) {
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, paths[idx], conf);
        boolean hasNext = reader.next(key, value);
        while (hasNext) {

            final DenseVector vector = value.get();
            final StringBuilder sb = new StringBuilder();
            for (int i = 0; i < vector.getCardinality(); i++) {
                final String s = format.format(vector.get(i)); // format the number
                sb.append(s);
                sb.append(' ');
            }
            writer.write(sb.toString());
            hasNext = reader.next(key, value);
        }
        try {
            writer.flush();
            reader.close();
        } catch (IOException ioe) {
            // closing the SequenceFile.Reader will throw an exception if the file is over some unknown size
            LOG.debug("Probably caused by closing the SequenceFile.Reader. All is well", ioe);
        }
    }
    try {
        writer.close();
        fos.flush();
        fos.close();
    } catch (IOException ioe) {
        LOG.debug("Caused by distributed cache output stream.", ioe);
    }
}

From source file:org.mitre.ccv.mapred.CalculateCosineDistanceMatrix.java

License:Open Source License

/**
 * Outputs the distance matrix (DenseVectors) in Phylip Square format. Names/labels are limited to 10-characters!
 *
 * <p>The first line holds a tab followed by the cardinality of the first vector read. Each following
 * line holds the record key as a label (truncated or space-padded to exactly 10 characters), one
 * space, and then the vector elements, each followed by a single space.
 *
 * @param jobConf           base job configuration (a copy is made for this call)
 * @param input             input directory name containing DenseVectors (as generated by this class).
 * @param output            output file name; created on the FileSystem of {@code input}, replacing any existing file
 * @param fractionDigits    number of digits after decimal point
 * @throws IOException if the input cannot be listed or read, or the output cannot be created or written
 */
public static void printPhylipSquare(JobConf jobConf, String input, String output, int fractionDigits)
        throws IOException {
    final JobConf conf = new JobConf(jobConf, CalculateCosineDistanceMatrix.class);

    // Locale.US symbols and no grouping keep the output independent of the platform locale.
    final DecimalFormat format = new DecimalFormat();
    format.setDecimalFormatSymbols(new DecimalFormatSymbols(Locale.US));
    format.setMinimumIntegerDigits(1);
    format.setMaximumFractionDigits(fractionDigits);
    format.setGroupingUsed(false);

    final Path inputPath = new Path(input);
    final FileSystem fs = inputPath.getFileSystem(conf);
    final Path qInputPath = fs.makeQualified(inputPath);
    final Path outputPath = new Path(output);
    final Path[] paths = FileUtils.ls(conf, qInputPath.toString() + Path.SEPARATOR + "part-*");

    final FSDataOutputStream fos = fs.create(outputPath, true);
    final Writer writer = new OutputStreamWriter(fos);
    try {
        final Text key = new Text();
        final DenseVectorWritable value = new DenseVectorWritable();
        boolean wroteHeader = false;
        for (int idx = 0; idx < paths.length; idx++) {
            final SequenceFile.Reader reader = new SequenceFile.Reader(fs, paths[idx], conf);
            try {
                while (reader.next(key, value)) {
                    final DenseVector vector = value.get();
                    if (!wroteHeader) {
                        // The matrix size is taken from the first vector seen.
                        writer.write(String.format("\t%d\n", vector.getCardinality()));
                        wroteHeader = true;
                    }

                    final StringBuilder sb = new StringBuilder();
                    sb.append(toPhylipLabel(key.toString()));
                    sb.append(' ');
                    for (int i = 0; i < vector.getCardinality(); i++) {
                        sb.append(format.format(vector.get(i))); // format the number
                        sb.append(' ');
                    }
                    sb.append("\n");
                    writer.write(sb.toString());
                }
                writer.flush();
            } finally {
                // Always release the reader, even when reading or writing failed part-way.
                try {
                    reader.close();
                } catch (IOException ioe) {
                    // closing the SequenceFile.Reader will throw an exception if the file is over some unknown size
                    LOG.debug("Probably caused by closing the SequenceFile.Reader. All is well", ioe);
                }
            }
        }
    } finally {
        // Closing the writer flushes it and closes the wrapped FSDataOutputStream as well.
        try {
            writer.close();
        } catch (IOException ioe) {
            LOG.debug("Caused by distributed cache output stream.", ioe);
        }
    }
}

/**
 * Truncates or right-pads a name with spaces so that it is exactly 10 characters wide.
 *
 * <p>Previously names of 10 or more characters received one extra padding space, which made their
 * rows one column wider than the rows of shorter names.
 */
private static String toPhylipLabel(String name) {
    final int width = 10;
    final StringBuilder label = new StringBuilder(width);
    label.append(name, 0, Math.min(width, name.length()));
    while (label.length() < width) {
        label.append(' ');
    }
    return label.toString();
}

From source file:org.mitre.ccv.mapred.CalculateCosineDistanceMatrix.java

License:Open Source License

public int initJob(JobConf jobConf, String input, String output) throws Exception {
    JobConf conf = new JobConf(jobConf, CalculateCosineDistanceMatrix.class);

    final Path inputPath = new Path(input);
    final FileSystem fs = inputPath.getFileSystem(conf);
    final Path qInputPath = fs.makeQualified(inputPath);

    /**//from   w w  w .j  av a2  s .  c om
     * Need to get all of the sample names/labels
     */
    JobConf cacheConf = new JobConf(jobConf, CalculateCosineDistanceMatrix.class);
    cacheConf.setJobName("CacheNorm2MapReduce");
    cacheConf.setNumReduceTasks(1); // Want ONE part file

    // Set up IdentityMapper
    SequenceFileInputFormat.setInputPaths(cacheConf, new Path(input));
    cacheConf.setInputFormat(SequenceFileInputFormat.class);
    cacheConf.setMapperClass(Norm2Mapper.class);
    cacheConf.setOutputKeyClass(StringDoublePairWritable.class);
    cacheConf.setOutputValueClass(SparseVectorWritable.class);

    // Set up IdentityReducer
    cacheConf.setReducerClass(IdentityReducer.class);
    cacheConf.setOutputFormat(SequenceFileOutputFormat.class);
    cacheConf.setNumReduceTasks(1);
    Path sfPath = FileUtils.createRemoteTempPath(fs, qInputPath.getParent());
    LOG.info(String.format("Generating feature vector SequenceFile path %s", sfPath.toString()));
    SequenceFileOutputFormat.setOutputPath(cacheConf, sfPath);
    JobClient.runJob(cacheConf);

    Path cachePath = new Path(sfPath.toString() + Path.SEPARATOR + "part-00000");

    // need to know the size (the reducer might be able to send this back via the Reporter, but how do we grab that info?
    StringDoublePairWritable key = new StringDoublePairWritable();
    int size = 0;
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, cachePath, conf);
    boolean hasNext = reader.next(key);
    while (hasNext) {
        size += 1;
        hasNext = reader.next(key);
    }
    try {
        reader.close();
    } catch (IOException ioe) {
        // closing the SequenceFile.Reader will throw an exception if the file is over some unknown size
        LOG.debug("Probably caused by closing the SequenceFile.Reader. All is well", ioe);
    }

    //LOG.info(String.format("Caching model file %s", qInputPath.toString()));
    URI listURI = new URI(fs.makeQualified(cachePath).toString());
    DistributedCache.addCacheFile(listURI, conf);
    LOG.info(String.format("SequenceFile cache path %s (%s) with %d labels", listURI.toString(),
            cachePath.getName(), size));
    conf.set(CACHE_PATH, cachePath.getName());
    conf.setInt(DISTANCE_MATRIX_SIZE, size);

    /**
     * Main MapReduce Task of generating dot products
     */
    LOG.info("Generating distances");
    JobConf distanceConf = new JobConf(conf, CalculateCosineDistanceMatrix.class);
    distanceConf.setJobName("DistanceMapReduce");
    // Set up distance mapper
    SequenceFileInputFormat.setInputPaths(distanceConf, new Path(input));
    distanceConf.setInputFormat(SequenceFileInputFormat.class);
    distanceConf.setMapperClass(DistanceMap.class);
    distanceConf.setMapOutputKeyClass(Text.class);
    distanceConf.setMapOutputValueClass(SparseVectorWritable.class);

    // Set up reducer to merge lower-triangle results into a single dense distance vector
    distanceConf.setReducerClass(DistanceReducer.class);
    distanceConf.setOutputKeyClass(Text.class);
    distanceConf.setOutputValueClass(DenseVectorWritable.class);
    distanceConf.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(distanceConf, new Path(output));
    JobClient.runJob(distanceConf);

    return 0;
}

From source file:org.mitre.ccv.mapred.CompleteCompositionVectorUtils.java

License:Open Source License

/**
 * Writes out the {@link SequenceFile} feature vectors in row major (packed) order. No labels are output.
 *
 * <p>Every {@code part-*} SequenceFile found under {@code input} is read in the order returned by
 * {@code FileUtils.ls}. The elements of each vector are formatted and written one after another,
 * each followed by a single space; no line break is written between rows.
 *
 * @param jobConf   base job configuration (a copy is made for this call)
 * @param input     top level SequenceFile directory path
 * @param output    path to output the matrix; created on the FileSystem of {@code input}, replacing any existing file
 * @param digits    the maximum number of fraction digits
 * @throws IOException if the input cannot be listed or read, or the output cannot be created or written
 */
public static void featureVectors2RowMajorMatrix(JobConf jobConf, String input, String output, int digits)
        throws IOException {
    // NOTE(review): the JobConf is built with CalculateCosineDistanceMatrix.class although this
    // method lives in another class; kept as in the original, confirm that this is intended.
    final JobConf conf = new JobConf(jobConf, CalculateCosineDistanceMatrix.class);

    // Locale.US symbols and no grouping keep the output independent of the platform locale.
    final DecimalFormat format = new DecimalFormat();
    format.setDecimalFormatSymbols(new DecimalFormatSymbols(Locale.US));
    format.setMinimumIntegerDigits(1);
    format.setMaximumFractionDigits(digits);
    format.setGroupingUsed(false);

    final Path inputPath = new Path(input);
    final FileSystem fs = inputPath.getFileSystem(conf);
    final Path qInputPath = fs.makeQualified(inputPath);
    final Path outputPath = new Path(output);
    final Path[] paths = FileUtils.ls(conf, qInputPath.toString() + Path.SEPARATOR + "part-*");

    final FSDataOutputStream fos = fs.create(outputPath, true);
    final Writer writer = new OutputStreamWriter(fos);
    try {
        final Text key = new Text();
        final SparseVectorWritable value = new SparseVectorWritable();
        for (int idx = 0; idx < paths.length; idx++) {
            final SequenceFile.Reader reader = new SequenceFile.Reader(fs, paths[idx], conf);
            try {
                while (reader.next(key, value)) {
                    final SparseVector vector = value.get();
                    final StringBuilder sb = new StringBuilder();
                    for (int i = 0; i < vector.getCardinality(); i++) {
                        sb.append(format.format(vector.get(i))); // format the number
                        sb.append(' ');
                    }
                    writer.write(sb.toString());
                }
                writer.flush();
            } finally {
                // Always release the reader, even when reading or writing failed part-way.
                try {
                    reader.close();
                } catch (IOException ioe) {
                    // closing the SequenceFile.Reader will throw an exception if the file is over some unknown size
                    LOG.debug("Probably caused by closing the SequenceFile.Reader. All is well", ioe);
                }
            }
        }
    } finally {
        // Closing the writer flushes it and closes the wrapped FSDataOutputStream as well.
        try {
            writer.close();
        } catch (IOException ioe) {
            LOG.debug("Caused by distributed cache output stream.", ioe);
        }
    }
}

From source file:org.mitre.ccv.mapred.GenerateFeatureVectors.java

License:Open Source License

/**
 * Start a new job with the given configuration and parameters.
 *
 * <p>The k-mer list file is registered in the DistributedCache and its file name is stored in the
 * configuration under {@code KMER_LIST}. When {@code cardinality} is {@code null} the list file is
 * scanned and the number of entries found is used instead.
 *
 * @param jobConf           base job configuration
 * @param listInput         file path containing list of k-mers to use; must not be {@code null}
 * @param cardinality       number of k-mers to use (if list contains less, then that will be used instead),
 *                          or {@code null} to count the entries of the list file
 * @param input             composition vector {@link SequenceFile} such as generated by {@link CalculateCompositionVectors}
 * @param output            output path of the job
 * @param cleanLogs         not used by this method
 * @return zero if no errors
 * @throws java.lang.Exception if {@code listInput} is {@code null}, or if reading the list or running the job fails
 */
public int initJob(JobConf jobConf, String listInput, Integer cardinality, String input, String output,
        boolean cleanLogs) throws Exception {
    // Validate first: new Path(null) would throw before this check could ever be reached.
    if (listInput == null) {
        throw new Exception("GenerateFeatureVectors requires a list of k-mers!");
    }

    JobConf conf = new JobConf(jobConf, GenerateFeatureVectors.class);
    conf.setJobName("GenerateFeatureVectors");

    Path listPath = new Path(listInput); // i.e, listInput = win32_200902260829/kmer_120811a7fa1_tmp
    FileSystem fs = listPath.getFileSystem(conf);
    // @todo: should check to see if it is there!

    // It doesn't say it, but we need the qualified path with the host name
    // otherwise URI sticks the host on to it not so nicely
    Path qPath = fs.makeQualified(listPath);
    // qPath = hdfs://rocks5.local:54310/user/mcolosimo/win32_200902260829/kmer_120811a7fa1_tmp
    LOG.info(String.format("Caching k-mer file %s", qPath.toString()));
    URI listURI = new URI(qPath.toString());
    DistributedCache.addCacheFile(listURI, conf);
    conf.set(KMER_LIST, listPath.getName());

    /* We need this. It is okay if the cardinality is larger than the number of k-mers. */
    if (cardinality == null) {
        LOG.info("Scanning k-mer file to determine cardinality");
        int c = 0;
        FSDataInputStream ins = fs.open(listPath);
        try {
            KmerEntropyPairWritable w = new KmerEntropyPairWritable();
            while (ins.available() > 0) {
                w.readFields(ins);
                c++;
            }
        } finally {
            ins.close();
        }
        // The FileSystem is deliberately NOT closed here: Path.getFileSystem returns a shared,
        // cached instance, and closing it would break the job submission below.
        LOG.info(String.format("Found %d k-mers in the file", c));
        cardinality = c;
    }
    conf.setInt(VECTOR_CARDINALITY, cardinality);

    // Set up mapper
    SequenceFileInputFormat.setInputPaths(conf, new Path(input));
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapperClass(CompositionVectorMap.class);
    conf.setOutputKeyClass(Text.class); // final output key class - sample name
    conf.setOutputValueClass(SparseVectorWritable.class); // final output value class

    // Set up combiner/reducer
    conf.setReducerClass(Features2VectorReducer.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(conf, new Path(output));

    JobClient.runJob(conf);

    return 0;
}

From source file:org.mitre.ccv.weka.mapred.ClassifyInstances.java

License:Open Source License

/**
 * Configures and runs the "ClassifyInstances" job.
 *
 * <p>The model file is registered in the DistributedCache and its file name is stored in the
 * configuration under {@code MODEL_PATH}. The job output key class depends on the
 * {@code SORT_OUTPUT_BY} setting: {@code Text} when it is unset or equal to {@code SORTBY_SAMPLE},
 * {@code StringDoublePairWritable} otherwise. Output is written as plain text.
 *
 * @param jobConf    base job configuration
 * @param modelInput path of the model file; must not be {@code null}
 * @param input      input SequenceFile path
 * @param output     output path of the job
 * @return always zero; failures are reported through exceptions
 * @throws Exception if {@code modelInput} is {@code null} or the job fails
 */
public int initJob(JobConf jobConf, String modelInput, String input, String output) throws Exception {
    // Validate first: new Path(null) would throw before this check could ever be reached.
    if (modelInput == null) {
        throw new Exception("ClassifyInstances requires a model!");
    }

    JobConf conf = new JobConf(jobConf, ClassifyInstances.class);
    conf.setJobName("ClassifyInstances");

    Path listPath = new Path(modelInput);
    FileSystem fs = listPath.getFileSystem(conf);
    // The cache URI is built from the fully qualified path.
    Path qPath = fs.makeQualified(listPath);
    LOG.info(String.format("Caching model file %s", qPath.toString()));
    URI listURI = new URI(qPath.toString());
    DistributedCache.addCacheFile(listURI, conf);
    conf.set(MODEL_PATH, listPath.getName());

    // Set up mapper
    SequenceFileInputFormat.setInputPaths(conf, new Path(input));
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapperClass(CompositionVectorJ48Map.class);
    // Painful way to set job output key class because we can't use WritableComparable
    String sortBy = conf.get(SORT_OUTPUT_BY, null);
    if (sortBy != null && !sortBy.equals(SORTBY_SAMPLE)) {
        LOG.info("Sorting output by class name and/or confidence.");
        conf.setOutputKeyClass(StringDoublePairWritable.class);
    } else {
        LOG.info("Sorting output by sample name.");
        conf.setOutputKeyClass(Text.class);
    }
    conf.setOutputValueClass(Text.class); // job output value class

    // Uses default reducer (IdentityReducer) and save it to a plain text file
    conf.setOutputFormat(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(conf, new Path(output));

    JobClient.runJob(conf);
    return 0;
}

From source file:org.mitre.mapred.fs.FileUtils.java

License:Open Source License

/**
 * Builds the qualified path of a temporary directory located under the given base path.
 *
 * <p>The directory name is {@code tmp_} followed by the current time in milliseconds written in
 * hexadecimal. Nothing is created on the FileSystem, and the name is not checked for uniqueness.
 *
 * @param fs       the FileSystem used to qualify the resulting path
 * @param basePath the parent of the temporary path
 * @return the qualified temporary path
 * @throws java.io.IOException declared for callers; not thrown by the visible code
 */
public static final Path createRemoteTempPath(FileSystem fs, Path basePath) throws IOException {
    // @TODO: add constant and look up tmp dir name
    final String timestampHex = Long.toHexString(System.currentTimeMillis());
    final StringBuilder location = new StringBuilder(basePath.toString());
    location.append(Path.SEPARATOR).append("tmp_").append(timestampHex);
    return fs.makeQualified(new Path(location.toString()));
}

From source file:org.mrgeo.format.CsvInputFormatTest.java

License:Apache License

/**
 * Reads {@code testBasics.csv} through {@code CsvRecordReader} and checks every row's attributes,
 * rendered as {@code key:value } pairs, against the expected strings, as well as the row count.
 */
@Test
@Category(UnitTest.class)
public void testBasics() throws Exception {
    // this class and its unit tests are a work in progress.
    FileSystem fs = new RawLocalFileSystem();
    try {
        Job j = new Job(new Configuration());
        Configuration c = j.getConfiguration();
        fs.setConf(c);
        Path testFile = new Path(input, "testBasics.csv");
        testFile = fs.makeQualified(testFile);

        FileInputFormat.addInputPath(j, testFile);
        FileSplit split = new FileSplit(testFile, 0, 500, null);
        CsvInputFormat.CsvRecordReader reader = new CsvInputFormat.CsvRecordReader();
        reader.initialize(split, HadoopUtils.createTaskAttemptContext(c, new TaskAttemptID()));

        String[] base = { "word1:Hello word2:world number:1 ", "word1:foo word2:bar number:2 ",
                "word1:cat word2:dog number:3 ", "word1:rock word2:paper number:4 ",
                "word1:red word2:blue, number:5 ", "word1:,green, word2:,, number:6 ", };

        int index = 0;
        while (reader.nextKeyValue()) {
            // Fail with a clear message instead of an ArrayIndexOutOfBoundsException on extra rows.
            Assert.assertTrue("Unexpected extra row " + index, index < base.length);
            Geometry f = reader.getCurrentValue();
            StringBuilder row = new StringBuilder();
            for (Map.Entry<?, ?> attr : f.getAllAttributes().entrySet()) {
                row.append(attr.getKey()).append(":").append(attr.getValue()).append(" ");
            }
            Assert.assertEquals("Error in row " + index, base[index], row.toString());
            index++;
        }
        // Without this check the test would pass even if the reader returned no rows at all.
        Assert.assertEquals("Number of rows read", base.length, index);
    } finally {
        fs.close();
    }
}

From source file:org.mrgeo.format.CsvInputFormatTest.java

License:Apache License

/**
 * Reads {@code testNullValues.csv} through {@code CsvRecordReader} and checks, row by row, the
 * values of the {@code string1}, {@code int1} and {@code double1} attributes, including rows where
 * a value is expected to be empty or {@code null}. Exactly seven rows are expected.
 */
@Test
@Category(UnitTest.class)
public void testNullProcessing() throws Exception {
    // this class and its unit tests are a work in progress.
    FileSystem fs = new RawLocalFileSystem();
    try {
        Job j = new Job(new Configuration());
        Configuration c = j.getConfiguration();
        fs.setConf(c);
        Path testFile = new Path(input, "testNullValues.csv");
        testFile = fs.makeQualified(testFile);

        FileInputFormat.addInputPath(j, testFile);
        FileSplit split = new FileSplit(testFile, 0, 500, null);
        CsvInputFormat.CsvRecordReader reader = new CsvInputFormat.CsvRecordReader();
        reader.initialize(split, HadoopUtils.createTaskAttemptContext(c, new TaskAttemptID()));

        // Test specific rows returned to make sure the values are as expected.
        // Row 1 check
        Assert.assertTrue(reader.nextKeyValue());
        Geometry f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertEquals("test1", f.getAttribute("string1"));
        Assert.assertEquals(1.0, Double.parseDouble(f.getAttribute("int1")), EPSILON);
        Assert.assertEquals(1.5, Double.parseDouble(f.getAttribute("double1")), EPSILON);
        // Row 2 check
        Assert.assertTrue(reader.nextKeyValue());
        f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertEquals("test2", f.getAttribute("string1"));
        Assert.assertEquals(2.0, Double.parseDouble(f.getAttribute("int1")), EPSILON);
        // NOTE(review): the message mentions "double1" but the attribute checked is "2" - confirm
        // against the fixture which attribute name is intended.
        Assert.assertNull("Expected null value instead of: " + f.getAttribute("double1"), f.getAttribute("2"));
        // Row 3 check
        Assert.assertTrue(reader.nextKeyValue());
        f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertEquals("test3", f.getAttribute("string1"));
        Assert.assertEquals(3.0, Double.parseDouble(f.getAttribute("int1")), EPSILON);
        Assert.assertEquals(3.5, Double.parseDouble(f.getAttribute("double1")), EPSILON);
        // Row 4 check
        Assert.assertTrue(reader.nextKeyValue());
        f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertEquals("test4", f.getAttribute("string1"));
        // NOTE(review): the message mentions "int1" but the attribute checked is "1" - confirm
        // against the fixture which attribute name is intended.
        Assert.assertNull("Expected null value instead of: " + f.getAttribute("int1"), f.getAttribute("1"));
        Assert.assertEquals(4.5, Double.parseDouble(f.getAttribute("double1")), EPSILON);
        // Row 5 check
        Assert.assertTrue(reader.nextKeyValue());
        f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertEquals("test5", f.getAttribute("string1"));
        Assert.assertEquals(5.0, Double.parseDouble(f.getAttribute("int1")), EPSILON);
        Assert.assertEquals(5.5, Double.parseDouble(f.getAttribute("double1")), EPSILON);
        // Row 6 check
        Assert.assertTrue(reader.nextKeyValue());
        f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertEquals("test6", f.getAttribute("string1"));
        Assert.assertEquals("", f.getAttribute("int1"));
        Assert.assertEquals("", f.getAttribute("double1"));
        // Row 7 check
        Assert.assertTrue(reader.nextKeyValue());
        f = reader.getCurrentValue();
        Assert.assertNotNull(f);
        Assert.assertEquals("test7", f.getAttribute("string1"));
        Assert.assertNull("Expected null value instead of: " + f.getAttribute("int1"), f.getAttribute("int1"));
        Assert.assertNull("Expected null value instead of: " + f.getAttribute("double1"),
                f.getAttribute("double1"));
        // No further rows are expected.
        Assert.assertFalse(reader.nextKeyValue());
    } finally {
        fs.close();
    }
}

From source file:org.mrgeo.format.CsvInputFormatTest.java

License:Apache License

/**
 * Writes a CSV file containing rows with missing x and/or y values (plus its column definition
 * file) into the test output directory, reads the CSV back line by line with
 * {@code TextInputFormat}, and checks that every written line is returned.
 */
@Test
@Category(UnitTest.class)
public void testNullIgnore() throws Exception {
    FileSystem fs = new RawLocalFileSystem();
    try {
        int lineCount = 0;

        // Write columns file which defines the columns title and type
        String cstr = "<?xml version='1.0' encoding='UTF-8'?>\n<AllColumns firstLineHeader='false'>\n";
        cstr += "  <Column name='name' type='Nominal'/>\n";
        cstr += "  <Column name='x' type='Numeric'/>\n";
        cstr += "  <Column name='y' type='Numeric'/>\n";
        cstr += "</AllColumns>\n";
        // The columns file is named after the data file it describes (was misspelled "nulXY").
        PrintStream ps = new PrintStream(new FileOutputStream(output + "/nullXY.csv.columns"));
        try {
            ps.print(cstr);
        } finally {
            ps.close();
        }

        // Write csv test data
        ps = new PrintStream(new FileOutputStream(output + "/nullXY.csv"));
        try {
            // populated rows
            for (int ii = 0; ii < 10; ii++) {
                ps.print("ASDF,1.0,1.0\n");
                lineCount++;
            }
            // empty rows
            ps.print("ASDF,,1.0\n");
            ps.print("ASDF,1.0,\n");
            ps.print("ASDF,,\n");
            lineCount += 3;
            // populated rows
            for (int ii = 0; ii < 5; ii++) {
                ps.print("ASDF,1.0,1.0\n");
                lineCount++;
            }
        } finally {
            ps.close();
        }

        System.out.println(output + "/nullXY.csv");

        Job j = new Job(new Configuration());
        Configuration c = j.getConfiguration();
        fs.setConf(c);
        Path testFile = new Path(output, "nullXY.csv");
        testFile = fs.makeQualified(testFile);

        TextInputFormat format = new TextInputFormat();
        // The split length is a generous upper bound so that the whole file is covered.
        InputSplit split = new FileSplit(testFile, 0, lineCount * 1000, null);
        RecordReader<LongWritable, Text> reader2 = format.createRecordReader(split,
                HadoopUtils.createTaskAttemptContext(c, new TaskAttemptID()));
        try {
            reader2.initialize(split, HadoopUtils.createTaskAttemptContext(c, new TaskAttemptID()));
            long linesRead = 0;
            long start = System.currentTimeMillis();
            while (reader2.nextKeyValue()) {
                reader2.getCurrentValue().toString();
                linesRead++;
            }
            Assert.assertEquals(lineCount, linesRead);
            System.out.printf("text line reader with null x,y ignore: %d\n", System.currentTimeMillis() - start);
        } finally {
            reader2.close();
        }
    } finally {
        fs.close();
    }
}