cascading.RegressionMiscTest.java Source code

Java tutorial

Introduction

Here is the source code for cascading.RegressionMiscTest.java

Source

/*
 * Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Cascading is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Cascading is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Cascading.  If not, see <http://www.gnu.org/licenses/>.
 */

package cascading;

import java.io.File;
import java.io.IOException;

import cascading.flow.Flow;
import cascading.flow.FlowConnector;
import cascading.operation.Debug;
import cascading.operation.Identity;
import cascading.operation.regex.RegexFilter;
import cascading.operation.regex.RegexSplitter;
import cascading.pipe.Each;
import cascading.pipe.Pipe;
import cascading.scheme.TextLine;
import cascading.tap.Hfs;
import cascading.tap.Tap;
import cascading.tuple.Fields;
import cascading.tuple.TupleEntryIterator;
import org.apache.hadoop.mapred.JobConf;

/**
 * Miscellaneous regression tests: DOT export of a planned flow, planner
 * validation of sink field names, and draining a tap's tuple iterator.
 */
public class RegressionMiscTest extends CascadingTestCase {
    /** Input file read by {@link #testTupleEntryNextTwice()}; that test expects it to yield 10 entries. */
    String inputFileNums10 = "build/test/data/nums.10.txt";

    /** Directory (with trailing slash) under which these tests place their output. */
    String outputPath = "build/test/output/regressionmisc/";

    public RegressionMiscTest() {
        super("regression misc");
    }

    /**
     * Sanity check to make sure writeDOT still works: plans a small flow and
     * verifies that a DOT file is actually produced for it.
     *
     * @throws Exception if building or planning the flow fails
     */
    public void testWriteDot() throws Exception {
        Tap source = new Hfs(new TextLine(new Fields("offset", "line")), "/input");
        // outputPath already ends with a separator, so no extra slash is added here.
        Tap sink = new Hfs(new TextLine(), outputPath + "unknown", true);

        Pipe pipe = new Pipe("test");

        pipe = new Each(pipe, new Fields("line"), new RegexSplitter(Fields.UNKNOWN));

        pipe = new Each(pipe, new Debug());

        pipe = new Each(pipe, new Fields(2), new Identity(new Fields("label")));

        pipe = new Each(pipe, new Debug());

        pipe = new Each(pipe, new Fields("label"), new RegexFilter("[A-Z]*"));

        pipe = new Each(pipe, new Debug());

        Flow flow = new FlowConnector().connect(source, sink, pipe);

        File outputDir = new File(outputPath);

        // mkdirs() returns false when the directory already exists, so check the end state instead.
        outputDir.mkdirs();
        if (!outputDir.isDirectory()) {
            fail("could not create output directory: " + outputDir);
        }

        File dotFile = new File(outputDir, "writedot.dot");

        flow.writeDOT(dotFile.getPath());

        // Without this check the test would pass even if no file were written.
        if (!dotFile.isFile()) {
            fail("writeDOT did not create: " + dotFile);
        }
    }

    /**
     * Verifies sink fields are consulted during planning: the sink selects a
     * field ("fifth") that the pipe never declares, so connecting must fail.
     *
     * @throws IOException declared for tap construction
     */
    public void testSinkDeclaredFieldsFails() throws IOException {
        Tap source = new Hfs(new TextLine(new Fields("line")), "/input");

        Pipe pipe = new Pipe("test");

        pipe = new Each(pipe, new RegexSplitter(new Fields("first", "second", "third"), "\\s"), Fields.ALL);

        Tap sink = new Hfs(new TextLine(new Fields("line"), new Fields("first", "second", "fifth")), "output",
                true);

        try {
            new FlowConnector().connect(source, sink, pipe);
            fail("did not fail on bad sink field names");
        } catch (Exception expected) {
            // expected: connecting with an unresolvable sink field is the behavior under test
        }
    }

    /**
     * Drains the iterator over the 10-line input and verifies that
     * {@code hasNext()} still reports {@code false} when asked again afterwards.
     *
     * @throws IOException if the tap cannot be opened for reading
     */
    public void testTupleEntryNextTwice() throws IOException {
        Tap tap = new Hfs(new TextLine(), inputFileNums10);

        TupleEntryIterator iterator = tap.openForRead(new JobConf());

        try {
            int count = 0;
            while (iterator.hasNext()) {
                iterator.next();
                count++;
            }

            assertFalse(iterator.hasNext());
            assertEquals(10, count);
        } finally {
            // Release the underlying reader even when an assertion above fails.
            iterator.close();
        }
    }

}