Example usage for org.apache.hadoop.io Text toString

List of usage examples for org.apache.hadoop.io.Text.toString()

Introduction

On this page you can find example usages of org.apache.hadoop.io.Text.toString().

Prototype

@Override
public String toString() 

Source Link

Document

Convert text back to string

Usage

From source file:co.nubetech.hiho.similarity.ngram.NGramMapper.java

License:Apache License

/**
 * Emits one record per distinct bigram of the key. Every record carries the
 * same payload: the key and the value joined by a fixed delimiter.
 *
 * @param key     text to break into n-grams; must not be null
 * @param val     value paired with the key
 * @param context sink that receives the (n-gram, payload) records
 * @throws IOException          if the key is null or a write fails
 * @throws InterruptedException if a write is interrupted
 */
@Override
public void map(Text key, Text val, Context context) throws IOException, InterruptedException {
    if (key == null) {
        throw new IOException("Key is null");
    }
    int gramSize = 2;
    HashSet<String> nGramList = getNGrams(key, gramSize);
    if (nGramList.isEmpty()) {
        // Nothing to emit; return before touching val, as the loop-based original did.
        return;
    }
    // The payload does not depend on the n-gram, so build it once rather than per iteration.
    Text value = new Text(key.toString() + "delimiterBetweenKeyAndValue" + val.toString());
    for (String nGram : nGramList) {
        Text outKey = new Text(nGram);
        context.write(outKey, value);
        logger.info("Key and Value in NGram Mapper is: " + outKey + ", " + value);
    }
}

From source file:co.nubetech.hiho.similarity.ngram.NGramMapper.java

License:Apache License

/**
 * Splits a line on single spaces and collects the distinct n-grams formed
 * by consecutive tokens, each n-gram joined with single spaces.
 *
 * @param line     text to tokenize
 * @param gramSize number of consecutive tokens per n-gram
 * @return the distinct n-grams; for a positive gramSize the set is empty
 *         when the line yields fewer than gramSize tokens
 */
public HashSet<String> getNGrams(Text line, int gramSize) {
    HashSet<String> nGrams = new HashSet<String>();
    String[] tokens = line.toString().split(" ");
    // Number of sliding windows of gramSize tokens that fit in the line.
    int windowCount = tokens.length - gramSize + 1;
    for (int start = 0; start < windowCount; start++) {
        int end = start + gramSize;
        StringBuilder gram = new StringBuilder();
        for (int j = start; j < end; j++) {
            if (j > start) {
                gram.append(' ');
            }
            gram.append(tokens[j]);
        }
        nGrams.add(gram.toString());
    }
    return nGrams;
}

From source file:com.acme.io.JsonLoader.java

License:Apache License

/**
 * Retrieves the next tuple to be processed. Implementations should NOT
 * reuse tuple objects (or inner member objects) they return across calls
 * and should return a different tuple object in each call.
 * <p>
 * A record that does not start or end with the expected JSON object marker
 * is logged and returned as-is (with whatever fields were set) rather than
 * failing, so a few mangled lines do not fail the job.
 *
 * @return the next tuple to be processed or null if there are no more
 * tuples to be processed.
 * @throws IOException if there is an exception while retrieving the next
 * tuple
 */
public Tuple getNext() throws IOException {
    Text val = null;
    try {
        // Read the next key value pair from the record reader.  If it's
        // finished, return null
        if (!reader.nextKeyValue()) {
            return null;
        }

        // Get the current value.  We don't use the key.
        val = (Text) reader.getCurrentValue();
    } catch (InterruptedException ie) {
        throw new IOException(ie);
    }

    // Text.getBytes() exposes the backing array, which is only valid up to
    // getLength(); bound the stream so stale bytes from an earlier, longer
    // record are never handed to the parser.
    ByteArrayInputStream bais = new ByteArrayInputStream(val.getBytes(), 0, val.getLength());

    // Create a parser specific for this input line.  This may not be the
    // most efficient approach.
    JsonParser p = jsonFactory.createJsonParser(bais);
    try {
        // Create the tuple we will be returning.  We create it with the right
        // number of fields, as the Tuple object is optimized for this case.
        Tuple t = tupleFactory.newTuple(fields.length);

        // Read the start object marker.
        if (p.nextToken() != JsonToken.START_OBJECT) {
            log.warn("Bad record, could not find start of record " + val.toString());
            return t;
        }

        // Read each field in the record
        for (int i = 0; i < fields.length; i++) {
            t.set(i, readField(p, fields[i], i));
        }

        if (p.nextToken() != JsonToken.END_OBJECT) {
            log.warn("Bad record, could not find end of record " + val.toString());
        }
        return t;
    } finally {
        // Release the parser on every exit path, including bad records.
        p.close();
    }
}

From source file:com.ailk.oci.ocnosql.tools.load.csvbulkload.PhoenixCsvToKeyValueMapper.java

License:Apache License

/**
 * Parses one CSV line, runs it through the upsert executor, and writes every
 * KeyValue found in the connection's uncommitted data to the context, keyed
 * by the KeyValue's row. The connection is rolled back afterwards.
 *
 * @param key     input key (unused)
 * @param value   the CSV line
 * @param context sink for the (row, KeyValue) pairs and the error counters
 * @throws RuntimeException wrapping any exception raised while processing
 */
@SuppressWarnings("deprecation")
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    String line = value.toString();
    // Lines longer than one character get a generated row key and the separator prepended.
    if (line != null && line.length() > 1) {
        line = generateRowKey(line) + separator + line;
    }
    ImmutableBytesWritable rowKey = new ImmutableBytesWritable();
    try {
        CSVRecord record = null;
        try {
            record = csvLineParser.parse(line);
        } catch (IOException parseFailure) {
            context.getCounter(COUNTER_GROUP_NAME, "CSV Parser errors").increment(1L);
        }

        // A failed parse leaves the record null, so it is counted here as well.
        if (record == null) {
            context.getCounter(COUNTER_GROUP_NAME, "Empty records").increment(1L);
            return;
        }
        csvUpsertExecutor.execute(ImmutableList.of(record));

        for (Iterator<Pair<byte[], List<KeyValue>>> pending = PhoenixRuntime
                .getUncommittedDataIterator(conn); pending.hasNext();) {
            Pair<byte[], List<KeyValue>> entry = pending.next();
            List<KeyValue> cells = entry.getSecond();
            cells = preUpdateProcessor.preUpsert(entry.getFirst(), cells);
            for (KeyValue cell : cells) {
                // Point the reusable key at the row slice inside the cell's buffer.
                rowKey.set(cell.getBuffer(), cell.getRowOffset(), cell.getRowLength());
                context.write(rowKey, cell);
            }
        }
        conn.rollback();
    } catch (Exception failure) {
        throw new RuntimeException(failure);
    }
}

From source file:com.ailk.oci.ocnosql.tools.load.mutiple.MutipleColumnImporterMapper.java

License:Apache License

/**
 * Convert a line of TSV text into an HBase table row.
 * <p>
 * The row key is produced by the row key generator from the whole line.
 * One KeyValue is added per parsed column, except columns whose qualifier
 * is listed in the skip set. The total line counter is incremented for
 * every call, whether or not it succeeds.
 *
 * @param offset  position of the line in the input, used in bad-line messages
 * @param value   the TSV line
 * @param context sink for the (row key, Put) pair
 * @throws IOException if the line is bad and bad lines are not skipped, if
 *         row key generation fails, or if the write is interrupted
 */
@Override
public void map(LongWritable offset, Text value, Context context) throws IOException {
    byte[] lineBytes = value.getBytes();
    ts = System.currentTimeMillis();

    try {
        MutipleColumnImportTsv.TsvParser.ParsedLine parsed = parser.parse(lineBytes, value.getLength());
        String newRowKey = rowkeyGenerator.generateByGenRKStep(value.toString(), false);
        // Encode the row key once; it is shared by the Put, every KeyValue and the output key.
        byte[] rowKeyBytes = newRowKey.getBytes();

        Put put = new Put(rowKeyBytes);
        for (int i = 0; i < parsed.getColumnCount(); i++) {
            byte[] family = parser.getFamily(i);
            byte[] qualifier = parser.getQualifier(i);
            if (notNeedLoadColumnQulifiers.contains(new String(qualifier))) {
                continue;
            }
            // Build the cell directly from the row key and the column's slice of the line,
            // instead of building a temporary KeyValue and copying it into a second one.
            put.add(new KeyValue(rowKeyBytes, 0, rowKeyBytes.length,
                    family, 0, family.length, qualifier, 0, qualifier.length, ts, KeyValue.Type.Put,
                    lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i)));
        }
        context.write(new ImmutableBytesWritable(rowKeyBytes), put);
    } catch (MutipleColumnImportTsv.TsvParser.BadTsvLineException badLine) {
        if (skipBadLines) {
            System.err.println("Bad line at offset: " + offset.get() + ":\n" + badLine.getMessage());
            incrementBadLineCount(1);
            return;
        } else {
            throw new IOException(badLine);
        }
    } catch (IllegalArgumentException e) {
        if (skipBadLines) {
            System.err.println("Bad line at offset: " + offset.get() + ":\n" + e.getMessage());
            incrementBadLineCount(1);
            return;
        } else {
            throw new IOException(e);
        }
    } catch (InterruptedException e) {
        // Do not drop the record silently: restore the interrupt flag and surface the failure.
        Thread.currentThread().interrupt();
        throw new IOException(e);
    } catch (RowKeyGeneratorException e) {
        System.err.println("gen rowkey error, please check config in the ocnosqlTab.xml." + e.getMessage());
        throw new IOException(e);
    } finally {
        totalLineCount.increment(1);
    }
}

From source file:com.ailk.oci.ocnosql.tools.load.single.SingleColumnImporterMapper.java

License:Apache License

/**
 * Convert a line of TSV text into an HBase table row.
 * /*from   w  w w.  j  ava2  s.  c  o  m*/
 */
@Override
public void map(LongWritable offset, Text value, Context context) throws IOException {
    byte[] lineBytes = value.getBytes();

    try {
        TsvParser.ParsedLine parsed = parser.parse(lineBytes, value.getLength());
        //
        Text[] texts = new Text[parsed.getColumnCount()];
        int index = 0;
        for (int i = 0; i < parsed.getColumnCount(); i++) {
            //            if (i == parser.getRowKeyColumnIndex()){
            //               continue;
            //            }
            text = new Text();
            //?
            text.append(lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i));
            texts[index] = text;
            index++;
        }
        writer.set(texts);
        /*
        //rowkey
        String oriRowKey = new String(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength());
                
        // hash rowkey
        String newRowKey = oriRowKey;
        if(rowkeyGenerator != null){
           newRowKey = (String)rowkeyGenerator.generate(oriRowKey);
        }
        */
        String newRowKey = rowkeyGenerator.generateByGenRKStep(value.toString(), false);//???rowkey
        //LOG.info("single column newRowKey = " + newRowKey);
        context.write(new ImmutableBytesWritable(newRowKey.getBytes()), writer);
    } catch (BadTsvLineException badLine) {
        if (skipBadLines) {
            LOG.error("Bad line at offset: " + offset.get() + ":\n" + badLine.getMessage());
            badLineCount.increment(1);
            return;
        } else {
            throw new IOException(badLine);
        }
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}

From source file:com.alectenharmsel.research.LineCountMapper.java

License:Apache License

/**
 * Counts the newline characters in the contents and emits that count under
 * the incoming key.
 *
 * @param key      key passed through unchanged to the output
 * @param contents text whose '\n' characters are counted
 * @param context  sink for the (key, count) pair
 */
public void map(Text key, Text contents, Context context) throws IOException, InterruptedException {
    String body = contents.toString();
    long newlineCount = 0;

    // Jump from one newline to the next instead of inspecting every character.
    int at = body.indexOf('\n');
    while (at >= 0) {
        newlineCount++;
        at = body.indexOf('\n', at + 1);
    }

    context.write(key, new LongWritable(newlineCount));
}

From source file:com.alectenharmsel.research.MoabLicensesMapper.java

License:Apache License

/**
 * Handles lines containing "License": splits the line on spaces, drops the
 * empty tokens, and, when exactly 13 tokens remain, emits
 * (token4 + "-" + token0 with "/" replaced by "-", token5 + "," + token7).
 * Every other line is ignored.
 *
 * @param key      input key (unused)
 * @param contents the log line
 * @param context  sink for the emitted pair
 */
public void map(LongWritable key, Text contents, Context context) throws IOException, InterruptedException {
    String line = contents.toString();
    if (!line.contains("License")) {
        return;
    }

    // Consecutive spaces produce empty tokens; keep only the non-empty ones.
    ArrayList<String> fields = new ArrayList<String>();
    for (String token : line.split(" ")) {
        if (!token.isEmpty()) {
            fields.add(token);
        }
    }

    if (fields.size() != 13) {
        return;
    }

    String date = fields.get(0).replaceAll("/", "-");
    String pkgName = fields.get(4);
    String licenseInfo = fields.get(5) + "," + fields.get(7);
    context.write(new Text(pkgName + "-" + date), new Text(licenseInfo));
}

From source file:com.alectenharmsel.research.MoabLicensesReducer.java

License:Apache License

/**
 * Averages the two comma-separated integers carried by each value and emits
 * them under a rearranged key: the last two dash-separated parts of the
 * incoming key come first, then a comma, then the remaining leading parts
 * re-joined with dashes.
 *
 * @param key     dash-separated key
 * @param counts  values of the form "int,int"
 * @param context sink for the (rearranged key, "avgFirst,avgSecond") pair
 */
public void reduce(Text key, Iterable<Text> counts, Context context) throws IOException, InterruptedException {
    int firstSum = 0;
    int secondSum = 0;
    int samples = 0;

    for (Text entry : counts) {
        String[] parts = entry.toString().split(",");
        firstSum += Integer.parseInt(parts[0]);
        secondSum += Integer.parseInt(parts[1]);
        samples++;
    }

    double avgAvail = (double) firstSum / (double) samples;
    // Print the second average as an integer when it divides evenly, as a double otherwise.
    String avgTotal = (secondSum % samples == 0)
            ? String.valueOf(secondSum / samples)
            : String.valueOf((double) secondSum / (double) samples);

    String[] keyParts = key.toString().split("-");
    int tailStart = keyParts.length - 2;
    StringBuilder keyOut = new StringBuilder();
    keyOut.append(keyParts[tailStart]).append("-").append(keyParts[tailStart + 1]);
    keyOut.append(",");
    for (int i = 0; i < tailStart; i++) {
        if (i > 0) {
            keyOut.append("-");
        }
        keyOut.append(keyParts[i]);
    }

    context.write(new Text(keyOut.toString()), new Text(avgAvail + "," + avgTotal));
}

From source file:com.alectenharmsel.research.MoabLogSearchMapper.java

License:Apache License

/**
 * Forwards every line containing "ERROR" to the output under the constant
 * key 0; all other lines are dropped.
 *
 * @param key      input key (unused)
 * @param contents the log line
 * @param context  sink for matching lines
 */
public void map(LongWritable key, Text contents, Context context) throws IOException, InterruptedException {
    boolean isErrorLine = contents.toString().contains("ERROR");
    if (!isErrorLine) {
        return;
    }
    context.write(new LongWritable(0), contents);
}