Usage examples for org.apache.hadoop.io.Text#toString()
@Override
public String toString()
From source file:com.bark.hadoop.lab3.RedLinkMapper.java
@Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { //Possible single element nowiki tag makes xmlstreamparser to stop. Remove them first. String fixed = value.toString().replaceAll("<nowiki />|<nowiki />", ""); try {//from w w w . j a v a2s .com XMLStreamReader reader = XMLInputFactory.newInstance() .createXMLStreamReader(new ByteArrayInputStream(fixed.getBytes())); String title = ""; String textData = ""; String currentElement = ""; while (reader.hasNext()) { int code = reader.next(); switch (code) { case START_ELEMENT: currentElement = reader.getLocalName(); break; case CHARACTERS: if (currentElement.equalsIgnoreCase("title")) { title += reader.getText(); } else if (currentElement.equalsIgnoreCase("text")) { textData += reader.getText(); } break; } } reader.close(); //At this point we have the title and text data ready. title = title.trim().replaceAll(" ", "_"); /** * Find type 1 links e.g. [[some text]] and type 2 links [[a|b]] */ ArrayList<String> myLinks = new ArrayList<>(); try { myLinks = findLinks(textData); } catch (Exception e) { Logger.getLogger(RedLinkMapper.class.getName()).log(Level.SEVERE, e.getMessage(), e); } /** * For every title that exists, write the title and "!" */ context.write(new Text(title), new Text("!")); for (int i = 0; i < myLinks.size(); i++) { //Write (link,title) pairs (inlinks) (multiple writes are ok) String temp = myLinks.get(i).replaceAll(" ", "_").split("\\|")[0]; if (!title.equals(temp)) { context.write(new Text(temp), new Text(title)); } } } catch (XMLStreamException ex) { Logger.getLogger(RedLinkMapper.class.getName()).log(Level.SEVERE, ex.toString(), ex); } }
From source file:com.bark.hadoop.lab3.RedLinkReducer.java
@Override public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException { boolean isRedLink = true; //hashset to remove duplicates HashSet<String> myValues = new HashSet<>(); for (Text t : values) { //if there exists a pair for page A with value ! ( ie. (A,!) ) page A exists and therefor the link is not a redlink if (t.toString().trim().equalsIgnoreCase("!")) { isRedLink = false;//from w w w. j a v a2 s . c om } myValues.add(t.toString().trim()); } //if the link is not identified as redlink, write it to ouput. else ignore. if (!isRedLink) { for (String t : myValues) { context.write(key, new Text(t)); } } }
From source file:com.bark.hadoop.lab3.SortMapper.java
@Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { double pageRank = 0; //intermediate files identify pageranks with _!0.0000.. pattern. Pattern pt = Pattern.compile("(_!\\d+.\\S+)"); Matcher mt = pt.matcher(value.toString()); if (mt.find()) { pageRank = Double.parseDouble(mt.group(1).substring(2)); }/* w w w . jav a2 s .c om*/ //ignore cases with pageranks below 5/N double minThreshold = 5d / (context.getConfiguration().getInt("N", 0)); if (pageRank >= minThreshold) { context.write(new DoubleWritable(pageRank), new Text(value.toString().split("\t")[0])); } }
From source file:com.basho.riak.hadoop.RiakRecordWriter.java
License:Apache License
@Override public void write(Text key, V value) throws IOException, InterruptedException { try {/*from ww w.j a va 2 s . c o m*/ Location location = new Location(ns, key.toString()); // Store object with default options StoreValue sv = new StoreValue.Builder(value).withLocation(location).build(); StoreValue.Response svResponse = client.execute(sv); } catch (ExecutionException e) { throw new IOException(e); } }
From source file:com.bizosys.hsearch.kv.impl.bytescooker.IndexField.java
License:Apache License
public byte[] index(Iterable<Text> values) throws IOException { byte[] finalData = null; boolean hasValue = false; String[] resultValue = new String[2]; String line = null;//from w w w. ja v a 2 s . c o m String currentF = null; try { for (Text text : values) { if (null == text) continue; Arrays.fill(resultValue, null); line = text.toString(); int index = line.indexOf(KVIndexer.FIELD_SEPARATOR); if (index >= 0) { resultValue[0] = line.substring(0, index); if (index <= line.length() - 1) resultValue[1] = line.substring(index + 1); } currentF = resultValue[0]; int containerKey = Integer.parseInt(currentF); hasValue = true; add(containerKey, resultValue[1]); } } catch (NumberFormatException ex) { ex.printStackTrace(); throw new IOException("Unable to parse number - [" + currentF + "] for input " + line + " with line sep :" + KVIndexer.FIELD_SEPARATOR + " because " + ex.getMessage()); } if (hasValue) { finalData = getBytes(); } return finalData; }
From source file:com.bizosys.hsearch.kv.indexing.KVMapperFile.java
License:Apache License
@Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { if (isSkipHeader) { isSkipHeader = false;//ww w.j av a 2 s . com if (0 == key.get()) return; } if (null == result) { ArrayList<String> resultL = new ArrayList<String>(); LineReaderUtil.fastSplit(resultL, value.toString(), KVIndexer.FIELD_SEPARATOR); result = new String[resultL.size()]; } Arrays.fill(result, null); LineReaderUtil.fastSplit(result, value.toString(), KVIndexer.FIELD_SEPARATOR); kBase.map(result, context); }
From source file:com.bizosys.hsearch.kv.indexing.KVMapperHFile.java
License:Apache License
/**
 * Converts one {@code (rowKey, bytes)} record into a {@code KeyValue} and writes it.
 *
 * <p>Processing is best effort: any failure is reported on {@code System.err} together
 * with the row key, value size, JVM memory figures in MB and the exception, and the
 * record is skipped.
 *
 * @param key     row key text
 * @param value   cell content
 * @param context sink for the generated {@code KeyValue}
 */
@Override
protected void map(Text key, ImmutableBytesWritable value, Context context) {
    try {
        String rowKey = key.toString();
        byte[] data = value.copyBytes();

        // NOTE(review): getBytes() uses the platform default charset - confirm this matches
        // how row keys are decoded elsewhere before switching to an explicit charset.
        hKey.set(rowKey.getBytes());
        KeyValue kv = new KeyValue(hKey.get(), familyName, qualifier, data);
        context.write(hKey, kv);
    } catch (Exception e) {
        if (e instanceof InterruptedException) {
            // The signature cannot propagate the interruption, so restore the flag.
            Thread.currentThread().interrupt();
        }

        // Divide by the full factor: "x / 1024 * 1024" evaluates left to right and yields
        // roughly the byte count again, not megabytes.
        final long bytesPerMb = 1024L * 1024L;
        Runtime runtime = Runtime.getRuntime();
        System.err.println(
                "Error in processing for row key : " + key.toString() + "\t and value size " + value.getLength()
                        + "\n Memory total:max:free(MB) " + runtime.totalMemory() / bytesPerMb + " : "
                        + runtime.maxMemory() / bytesPerMb + " : " + runtime.freeMemory() / bytesPerMb + " : " + e);
    }
}
From source file:com.bizosys.hsearch.kv.indexing.KVMapperLocal.java
License:Apache License
/**
 * Splits one input line into fields and hands them to {@code kBase}.
 *
 * <p>The reusable {@code result} array is sized from the field count of the first line
 * seen and cleared before every record.
 *
 * @param key     input key supplied by the framework (unused)
 * @param value   one line of field-separated text
 * @param context job context passed through to {@code kBase.map}
 * @throws IOException          propagated from {@code kBase.map}
 * @throws InterruptedException propagated from {@code kBase.map}
 */
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    final String line = value.toString();

    if (null == result) {
        // First line: count the fields to size the reusable array.
        ArrayList<String> probe = new ArrayList<String>();
        LineReaderUtil.fastSplit(probe, line, KVIndexer.FIELD_SEPARATOR);
        result = new String[probe.size()];
    }

    // Clear leftovers from the previous record before refilling.
    Arrays.fill(result, null);
    LineReaderUtil.fastSplit(result, line, KVIndexer.FIELD_SEPARATOR);
    kBase.map(result, context);
}
From source file:com.bizosys.hsearch.kv.indexing.KVMapperMapFile.java
License:Apache License
@Override protected void map(Text key, Text value, Context context) throws IOException, InterruptedException { if (isSkipHeader) { isSkipHeader = false;//from w ww . j a v a 2 s.c o m //if ( 0 == key.get()) return; } List<String> eachRow = new ArrayList<String>(); LineReaderUtil.fastSplit(eachRow, value.toString(), LINE_SEPARATOR); for (String row : eachRow) { if (null == result) { ArrayList<String> resultL = new ArrayList<String>(); LineReaderUtil.fastSplit(resultL, row, KVIndexer.FIELD_SEPARATOR); result = new String[resultL.size()]; } Arrays.fill(result, null); LineReaderUtil.fastSplit(result, row, KVIndexer.FIELD_SEPARATOR); kBase.map(result, context); } }
From source file:com.bizosys.hsearch.kv.indexing.KVReducerBase.java
License:Apache License
/**
 * Builds the serialized index bytes for one key by dispatching on the data type character.
 *
 * <p>Field options (name, compressed, repeatable, analyzed) are read from {@code fld} when
 * it is non-null; otherwise the name is {@code null} and all flags are {@code false}.
 *
 * <p>For type {@code 'e'}, a key longer than one character that ends in {@code '*'} has
 * its last two characters deleted in place and enables single-sighting skipping.
 * Unrecognized type characters collect the values as strings and serialize them with
 * {@code SortedBytesString}.
 *
 * @param key          key being processed; may be shortened in place for type {@code 'e'}
 * @param values       values collected for the key
 * @param existingData previously stored bytes, passed through to the type-specific cooker
 * @param fld          field definition, or {@code null}
 * @param dataTypeChar data type selector
 * @return the bytes produced by the selected cooker
 * @throws IOException propagated from the selected cooker
 */
public byte[] cookBytes(StringBuilder key, Iterable<Text> values, byte[] existingData, Field fld,
        char dataTypeChar) throws IOException {

    final boolean hasField = (null != fld);
    final String fieldName = hasField ? fld.name : null;
    final boolean compressed = hasField && fld.isCompressed;
    final boolean repeatable = hasField && fld.isRepeatable;
    final boolean analyzed = hasField && fld.isAnalyzed;

    switch (dataTypeChar) {
        case 't':
            return IndexFieldString.cook(values, existingData, repeatable, compressed);

        case 'e': {
            // Skip multi phrases which are only sighted once.
            final int keyLen = key.length();
            final boolean skipSingle = (keyLen > 1) && (key.charAt(keyLen - 1) == '*');
            if (skipSingle) {
                // Removes the last two characters of the caller's builder.
                key.delete(keyLen - 2, keyLen);
            }
            if (repeatable) {
                return indexTextBitset(skipSingle, existingData, values, analyzed, fieldName, compressed);
            }
            return indexTextSet(skipSingle, existingData, values, analyzed, fieldName);
        }

        case 'i':
            return IndexFieldInteger.cook(values, existingData, repeatable, compressed);

        case 'f':
            return IndexFieldFloat.cook(values, existingData, repeatable, compressed);

        case 'd':
            return IndexFieldDouble.cook(values, existingData, repeatable, compressed);

        case 'l':
            return IndexFieldLong.cook(values, existingData, repeatable, compressed);

        case 's':
            return IndexFieldShort.cook(values, existingData, repeatable, compressed);

        case 'b':
            return IndexFieldBoolean.cook(values, existingData, repeatable, compressed);

        case 'c':
            return IndexFieldByte.cook(values, existingData, repeatable, compressed);

        default: {
            // Any other type: collect the values as strings and serialize them together.
            List<String> mergeKeys = new ArrayList<String>();
            for (Text mergeKey : values) {
                mergeKeys.add(mergeKey.toString());
            }
            return SortedBytesString.getInstance().toBytes(mergeKeys);
        }
    }
}