List of usage examples for the org.apache.solr.update.processor.UpdateRequestProcessor constructor
public UpdateRequestProcessor(UpdateRequestProcessor next)
From source file:org.soas.solr.update.processor.AmbiguousTagCountUPF.java
License:Apache License
@Override public final UpdateRequestProcessor getInstance(SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) {/*from w w w .j a v a2 s . co m*/ return new UpdateRequestProcessor(next) { @Override public void processAdd(AddUpdateCommand cmd) throws IOException { final SolrInputDocument doc = cmd.getSolrInputDocument(); if (doc.containsKey(wordsFieldName)) { String[] words = ((String) doc.getField(wordsFieldName).getValue()).trim().split("\\s+"); //Pattern tagPattern = Pattern.compile("\\[[^\\]]+\\]"); Pattern tagPattern = Pattern.compile("\\]\\["); Collection c = doc.getFieldValues(tagFieldName); if (c != null) { Iterator it = c.iterator(); while (it.hasNext()) { String next = (String) it.next(); if (doc.containsKey(guessFieldName + "_" + next)) { String[] guess = ((String) doc.getField(guessFieldName + "_" + next).getValue()) .trim().split("\\s+"); int match_count = 0; int correct_count = 0; int tag_count = 0; //assume all tags include brackets for (int i = 0; i < words.length; i++) { String wTag = words[i].substring(words[i].indexOf('|') + 1); int gIndex = guess[i].indexOf('|'); if (gIndex != -1) { String gTag = guess[i].substring(gIndex + 1); if (gTag.length() > 0) { List<String> wordTags = Arrays .asList(tagPattern.split(wTag.substring(1, wTag.length() - 1))); List<String> guessedTags = Arrays .asList(tagPattern.split(gTag.substring(1, gTag.length() - 1))); if (guessedTags.equals(wordTags)) { correct_count++; match_count++; } else if (guessedTags.containsAll(wordTags)) { match_count++; } tag_count += guessedTags.size(); } } } SolrInputField tokenCountField = new SolrInputField( tokenCountFieldName + "_" + next); tokenCountField.addValue(new Integer(words.length), 1.0f); doc.put(tokenCountFieldName + "_" + next, tokenCountField); SolrInputField matchCountField = new SolrInputField( matchCountFieldName + "_" + next); matchCountField.addValue(new Integer(match_count), 1.0f); doc.put(matchCountFieldName + "_" + next, matchCountField); 
SolrInputField correctCountField = new SolrInputField( correctCountFieldName + "_" + next); correctCountField.addValue(new Integer(correct_count), 1.0f); doc.put(correctCountFieldName + "_" + next, correctCountField); SolrInputField tagCountField = new SolrInputField(tagCountFieldName + "_" + next); tagCountField.addValue(new Integer(tag_count), 1.0f); doc.put(tagCountFieldName + "_" + next, tagCountField); } } } } super.processAdd(cmd); } }; }
From source file:org.soas.solr.update.processor.AppendTagUPF.java
License:Apache License
/**
 * Creates a processor that appends the tag part of each token in {@code posFieldName} to the
 * token at the same position in {@code sourceFieldName}.
 *
 * <p>Both fields are split on whitespace. The source field is rewritten only when both fields
 * are present and yield the same number of tokens; each rewritten token is followed by a
 * single space (including the last one).
 *
 * @param req  the current request (not used by this method)
 * @param rsp  the current response (not used by this method)
 * @param next the next processor in the chain
 * @return a processor that rewrites the source field and then delegates to {@code next}
 */
@Override
public final UpdateRequestProcessor getInstance(SolrQueryRequest req, SolrQueryResponse rsp,
        UpdateRequestProcessor next) {
    return new UpdateRequestProcessor(next) {
        @Override
        public void processAdd(AddUpdateCommand cmd) throws IOException {
            final SolrInputDocument doc = cmd.getSolrInputDocument();
            if (doc.containsKey(sourceFieldName) && doc.containsKey(posFieldName)) {
                String sourceFieldValue = (String) doc.getFieldValue(sourceFieldName);
                String posFieldValue = (String) doc.getFieldValue(posFieldName);
                String[] source = sourceFieldValue.split("\\s+");
                String[] pos = posFieldValue.split("\\s+");
                if (source.length == pos.length) {
                    StringBuilder sb = new StringBuilder();
                    for (int i = 0; i < source.length; i++) {
                        sb.append(source[i]);
                        // Copy "|tag" from the pos token. A token without '|' has no tag to
                        // copy; substring(-1) would otherwise throw.
                        int delimiterIndex = pos[i].indexOf('|');
                        if (delimiterIndex >= 0) {
                            sb.append(pos[i].substring(delimiterIndex));
                        }
                        sb.append(' ');
                    }
                    doc.setField(sourceFieldName, sb.toString(), 1.0f);
                }
            }
            super.processAdd(cmd);
        }
    };
}
From source file:org.soas.solr.update.processor.SegScoreUPF.java
License:Apache License
@Override public final UpdateRequestProcessor getInstance(SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) {//from w ww .java 2s . c o m return new UpdateRequestProcessor(next) { @Override public void processAdd(AddUpdateCommand cmd) throws IOException { final SolrInputDocument doc = cmd.getSolrInputDocument(); Collection c = doc.getFieldValues(tagFieldName); if (c != null) { Iterator it = c.iterator(); while (it.hasNext()) { String next = (String) it.next(); // preserve initial values and boost (if any) SolrInputField highlightField = null; String prefix = null; String suffix = null; if (highlightFieldName != null) { highlightField = new SolrInputField(highlightFieldName + "_" + next); if (errorTag == null) errorTag = DEFAULT_ERROR_TAG; prefix = "<" + errorTag + ">"; suffix = "</" + errorTag + ">"; } if (doc.containsKey(guessFieldName + "_" + next)) { String guess = ((String) doc.getField(guessFieldName + "_" + next).getValue()).trim(); Pattern pattern = Pattern.compile("\\S+"); Matcher guessMatcher = pattern.matcher(guess); int g_count = 0; if (doc.containsKey(wordsFieldName)) { String words = ((String) doc.getField(wordsFieldName).getValue()).trim(); Matcher wordsMatcher = pattern.matcher(words); int correct = 0; int w_count = 0; int w_start = 0; int w_end = 0; int g_start = 0; int g_end = 0; List<String> out = new LinkedList<String>(); while (wordsMatcher.find()) { w_start = wordsMatcher.start() - w_count; w_end = wordsMatcher.end() - w_count; w_count += 1; //debug: out = out.concat(" w{" + w_start + "," + w_end + "}:" + words.substring(wordsMatcher.start(), wordsMatcher.end())); while (g_end < w_end && guessMatcher.find()) { g_start = guessMatcher.start() - g_count; g_end = guessMatcher.end() - g_count; g_count += 1; //debug: out = out.concat(" g{" + g_start + "," + g_end + "}:" + guess.substring(guessMatcher.start(), guessMatcher.end())); if (g_start == w_start && g_end == w_end) { //debug: out = out.concat("|Y"); if (null != 
highlightField) { out.add(guess.substring(guessMatcher.start(), guessMatcher.end())); } correct += 1; } else if (null != highlightField) { out.add(prefix + guess.substring(guessMatcher.start(), guessMatcher.end()) + suffix); } } } SolrInputField tokenCountField = new SolrInputField( tokenCountFieldName + "_" + next); tokenCountField.addValue(new Integer(w_count), 1.0f); doc.put(tokenCountFieldName + "_" + next, tokenCountField); SolrInputField guessCountField = new SolrInputField( guessCountFieldName + "_" + next); guessCountField.addValue(new Integer(g_count), 1.0f); doc.put(guessCountFieldName + "_" + next, guessCountField); SolrInputField correctCountField = new SolrInputField( correctCountFieldName + "_" + next); correctCountField.addValue(new Integer(correct), 1.0f); doc.put(correctCountFieldName + "_" + next, correctCountField); if (null != highlightField) { highlightField.addValue(StringUtils.join(out.iterator(), DELIMITER), 1.0f); doc.put(highlightFieldName + "_" + next, highlightField); } } else { while (guessMatcher.find()) { g_count++; } SolrInputField guessCountField = new SolrInputField( guessCountFieldName + "_" + next); guessCountField.addValue(new Integer(g_count), 1.0f); doc.put(guessCountFieldName + "_" + next, guessCountField); } } } } super.processAdd(cmd); } }; }
From source file:org.soas.solr.update.processor.TagCompareUPF.java
License:Apache License
@Override public final UpdateRequestProcessor getInstance(SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) {//from www . ja v a 2 s . com return new UpdateRequestProcessor(next) { @Override public void processAdd(AddUpdateCommand cmd) throws IOException { final SolrInputDocument doc = cmd.getSolrInputDocument(); Collection c = doc.getFieldValues(tagFieldName); if (c != null) { Iterator it = c.iterator(); while (it.hasNext()) { String next = (String) it.next(); if (doc.containsKey(guessFieldName + "_" + next)) { String guesses[] = ((String) doc.getField(guessFieldName + "_" + next).getValue()) .split("\\s+"); if (doc.containsKey(tagsFieldName)) { String tags[] = ((String) doc.getField(tagsFieldName).getValue()).split("\\s+"); //should be an argument char delimiter = '|'; int correct = 0; //need to make sure tags and guesses are same length for (int i = 0; i < tags.length; i++) { if (tags[i].substring(tags[i].indexOf(delimiter) + 1) .equals(guesses[i].substring(guesses[i].indexOf(delimiter) + 1))) { correct++; } } SolrInputField tokenCountField = new SolrInputField( tokenCountFieldName + "_" + next); tokenCountField.addValue(new Integer(tags.length), 1.0f); doc.put(tokenCountFieldName + "_" + next, tokenCountField); SolrInputField correctCountField = new SolrInputField( correctCountFieldName + "_" + next); correctCountField.addValue(new Integer(correct), 1.0f); doc.put(correctCountFieldName + "_" + next, correctCountField); } SolrInputField guessCountField = new SolrInputField(guessCountFieldName + "_" + next); guessCountField.addValue(new Integer(guesses.length), 1.0f); doc.put(guessCountFieldName + "_" + next, guessCountField); } } } super.processAdd(cmd); } }; }
From source file:org.soas.solr.update.processor.TagScoreUPF.java
License:Apache License
@Override public final UpdateRequestProcessor getInstance(SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) {//from ww w . ja v a 2s .c o m return new UpdateRequestProcessor(next) { @Override public void processAdd(AddUpdateCommand cmd) throws IOException { final SolrInputDocument doc = cmd.getSolrInputDocument(); Collection c = doc.getFieldValues(tagFieldName); if (c != null) { Iterator it = c.iterator(); while (it.hasNext()) { String next = (String) it.next(); SolrInputField errorEField = null; if (errorEFieldName != null) { errorEField = new SolrInputField(errorEFieldName + "_" + next); } if (doc.containsKey(guessFieldName + "_" + next)) { String guess = ((String) doc.getField(guessFieldName + "_" + next).getValue()).trim(); Pattern pattern = Pattern.compile("\\S+"); Matcher guessMatcher = pattern.matcher(guess); int g_count = 0; if (doc.containsKey(wordsFieldName)) { String words = ((String) doc.getField(wordsFieldName).getValue()).trim(); Matcher wordsMatcher = pattern.matcher(words); int correct = 0; int w_count = 0; int w_start = 0; int w_end = 0; int g_start = 0; int g_end = 0; int w_taglength = 0; int g_taglength = 0; String w_tag = new String(""); String g_tag = new String(""); List<String> errorEOut = new LinkedList<String>(); while (wordsMatcher.find()) { w_start = wordsMatcher.start() - w_count - w_taglength; w_tag = words.substring(words.indexOf('|', wordsMatcher.start()), wordsMatcher.end()); w_taglength += w_tag.length(); w_end = wordsMatcher.end() - w_count - w_taglength; w_count += 1; while (g_end < w_end && guessMatcher.find()) { g_start = guessMatcher.start() - g_count - g_taglength; g_tag = guess.substring(guess.indexOf('|', guessMatcher.start()), guessMatcher.end()); g_taglength += g_tag.length(); g_end = guessMatcher.end() - g_count - g_taglength; g_count += 1; if (g_start == w_start && g_end == w_end && w_tag.equals(g_tag)) { if (null != errorEField) { errorEOut.add("A|" + guess.substring(guessMatcher.start(), 
guessMatcher.end())); } correct += 1; } else { //prefix = "<" + errorTag + " data-correct='" + words.substring(wordsMatcher.start(), wordsMatcher.end()) + "'>"; if (null != errorEField) { errorEOut.add("E|" + guess.substring(guessMatcher.start(), guessMatcher.end())); } } } } SolrInputField tokenCountField = new SolrInputField( tokenCountFieldName + "_" + next); tokenCountField.addValue(new Integer(w_count), 1.0f); doc.put(tokenCountFieldName + "_" + next, tokenCountField); SolrInputField guessCountField = new SolrInputField( guessCountFieldName + "_" + next); guessCountField.addValue(new Integer(g_count), 1.0f); doc.put(guessCountFieldName + "_" + next, guessCountField); SolrInputField correctCountField = new SolrInputField( correctCountFieldName + "_" + next); correctCountField.addValue(new Integer(correct), 1.0f); doc.put(correctCountFieldName + "_" + next, correctCountField); if (null != errorEField) { errorEField.addValue(StringUtils.join(errorEOut.iterator(), DELIMITER), 1.0f); doc.put(errorEFieldName + "_" + next, errorEField); } } else { while (guessMatcher.find()) { g_count++; } SolrInputField guessCountField = new SolrInputField( guessCountFieldName + "_" + next); guessCountField.addValue(new Integer(g_count), 1.0f); doc.put(guessCountFieldName + "_" + next, guessCountField); } } } } super.processAdd(cmd); } }; }
From source file:org.soas.solr.update.processor.UnambiguousTagCountUPF.java
License:Apache License
@Override public final UpdateRequestProcessor getInstance(SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) {// w ww . ja v a 2 s. com return new UpdateRequestProcessor(next) { @Override public void processAdd(AddUpdateCommand cmd) throws IOException { final SolrInputDocument doc = cmd.getSolrInputDocument(); if (doc.containsKey(wordsFieldName)) { String[] words = ((String) doc.getField(wordsFieldName).getValue()).trim().split("\\s+"); //Pattern tagPattern = Pattern.compile("\\[[^\\]]+\\]"); Pattern tagPattern = Pattern.compile("\\]\\["); Collection c = doc.getFieldValues(tagFieldName); if (c != null) { Iterator it = c.iterator(); while (it.hasNext()) { String next = (String) it.next(); if (doc.containsKey(guessFieldName + "_" + next)) { String[] guess = ((String) doc.getField(guessFieldName + "_" + next).getValue()) .trim().split("\\s+"); int match_count = 0; int correct_count = 0; int tag_count = 0; for (int i = 0; i < words.length; i++) { String wTag = words[i].substring(words[i].indexOf('|') + 1); String gTag = guess[i].substring(guess[i].indexOf('|') + 1); if (gTag.length() > 0) { if (gTag.charAt(0) != '[') { tag_count++; if (wTag.equals(gTag)) { match_count++; correct_count++; } } else { List<String> guessedTags = Arrays .asList(tagPattern.split(gTag.substring(1, gTag.length() - 1))); if (guessedTags.contains(wTag)) { match_count++; if (guessedTags.size() == 1) { correct_count++; } } tag_count += guessedTags.size(); } } } SolrInputField tokenCountField = new SolrInputField( tokenCountFieldName + "_" + next); tokenCountField.addValue(new Integer(words.length), 1.0f); doc.put(tokenCountFieldName + "_" + next, tokenCountField); SolrInputField matchCountField = new SolrInputField( matchCountFieldName + "_" + next); matchCountField.addValue(new Integer(match_count), 1.0f); doc.put(matchCountFieldName + "_" + next, matchCountField); SolrInputField correctCountField = new SolrInputField( correctCountFieldName + "_" + next); 
correctCountField.addValue(new Integer(correct_count), 1.0f); doc.put(correctCountFieldName + "_" + next, correctCountField); SolrInputField tagCountField = new SolrInputField(tagCountFieldName + "_" + next); tagCountField.addValue(new Integer(tag_count), 1.0f); doc.put(tagCountFieldName + "_" + next, tagCountField); } } } } super.processAdd(cmd); } }; }
From source file:org.soas.solr.update.processor.WhereDifferentUPF.java
License:Apache License
@Override public final UpdateRequestProcessor getInstance(SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) {/*from w w w . j av a2 s. c o m*/ return new UpdateRequestProcessor(next) { @Override public void processAdd(AddUpdateCommand cmd) throws IOException { final SolrInputDocument doc = cmd.getSolrInputDocument(); Collection c = doc.getFieldValues(tagFieldName); if (c != null) { Iterator it = c.iterator(); while (it.hasNext()) { String next = (String) it.next(); if (doc.containsKey(posFieldName + "_" + next) && doc.containsKey(compareFieldName + "_" + next)) { String posFieldValue = (String) doc.getFieldValue(posFieldName + "_" + next); String[] pos = posFieldValue.split("\\s+"); String compareFieldValue = (String) doc.getFieldValue(compareFieldName + "_" + next); String[] compare = compareFieldValue.split("\\s+"); //log.info("posFieldValue = " + posFieldValue); //log.info("compareFieldValue = " + compareFieldValue); if (compare.length == pos.length && !(compare.length == 1 && compare[0].equals(compareFieldValue))) { //Pattern oneTag = Pattern.compile("\\[?([^\\]]+)\\]?"); Pattern splitter = Pattern.compile("\\]\\["); StringBuffer sbDiff = new StringBuffer(); StringBuffer sbChange = new StringBuffer(); for (int i = 0; i < compare.length; i++) { sbDiff.append(pos[i]); String tags = compare[i].substring(compare[i].indexOf('|') + 1); if (tags.charAt(0) == '[') { tags = tags.substring(1, tags.length() - 1); //strip [ and ] } //Matcher m = oneTag.matcher(tags); //if (m.matches()) { String[] tagList = splitter.split(tags); String posRef = pos[i].substring(pos[i].indexOf('|') + 1); boolean match = false; for (int k = 0; k < tagList.length; k++) { //String tag = m.group(1); //tags.substring(1, tags.length()-1); //if (!tag.equals(pos[i].substring(pos[i].indexOf('|')+1))) { if (tagList[k].equals(posRef)) { match = true; break; } } if (!match) { sbDiff.append(diffDelim); sbDiff.append(StringUtils.join(tagList, "~")); } /* sbDiff.append(diffDelim); 
sbDiff.append(tag); sbChange.append(pos[i].substring(0, pos[i].indexOf('|'))); sbChange.append(diffDelim); sbChange.append(tag); } else { sbChange.append(pos[i]); } } else { sbChange.append(pos[i]); } */ sbDiff.append(' '); sbChange.append(' '); } sbDiff.deleteCharAt(sbDiff.length() - 1); //remove final space sbChange.deleteCharAt(sbChange.length() - 1); //remove final space if (differentFieldName != null) { SolrInputField differentField = new SolrInputField( differentFieldName + "_" + next); differentField.setValue(sbDiff.toString(), 1.0f); doc.put(differentFieldName + "_" + next, differentField); } if (changeFieldName != null) { SolrInputField changeField = new SolrInputField(changeFieldName + "_" + next); changeField.setValue(sbChange.toString(), 1.0f); doc.put(changeFieldName + "_" + next, changeField); } } } } } super.processAdd(cmd); } }; }
From source file:org.thdl.tib.solr.PageTransitionUpdateProcessorFactory.java
License:Apache License
@Override public final UpdateRequestProcessor getInstance(SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) {/* w w w. j av a 2s . c o m*/ return new UpdateRequestProcessor(next) { @Override public void processAdd(AddUpdateCommand cmd) throws IOException { final SolrInputDocument doc = cmd.getSolrInputDocument(); // preserve initial values and boost (if any) SolrInputField destField = doc.containsKey(destFieldName) ? doc.getField(destFieldName) : new SolrInputField(destFieldName); String trans = (String) doc.getField(transFieldName).getValue(); String page = (String) doc.getField(pageFieldName).getValue(); boolean matched = false; int i = 1; for (; i < page.length() + 1; i++) { if (trans.endsWith(page.substring(0, i))) { matched = true; break; } } //trans = trans.concat(DELIMITER); String joined = matched ? trans.concat(page.substring(i)) : trans.concat(page); destField.addValue(joined, 1.0f); doc.put(destFieldName, destField); super.processAdd(cmd); } }; }
From source file:org.thdl.tib.solr.PageTransitionUPF.java
License:Apache License
@Override public final UpdateRequestProcessor getInstance(SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) {//from www . j av a 2 s . c om return new UpdateRequestProcessor(next) { @Override public void processAdd(AddUpdateCommand cmd) throws IOException { final SolrInputDocument doc = cmd.getSolrInputDocument(); // preserve initial values and boost (if any) SolrInputField destField = doc.containsKey(destFieldName) ? doc.getField(destFieldName) : new SolrInputField(destFieldName); if (doc.containsKey(pageFieldName)) { String page = (String) doc.getField(pageFieldName).getValue(); if (doc.containsKey(transFieldName)) { String trans = (String) doc.getField(transFieldName).getValue(); boolean matched = false; int i = 1; for (; i < page.length() + 1; i++) { if (trans.endsWith(page.substring(0, i))) { matched = true; break; } } //trans = trans.concat(DELIMITER); String joined = matched ? trans.concat(page.substring(i)) : trans.concat(page); destField.addValue(joined, 1.0f); } else { destField.addValue(page, 1.0f); } doc.put(destFieldName, destField); } super.processAdd(cmd); } }; }