List of usage examples for org.apache.solr.common.SolrDocument#getFieldValue(String)
@Override
public Object getFieldValue(String name)
From source file:IndexDeletion_p.java
License:Open Source License
public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env) { // return variable that accumulates replacements final Switchboard sb = (Switchboard) env; final serverObjects prop = new serverObjects(); SolrConnector defaultConnector = sb.index.fulltext().getDefaultConnector(); SolrConnector webgraphConnector = sb.index.fulltext().getWebgraphConnector(); if (post == null || post.size() == 0) defaultConnector.commit(false); // we must do a commit here because the user cannot see a proper count. String schemaName = CollectionSchema.CORE_NAME; if (post != null) schemaName = post.get("core", schemaName); // Delete by URL Matching String urldelete = post == null ? "" : post.get("urldelete", ""); boolean urldelete_mm_subpath_checked = post == null ? true : post.get("urldelete-mm", "subpath").equals("subpath"); prop.putHTML("urldelete", urldelete); prop.put("urldelete-mm-subpath-checked", urldelete_mm_subpath_checked ? 1 : 0); prop.put("urldelete-mm-regexp-checked", urldelete_mm_subpath_checked ? 0 : 1); prop.put("urldelete-active", 0); // Delete by Age int timedelete_number = post == null ? 14 : post.getInt("timedelete-number", 14); String timedelete_unit = post == null ? "day" : post.get("timedelete-unit", "day"); boolean timedelete_source_loaddate_checked = post == null ? true : post.get("timedelete-source", "loaddate").equals("loaddate"); for (int i = 1; i <= 90; i++) prop.put("timedelete-n-" + i, 0); prop.put("timedelete-n-" + timedelete_number, timedelete_number); prop.put("timedelete-u-year", timedelete_unit.equals("year") ? 1 : 0); prop.put("timedelete-u-month", timedelete_unit.equals("month") ? 1 : 0); prop.put("timedelete-u-day", timedelete_unit.equals("day") ? 1 : 0); prop.put("timedelete-u-hour", timedelete_unit.equals("hour") ? 1 : 0); prop.put("timedelete-source-loaddate-checked", timedelete_source_loaddate_checked ? 
1 : 0); prop.put("timedelete-source-lastmodified-checked", timedelete_source_loaddate_checked ? 0 : 1); prop.put("timedelete-active", 0); // Delete Collections boolean collectiondelete_mode_unassigned_checked = post == null ? true : post.get("collectiondelete-mode", "unassigned").equals("unassigned"); String collectiondelete = post == null ? "" : post.get("collectiondelete", ""); if (post != null && post.containsKey("collectionlist")) { collectiondelete_mode_unassigned_checked = false; prop.put("collectiondelete-select", 1); try {//from ww w . ja va 2 s .c o m ScoreMap<String> collectionMap = defaultConnector .getFacets("*:*", 1000, CollectionSchema.collection_sxt.getSolrFieldName()) .get(CollectionSchema.collection_sxt.getSolrFieldName()); Iterator<String> i = collectionMap.iterator(); int c = 0; while (i.hasNext()) { String collection = i.next(); prop.put("collectiondelete-select_list_" + c + "_collection-name", collection + "/" + collectionMap.get(collection)); prop.put("collectiondelete-select_list_" + c + "_collection-value", collection); c++; } prop.put("collectiondelete-select_list", c); } catch (final IOException e1) { prop.put("collectiondelete-select", 0); } } else { prop.put("collectiondelete-select", 0); } prop.put("collectiondelete-mode-unassigned-checked", collectiondelete_mode_unassigned_checked ? 1 : 0); prop.put("collectiondelete-mode-assigned-checked", collectiondelete_mode_unassigned_checked ? 0 : 1); prop.putHTML("collectiondelete-select_collectiondelete", collectiondelete); prop.put("collectiondelete-active", 0); // Delete by Solr Query prop.put("querydelete", ""); String querydelete = post == null ? 
"" : post.get("querydelete", ""); // simulate default search field if no field is given by adding text_t: as target field if (!querydelete.isEmpty() && !querydelete.contains(":")) querydelete = CollectionSchema.text_t.getSolrFieldName() + ":" + querydelete; prop.putHTML("querydelete", querydelete); prop.put("querydelete-active", 0); int count = post == null ? -1 : post.getInt("count", -1); if (post != null && (post.containsKey("simulate-urldelete") || post.containsKey("engage-urldelete"))) { boolean simulate = post.containsKey("simulate-urldelete"); // parse the input urldelete = urldelete.trim(); if (urldelete_mm_subpath_checked) { // collect using url stubs Set<String> ids = new HashSet<String>(); String[] stubURLs = urldelete.indexOf('\n') > 0 || urldelete.indexOf('\r') > 0 ? urldelete.split("[\\r\\n]+") : urldelete.split(Pattern.quote("|")); for (String urlStub : stubURLs) { if (urlStub == null || urlStub.length() == 0) continue; int pos = urlStub.indexOf("://", 0); if (pos == -1) { if (urlStub.startsWith("ftp")) urlStub = "ftp://" + urlStub; else urlStub = "http://" + urlStub; } try { DigestURL u = new DigestURL(urlStub); BlockingQueue<SolrDocument> dq = defaultConnector.concurrentDocumentsByQuery( CollectionSchema.host_s.getSolrFieldName() + ":\"" + u.getHost() + "\"", null, 0, 100000000, Long.MAX_VALUE, 100, 1, false, CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName()); SolrDocument doc; try { while ((doc = dq.take()) != AbstractSolrConnector.POISON_DOCUMENT) { String url = (String) doc.getFieldValue(CollectionSchema.sku.getSolrFieldName()); if (url.startsWith(urlStub)) ids.add((String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName())); } } catch (final InterruptedException e) { } } catch (final MalformedURLException e) { } } if (simulate) { count = ids.size(); prop.put("urldelete-active", count == 0 ? 
2 : 1); } else { sb.remove(ids); defaultConnector.commit(false); sb.tables.recordAPICall(post, "IndexDeletion_p.html", WorkTables.TABLE_API_TYPE_DELETION, "deletion, docs matching with " + urldelete); prop.put("urldelete-active", 2); } } else { // collect using a regular expression on urls String regexquery = CollectionSchema.sku.getSolrFieldName() + ":/" + urldelete + "/"; if (simulate) { try { count = (int) defaultConnector.getCountByQuery("{!cache=false}" + regexquery); } catch (final IOException e) { } prop.put("urldelete-active", count == 0 ? 2 : 1); } else { try { defaultConnector.deleteByQuery(regexquery); defaultConnector.commit(false); sb.tables.recordAPICall(post, "IndexDeletion_p.html", WorkTables.TABLE_API_TYPE_DELETION, "deletion, regex match = " + urldelete); } catch (final IOException e) { } prop.put("urldelete-active", 2); } } prop.put("urldelete-active_count", count); } if (post != null && (post.containsKey("simulate-timedelete") || post.containsKey("engage-timedelete"))) { boolean simulate = post.containsKey("simulate-timedelete"); Date deleteageDate = null; long t = timeParser(timedelete_number, timedelete_unit); // year, month, day, hour if (t > 0) deleteageDate = new Date(t); final String collection1Query = (timedelete_source_loaddate_checked ? CollectionSchema.load_date_dt : CollectionSchema.last_modified).getSolrFieldName() + ":[* TO " + ISO8601Formatter.FORMATTER.format(deleteageDate) + "]"; final String webgraphQuery = (timedelete_source_loaddate_checked ? WebgraphSchema.load_date_dt : WebgraphSchema.last_modified).getSolrFieldName() + ":[* TO " + ISO8601Formatter.FORMATTER.format(deleteageDate) + "]"; if (simulate) { try { count = (int) defaultConnector.getCountByQuery(collection1Query); } catch (final IOException e) { } prop.put("timedelete-active", count == 0 ? 
2 : 1); } else { try { defaultConnector.deleteByQuery(collection1Query); defaultConnector.commit(false); if (webgraphConnector != null) webgraphConnector.deleteByQuery(webgraphQuery); sb.tables.recordAPICall(post, "IndexDeletion_p.html", WorkTables.TABLE_API_TYPE_DELETION, "deletion, docs older than " + timedelete_number + " " + timedelete_unit); } catch (final IOException e) { } prop.put("timedelete-active", 2); } prop.put("timedelete-active_count", count); } if (post != null && (post.containsKey("simulate-collectiondelete") || post.containsKey("engage-collectiondelete"))) { boolean simulate = post.containsKey("simulate-collectiondelete"); collectiondelete = collectiondelete.replaceAll(" ", "").replaceAll(",", "|"); String query = collectiondelete_mode_unassigned_checked ? "-" + CollectionSchema.collection_sxt + AbstractSolrConnector.CATCHALL_DTERM : collectiondelete.length() == 0 ? CollectionSchema.collection_sxt + ":\"\"" : QueryModifier.parseCollectionExpression(collectiondelete); if (simulate) { try { count = (int) defaultConnector.getCountByQuery(query); } catch (final IOException e) { } prop.put("collectiondelete-active", count == 0 ? 2 : 1); } else { try { defaultConnector.deleteByQuery(query); defaultConnector.commit(false); sb.tables.recordAPICall(post, "IndexDeletion_p.html", WorkTables.TABLE_API_TYPE_DELETION, "deletion, collection " + collectiondelete); } catch (final IOException e) { } prop.put("collectiondelete-active", 2); } prop.put("collectiondelete-active_count", count); } if (post != null && (post.containsKey("simulate-querydelete") || post.containsKey("engage-querydelete"))) { boolean simulate = post.containsKey("simulate-querydelete"); SolrConnector connector = schemaName.equals(CollectionSchema.CORE_NAME) ? defaultConnector : sb.index.fulltext().getWebgraphConnector(); if (simulate) { try { count = (int) connector.getCountByQuery(querydelete); } catch (final IOException e) { } prop.put("querydelete-active", count == 0 ? 
2 : 1); } else { try { ConcurrentLog.info("IndexDeletion", "delete by query \"" + querydelete + "\", size before deletion = " + connector.getSize()); connector.deleteByQuery(querydelete); connector.commit(false); ConcurrentLog.info("IndexDeletion", "delete by query \"" + querydelete + "\", size after commit = " + connector.getSize()); sb.tables.recordAPICall(post, "IndexDeletion_p.html", WorkTables.TABLE_API_TYPE_DELETION, "deletion, solr query, q = " + querydelete); } catch (final IOException e) { } prop.put("querydelete-active", 2); } prop.put("querydelete-active_count", count); } prop.put("doccount", defaultConnector.getSize()); prop.put("cores_" + 0 + "_name", CollectionSchema.CORE_NAME); prop.put("cores_" + 0 + "_selected", CollectionSchema.CORE_NAME.equals(schemaName) ? 1 : 0); prop.put("cores_" + 1 + "_name", WebgraphSchema.CORE_NAME); prop.put("cores_" + 1 + "_selected", WebgraphSchema.CORE_NAME.equals(schemaName) ? 1 : 0); prop.put("cores", 2); // return rewrite properties return prop; }
From source file:SolrUpdate.java
License:Apache License
public void getAllSolrRecords() { String pmid;/* ww w . j a v a2 s .c om*/ try { CoreAdminRequest adminRequest = new CoreAdminRequest(); adminRequest.setAction(CoreAdminAction.RELOAD); SolrServer solr = new HttpSolrServer("http://localhost:8983/solr"); String query; query = "pmid:*"; SolrQuery theq = new SolrQuery(); theq.setQuery(query); theq.setStart(0); theq.setRows(10000); QueryResponse response = new QueryResponse(); response = solr.query(theq); SolrDocumentList list = response.getResults(); int docnum = 1; for (SolrDocument doc : list) { Publication currlist = new Publication(); List<String> fullnames = new ArrayList<String>(); String currepubsum1 = "", currepubsum2 = ""; if (doc.getFieldValue("abstract") != null) { currlist.setAbstract(doc.getFieldValue("abstract").toString()); } if (doc.getFieldValue("ptitle") != null) { currlist.setTitle(doc.getFieldValue("ptitle").toString()); } if (doc.getFieldValue("author_fullname_list") != null) { currlist.setFirst5authors(doc.getFieldValue("author_fullname_list").toString()); } if (doc.getFieldValue("pmid") != null) { currlist.setPmid(Integer.valueOf(doc.getFieldValue("pmid").toString())); } if (doc.getFieldValue("completion") != null) { currlist.setCompletion(Boolean.valueOf(doc.getFieldValue("completion").toString())); } if (doc.getFieldValue("lruid") != null) { currlist.setLruid(doc.getFieldValue("lruid").toString()); } if (doc.getFieldValue("draftpoint") != null) { currlist.setDraftpoint(Integer.valueOf(doc.getFieldValue("draftpoint").toString())); } if (doc.getFieldValue("journalname") != null) { currlist.setJournalname(doc.getFieldValue("journalname").toString()); } if (doc.getFieldValue("journalyear") != null) { currlist.setJournalyear(doc.getFieldValue("journalyear").toString()); } if (doc.getFieldValue("journalday") != null) { currlist.setJournalday(doc.getFieldValue("journalday").toString()); } if (doc.getFieldValue("journalmonth") != null) { 
currlist.setJournalmonth(doc.getFieldValue("journalmonth").toString()); } if (doc.getFieldValue("journalpage") != null) { currlist.setJournalstartpg(doc.getFieldValue("journalpage").toString()); } if (doc.getFieldValue("journalissue") != null) { currlist.setJournalissue(doc.getFieldValue("journalissue").toString()); } if (doc.getFieldValue("journalvolume") != null) { currlist.setJournalvolume(doc.getFieldValue("journalvolume").toString()); } if (doc.getFieldValue("publicationdate_year") != null) { currlist.setYear(doc.getFieldValue("publicationdate_year").toString()); } if (doc.getFieldValue("doi") != null) { currlist.setDoi(doc.getFieldValue("doi").toString()); } if (doc.getFieldValues("pfileinfo") != null) { Collection<Object> currcoll = doc.getFieldValues("pfileinfo"); for (Object currobj : currcoll) { currlist.getFilesanddata().add(currobj.toString()); } } if (doc.getFieldValue("author_firstname") != null) { currlist.setFauthors((List<String>) doc.getFieldValue("author_firstname")); } if (doc.getFieldValue("author_lastname") != null) { currlist.setLauthors((List<String>) doc.getFieldValue("author_lastname")); } if (doc.getFieldValue("epubmonth") != null) { currlist.setEpubmonth(doc.getFieldValue("epubmonth").toString()); } if (doc.getFieldValue("epubyear") != null) { currlist.setEpubyear(doc.getFieldValue("epubyear").toString()); } if (doc.getFieldValue("epubday") != null) { currlist.setEpubday(doc.getFieldValue("epubday").toString()); } int counter = 0; for (String currstring : currlist.getFauthors()) { currstring += " " + currlist.getLauthors().get(counter); fullnames.add(currstring); counter++; } currlist.setFullnames(fullnames); if (currlist.getJournalvolume().length() > 0) { currepubsum2 += currlist.getJournalvolume(); } if (currlist.getJournalissue().length() > 0) { currepubsum2 += "(" + currlist.getJournalissue() + ")" + ":"; } if (currlist.getJournalstartpg().length() > 0) { currepubsum2 += currlist.getJournalstartpg() + "."; } if 
(currlist.getEpubday().length() < 1 && currlist.getEpubmonth().length() < 1 && currlist.getEpubyear().length() < 1) { currepubsum1 = "[Epub ahead of print]"; } else if (currlist.getEpubyear().length() > 0) { currepubsum1 = "Epub " + currlist.getEpubyear() + " " + currlist.getEpubmonth() + " " + currlist.getEpubday(); } else { currepubsum1 = ""; } currlist.setEpubsum(currepubsum1); currlist.setEpubsum2(currepubsum2); currlist.setIndex(docnum); if (currlist.getCompletion() == false) { currlist.setComp("Hidden"); } else { currlist.setComp("Visible"); } solrrecords.add(currlist); docnum++; } } catch (Exception ex) { System.out.println(ex); } System.out.println("There are a total of this many records gathered: " + solrrecords.size()); }
From source file:citation.java
License:Open Source License
public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env) { // return variable that accumulates replacements final Switchboard sb = (Switchboard) env; final serverObjects prop = new serverObjects(); final Segment segment = sb.index; final SolrConnector connector = segment.fulltext().getDefaultConnector(); // avoid UNRESOLVED PATTERN prop.put("url", ""); prop.put("citations", 0); prop.put("sentences", 0); DigestURL uri = null;/*from www . j a va 2 s . co m*/ String url = ""; String hash = ""; int ch = 10; boolean filter = false; // show cited sentences only if (post != null) { if (post.containsKey("url")) { url = post.get("url"); if (!url.startsWith("http://") && !url.startsWith("https://") && !url.startsWith("ftp://") && !url.startsWith("smb://") && !url.startsWith("file://")) { url = "http://" + url; } } if (post.containsKey("hash")) { hash = post.get("hash"); } if (post.containsKey("ch")) { ch = post.getInt("ch", ch); } filter = post.getBoolean("filter"); } prop.put("filter", filter); if (url.length() > 0) { try { uri = new DigestURL(url, null); hash = ASCII.String(uri.hash()); } catch (final MalformedURLException e) { } } if (uri == null && hash.length() > 0) { try { uri = sb.getURL(ASCII.getBytes(hash)); if (uri == null) { connector.commit(true); // try again, that url can be fresh uri = sb.getURL(ASCII.getBytes(hash)); } } catch (IOException e) { ConcurrentLog.logException(e); } } if (uri == null) return prop; // no proper url addressed url = uri.toNormalform(true); prop.put("url", url); // get the document from the index SolrDocument doc; try { doc = segment.fulltext().getDefaultConnector().getDocumentById(hash, CollectionSchema.title.getSolrFieldName(), CollectionSchema.text_t.getSolrFieldName()); } catch (final IOException e1) { return prop; } @SuppressWarnings("unchecked") ArrayList<String> title = (ArrayList<String>) 
doc.getFieldValue(CollectionSchema.title.getSolrFieldName()); String text = (String) doc.getFieldValue(CollectionSchema.text_t.getSolrFieldName()); ArrayList<String> sentences = new ArrayList<String>(); if (title != null) for (String s : title) if (s.length() > 0) sentences.add(s); SentenceReader sr = new SentenceReader(text); StringBuilder line; while (sr.hasNext()) { line = sr.next(); if (line.length() > 0) sentences.add(line.toString()); } // for each line make a statistic about the number of occurrences somewhere else OrderedScoreMap<String> scores = new OrderedScoreMap<String>(null); // accumulates scores for citating urls LinkedHashMap<String, Set<DigestURL>> sentenceOcc = new LinkedHashMap<String, Set<DigestURL>>(); for (String sentence : sentences) { if (sentence == null || sentence.length() < 40) { // do not count the very short sentences sentenceOcc.put(sentence, null); continue; } try { sentence = sentence.replace('"', '\''); SolrDocumentList doclist = connector.getDocumentListByQuery("text_t:\"" + sentence + "\"", CollectionSchema.url_chars_i.getSolrFieldName() + " asc", 0, 100, CollectionSchema.sku.getSolrFieldName()); int count = (int) doclist.getNumFound(); if (count > 0) { Set<DigestURL> list = new TreeSet<DigestURL>(); for (SolrDocument d : doclist) { String u = (String) d.getFieldValue(CollectionSchema.sku.getSolrFieldName()); if (u == null || u.equals(url)) continue; scores.inc(u); try { list.add(new DigestURL(u, null)); } catch (final MalformedURLException e) { } } sentenceOcc.put(sentence, list); } } catch (final Throwable ee) { } } sentences.clear(); // we do not need this again // iterate the sentences int i = 0; int sentenceNr = 0; for (Map.Entry<String, Set<DigestURL>> se : sentenceOcc.entrySet()) { Set<DigestURL> app = se.getValue(); if (filter) { // prepare list, only include sentence with citation if (app != null && app.size() > 0) { StringBuilder dd = new StringBuilder(se.getKey()); prop.put("sentences_" + i + "_dt", sentenceNr); 
dd.append("<br/>appears in:"); for (DigestURL u : app) { if (u != null) { dd.append(" <a href=\"").append(u.toNormalform(false)).append("\">").append(u.getHost()) .append("</a>"); } } prop.put("sentences_" + i + "_dd", dd.toString()); i++; } } else { // prepare list, include all sentences StringBuilder dd = new StringBuilder(se.getKey()); prop.put("sentences_" + i + "_dt", sentenceNr); if (app != null && app.size() > 0) { dd.append("<br/>appears in:"); for (DigestURL u : app) { if (u != null) { dd.append(" <a href=\"").append(u.toNormalform(false)).append("\">").append(u.getHost()) .append("</a>"); } } } prop.put("sentences_" + i + "_dd", dd.toString()); i++; } sentenceNr++; } prop.put("sentences", i); // iterate the citations in order of number of citations i = 0; for (String u : scores.keyList(false)) { try { DigestURL uu = new DigestURL(u, null); prop.put("citations_" + i + "_dt", "<a href=\"" + u + "\">" + u + "</a>"); StringBuilder dd = new StringBuilder(); dd.append("makes ").append(Integer.toString(scores.get(u))).append(" citations: of ").append(url); for (Map.Entry<String, Set<DigestURL>> se : sentenceOcc.entrySet()) { Set<DigestURL> occurls = se.getValue(); if (occurls != null && occurls.contains(uu)) dd.append("<br/><a href=\"/solr/select?q=text_t:%22").append(se.getKey().replace('"', '\'')) .append("%22&rows=100&grep=&wt=grephtml\">").append(se.getKey()).append("</a>"); } prop.put("citations_" + i + "_dd", dd.toString()); i++; } catch (final MalformedURLException e) { } } prop.put("citations", i); // find similar documents from different hosts i = 0; for (String u : scores.keyList(false)) { if (scores.get(u) < ch) continue; try { DigestURL uu = new DigestURL(u, null); if (uu.getOrganization().equals(uri.getOrganization())) continue; prop.put("similar_links_" + i + "_url", u); i++; } catch (final MalformedURLException e) { } } prop.put("similar_links", i); prop.put("similar", i > 0 ? 1 : 0); // return rewrite properties return prop; }
From source file:HostBrowser.java
License:Open Source License
@SuppressWarnings({ "unchecked" })
public static serverObjects respond(final RequestHeader header, final serverObjects post,
final serverSwitch env) {
// return variable that accumulates replacements
final Switchboard sb = (Switchboard) env;
Fulltext fulltext = sb.index.fulltext();
final boolean authorized = sb.verifyAuthentication(header);
final boolean autoload = authorized && sb.getConfigBool("browser.autoload", true);
final boolean load4everyone = sb.getConfigBool("browser.load4everyone", false);
final boolean loadRight = autoload || load4everyone; // add config later
final boolean searchAllowed = sb.getConfigBool(SwitchboardConstants.PUBLIC_SEARCHPAGE, true) || authorized;
final serverObjects prop = new serverObjects();
// set default values
prop.put("path", "");
prop.put("result", "");
prop.put("hosts", 0);
prop.put("files", 0);
prop.put("hostanalysis", 0);
prop.put("admin", "false");
boolean admin = false;
String referer = header.get("Referer", "");
if ((post != null && post.getBoolean("admin")) || referer.contains("HostBrowser.html?admin=true")) {
prop.put("topmenu", 2);
prop.put("admin", "true");
admin = true;/*ww w . j av a 2 s. com*/
} else if (authorized) { // show top nav to admins
prop.put("topmenu", 1);
} else { // for other respect setting in Search Design Configuration
prop.put("topmenu", sb.getConfigBool("publicTopmenu", true) ? 1 : 0);
}
final String promoteSearchPageGreeting = (env.getConfigBool(SwitchboardConstants.GREETING_NETWORK_NAME,
false)) ? env.getConfig("network.unit.description", "")
: env.getConfig(SwitchboardConstants.GREETING, "");
prop.put("topmenu_promoteSearchPageGreeting", promoteSearchPageGreeting);
if (!searchAllowed) {
prop.put("result", "You are not allowed to use this page. Please ask an administrator for permission.");
prop.putNum("ucount", 0);
return prop;
}
String path = post == null ? "" : post.get("path", "").trim();
if (authorized)
sb.index.fulltext().commit(true);
if (post == null || env == null) {
prop.putNum("ucount", fulltext.collectionSize());
return prop;
}
int p = path.lastIndexOf('/');
if (p < 0 && path.length() > 0)
path = path + "/";
else if (p > 7)
path = path.substring(0, p + 1); // the search path shall always end with "/"
if (path.length() > 0 && (!path.startsWith("http://") && !path.startsWith("https://")
&& !path.startsWith("ftp://") && !path.startsWith("smb://") && !path.startsWith("file://"))) {
path = "http://" + path;
}
prop.putHTML("path", path);
prop.put("delete", authorized && path.length() > 0 ? 1 : 0);
DigestURL pathURI = null;
try {
pathURI = new DigestURL(path);
} catch (final MalformedURLException e) {
}
String load = post.get("load", "");
boolean wait = false;
try {
if (loadRight && autoload && path.length() != 0 && pathURI != null && load.length() == 0
&& sb.index.getLoadTime(ASCII.String(pathURI.hash())) < 0) {
// in case that the url does not exist and loading is wanted turn this request into a loading request
load = path;
wait = true;
}
} catch (IOException e1) {
load = path;
wait = true;
}
if (load.length() > 0 && loadRight) {
// stack URL
DigestURL url;
if (sb.crawlStacker.size() > 2)
wait = false;
try {
url = new DigestURL(load);
String reasonString = sb.crawlStacker.stackCrawl(new Request(sb.peers.mySeed().hash.getBytes(), url,
null, load, new Date(), sb.crawler.defaultProxyProfile.handle(), 0,
sb.crawler.defaultProxyProfile.timezoneOffset()));
prop.putHTML("result", reasonString == null ? ("added url to indexer: " + load)
: ("not indexed url '" + load + "': " + reasonString));
if (wait)
waitloop: for (int i = 0; i < 30; i++) {
try {
if (sb.index.getLoadTime(ASCII.String(url.hash())) >= 0)
break;
} catch (IOException e1) {
e1.printStackTrace();
break waitloop;
}
try {
Thread.sleep(100);
} catch (final InterruptedException e) {
}
}
} catch (final MalformedURLException e) {
prop.putHTML("result", "bad url '" + load + "'");
}
}
if (authorized && post.containsKey("deleteLoadErrors")) {
try {
fulltext.getDefaultConnector()
.deleteByQuery("-" + CollectionSchema.httpstatus_i.getSolrFieldName() + ":200 AND "
+ CollectionSchema.httpstatus_i.getSolrFieldName()
+ AbstractSolrConnector.CATCHALL_DTERM); // make sure field exists
ConcurrentLog.info("HostBrowser:", "delete documents with httpstatus_i <> 200");
fulltext.getDefaultConnector().deleteByQuery(
CollectionSchema.failtype_s.getSolrFieldName() + ":\"" + FailType.fail.name() + "\"");
ConcurrentLog.info("HostBrowser:", "delete documents with failtype_s = fail");
fulltext.getDefaultConnector().deleteByQuery(
CollectionSchema.failtype_s.getSolrFieldName() + ":\"" + FailType.excl.name() + "\"");
ConcurrentLog.info("HostBrowser:", "delete documents with failtype_s = excl");
prop.putNum("ucount", fulltext.collectionSize());
return prop;
} catch (final IOException ex) {
ConcurrentLog.logException(ex);
}
}
if (post.containsKey("hosts")) {
// generate host list
try {
boolean onlyCrawling = "crawling".equals(post.get("hosts", ""));
boolean onlyErrors = "error".equals(post.get("hosts", ""));
int maxcount = authorized ? 2 * 3 * 2 * 5 * 7 * 2 * 3 : 360; // which makes nice matrixes for 2, 3, 4, 5, 6, 7, 8, 9 rows/colums
// collect hosts from index
ReversibleScoreMap<String> hostscore = fulltext.getDefaultConnector()
.getFacets(CollectionSchema.httpstatus_i.getSolrFieldName() + ":200", maxcount,
CollectionSchema.host_s.getSolrFieldName())
.get(CollectionSchema.host_s.getSolrFieldName());
if (hostscore == null)
hostscore = new ClusteredScoreMap<String>(true);
// collect hosts from crawler
final Map<String, Integer[]> crawler = (authorized)
? sb.crawlQueues.noticeURL.getDomainStackHosts(StackType.LOCAL, sb.robots)
: new HashMap<String, Integer[]>();
// collect the errorurls
Map<String, ReversibleScoreMap<String>> exclfacets = authorized ? fulltext.getDefaultConnector()
.getFacets(CollectionSchema.failtype_s.getSolrFieldName() + ":" + FailType.excl.name(),
maxcount, CollectionSchema.host_s.getSolrFieldName())
: null;
ReversibleScoreMap<String> exclscore = exclfacets == null ? new ClusteredScoreMap<String>(true)
: exclfacets.get(CollectionSchema.host_s.getSolrFieldName());
Map<String, ReversibleScoreMap<String>> failfacets = authorized ? fulltext.getDefaultConnector()
.getFacets(CollectionSchema.failtype_s.getSolrFieldName() + ":" + FailType.fail.name(),
maxcount, CollectionSchema.host_s.getSolrFieldName())
: null;
ReversibleScoreMap<String> failscore = failfacets == null ? new ClusteredScoreMap<String>(true)
: failfacets.get(CollectionSchema.host_s.getSolrFieldName());
int c = 0;
Iterator<String> i = hostscore.keys(false);
String host;
while (i.hasNext() && c < maxcount) {
host = i.next();
prop.put("hosts_list_" + c + "_admin", admin ? "true" : "false");
prop.putHTML("hosts_list_" + c + "_host", host);
boolean inCrawler = crawler.containsKey(host);
int exclcount = exclscore.get(host);
int failcount = failscore.get(host);
int errors = exclcount + failcount;
prop.put("hosts_list_" + c + "_count", hostscore.get(host));
prop.put("hosts_list_" + c + "_crawler", inCrawler ? 1 : 0);
if (inCrawler)
prop.put("hosts_list_" + c + "_crawler_pending", crawler.get(host)[0]);
prop.put("hosts_list_" + c + "_errors", errors > 0 ? 1 : 0);
if (errors > 0) {
prop.put("hosts_list_" + c + "_errors_exclcount", exclcount);
prop.put("hosts_list_" + c + "_errors_failcount", failcount);
}
prop.put("hosts_list_" + c + "_type", inCrawler ? 2 : errors > 0 ? 1 : 0);
if (onlyCrawling) {
if (inCrawler)
c++;
} else if (onlyErrors) {
if (errors > 0)
c++;
} else {
c++;
}
}
prop.put("hosts_list", c);
prop.put("hosts", 1);
} catch (final IOException e) {
ConcurrentLog.logException(e);
}
}
if (path.length() > 0) {
try {
DigestURL uri = new DigestURL(path);
String host = uri.getHost();
// write host analysis if path after host is empty
if (uri.getPath().length() <= 1 && host != null && host.length() > 0
&& sb.getConfigBool("decoration.hostanalysis", false)) {
//how many documents per crawldepth_i; get crawldepth_i facet for host
ArrayList<String> ff = new ArrayList<>();
for (CollectionSchema csf : CollectionSchema.values()) {
if ((csf.getType() != SolrType.num_integer && csf.getType() != SolrType.num_long)
|| csf.isMultiValued())
continue;
String facetfield = csf.getSolrFieldName();
if (!fulltext.getDefaultConfiguration().contains(facetfield))
continue;
ff.add(csf.getSolrFieldName());
}
// add also vocabulary counters
Map<String, ReversibleScoreMap<String>> vocabularyFacet = sb.index.fulltext()
.getDefaultConnector().getFacets(
CollectionSchema.vocabularies_sxt.getSolrFieldName()
+ AbstractSolrConnector.CATCHALL_DTERM,
100, CollectionSchema.vocabularies_sxt.getSolrFieldName());
if (vocabularyFacet.size() > 0) {
Collection<String> vocnames = vocabularyFacet.values().iterator().next().keyList(true);
for (String vocname : vocnames) {
ff.add(CollectionSchema.VOCABULARY_PREFIX + vocname
+ CollectionSchema.VOCABULARY_LOGCOUNT_SUFFIX);
ff.add(CollectionSchema.VOCABULARY_PREFIX + vocname
+ CollectionSchema.VOCABULARY_LOGCOUNTS_SUFFIX);
}
}
// list the facets
String[] facetfields = ff.toArray(new String[ff.size()]);
Map<String, ReversibleScoreMap<String>> facets = fulltext.getDefaultConnector().getFacets(
CollectionSchema.host_s.getSolrFieldName() + ":\"" + host + "\"", 100, facetfields);
int fc = 0;
for (Map.Entry<String, ReversibleScoreMap<String>> facetentry : facets.entrySet()) {
ReversibleScoreMap<String> facetfieldmap = facetentry.getValue();
if (facetfieldmap.size() == 0)
continue;
TreeMap<Long, Integer> statMap = new TreeMap<>();
for (String k : facetfieldmap)
statMap.put(Long.parseLong(k), facetfieldmap.get(k));
prop.put("hostanalysis_facets_" + fc + "_facetname", facetentry.getKey());
int c = 0;
for (Entry<Long, Integer> entry : statMap.entrySet()) {
prop.put("hostanalysis_facets_" + fc + "_facet_" + c + "_key", entry.getKey());
prop.put("hostanalysis_facets_" + fc + "_facet_" + c + "_count", entry.getValue());
prop.put("hostanalysis_facets_" + fc + "_facet_" + c + "_a",
"http://localhost:" + sb.getConfigInt("port", 8090)
+ "/solr/collection1/select?q=host_s:" + host + " AND "
+ facetentry.getKey() + ":" + entry.getKey()
+ "&defType=edismax&start=0&rows=1000&fl=sku,crawldepth_i");
c++;
}
prop.put("hostanalysis_facets_" + fc + "_facet", c);
fc++;
}
prop.put("hostanalysis_facets", fc);
prop.put("hostanalysis", 1);
}
// write file list for subpath
boolean delete = false;
boolean reload404 = false;
if (authorized && post.containsKey("delete")) {
// delete the complete path!! That includes everything that matches with this prefix.
delete = true;
}
if (authorized && post.containsKey("reload404")) {
// try to re-load all urls that have load errors and matches with this prefix.
reload404 = true;
}
int facetcount = post.getInt("facetcount", 0);
boolean complete = post.getBoolean("complete");
if (complete) { // we want only root paths for complete lists
p = path.indexOf('/', 10);
if (p > 0)
path = path.substring(0, p + 1);
}
prop.put("files_complete", complete ? 1 : 0);
prop.put("files_complete_admin", admin ? "true" : "false");
prop.putHTML("files_complete_path", path);
p = path.substring(0, path.length() - 1).lastIndexOf('/');
if (p < 8) {
prop.put("files_root", 1);
} else {
prop.put("files_root", 0);
prop.putHTML("files_root_path", path.substring(0, p + 1));
prop.put("files_root_admin", admin ? "true" : "false");
}
// generate file list from path
prop.putHTML("outbound_host", host);
if (authorized)
prop.putHTML("outbound_admin_host", host); //used for WebStructurePicture_p link
prop.putHTML("inbound_host", host);
String hosthash = ASCII.String(uri.hash(), 6, 6);
String[] pathparts = uri.getPaths();
// get all files for a specific host from the index
StringBuilder q = new StringBuilder();
if (host == null) {
if (path.startsWith("file://")) {
q.append(CollectionSchema.url_protocol_s.getSolrFieldName()).append(":file");
}
} else {
q.append(CollectionSchema.host_s.getSolrFieldName()).append(":\"").append(host).append("\"");
}
if (pathparts.length > 0 && pathparts[0].length() > 0) {
for (String pe : pathparts) {
if (pe.length() > 0)
q.append(" AND ").append(CollectionSchema.url_paths_sxt.getSolrFieldName())
.append(":\"").append(pe).append('\"');
}
} else {
if (facetcount > 1000 || post.containsKey("nepr")) {
q.append(" AND ").append(CollectionSchema.url_paths_sxt.getSolrFieldName())
.append(AbstractSolrConnector.CATCHALL_DTERM);
}
}
BlockingQueue<SolrDocument> docs = fulltext.getDefaultConnector().concurrentDocumentsByQuery(
q.toString(), CollectionSchema.url_chars_i.getSolrFieldName() + " asc", 0, 100000, TIMEOUT,
100, 1, false, CollectionSchema.id.getSolrFieldName(),
CollectionSchema.sku.getSolrFieldName(), CollectionSchema.failreason_s.getSolrFieldName(),
CollectionSchema.failtype_s.getSolrFieldName(),
CollectionSchema.inboundlinks_protocol_sxt.getSolrFieldName(),
CollectionSchema.inboundlinks_urlstub_sxt.getSolrFieldName(),
CollectionSchema.outboundlinks_protocol_sxt.getSolrFieldName(),
CollectionSchema.outboundlinks_urlstub_sxt.getSolrFieldName(),
CollectionSchema.crawldepth_i.getSolrFieldName(),
CollectionSchema.references_i.getSolrFieldName(),
CollectionSchema.references_internal_i.getSolrFieldName(),
CollectionSchema.references_external_i.getSolrFieldName(),
CollectionSchema.references_exthosts_i.getSolrFieldName(),
CollectionSchema.cr_host_chance_d.getSolrFieldName(),
CollectionSchema.cr_host_norm_i.getSolrFieldName());
SolrDocument doc;
Set<String> storedDocs = new HashSet<String>();
Map<String, FailType> errorDocs = new HashMap<String, FailType>();
Set<String> inboundLinks = new HashSet<String>();
Map<String, ReversibleScoreMap<String>> outboundHosts = new HashMap<String, ReversibleScoreMap<String>>();
Map<String, InfoCacheEntry> infoCache = new HashMap<String, InfoCacheEntry>();
int hostsize = 0;
final List<String> deleteIDs = new ArrayList<String>();
final Collection<String> reloadURLs = new ArrayList<String>();
final Set<String> reloadURLCollection = new HashSet<String>();
long timeoutList = System.currentTimeMillis() + TIMEOUT;
long timeoutReferences = System.currentTimeMillis() + 6000;
ReferenceReportCache rrCache = sb.index.getReferenceReportCache();
while ((doc = docs.take()) != AbstractSolrConnector.POISON_DOCUMENT) {
String u = (String) doc.getFieldValue(CollectionSchema.sku.getSolrFieldName());
String errortype = (String) doc.getFieldValue(CollectionSchema.failtype_s.getSolrFieldName());
FailType error = errortype == null ? null : FailType.valueOf(errortype);
String ids = (String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName());
infoCache.put(ids, new InfoCacheEntry(sb.index.fulltext(), rrCache, doc, ids,
System.currentTimeMillis() < timeoutReferences));
if (u.startsWith(path)) {
if (delete) {
deleteIDs.add(ids);
} else {
if (error == null)
storedDocs.add(u);
else {
if (reload404 && error == FailType.fail) {
ArrayList<String> collections = (ArrayList<String>) doc
.getFieldValue(CollectionSchema.collection_sxt.getSolrFieldName());
if (collections != null)
reloadURLCollection.addAll(collections);
reloadURLs.add(u);
}
if (authorized)
errorDocs.put(u, error);
}
}
} else if (complete) {
if (error == null)
storedDocs.add(u);
else {
if (authorized)
errorDocs.put(u, error);
}
}
if ((complete || u.startsWith(path)) && !storedDocs.contains(u))
inboundLinks.add(u); // add the current link
if (error == null) {
hostsize++;
// collect inboundlinks to browse the host
Iterator<String> links = URIMetadataNode.getLinks(doc, true);
while (links.hasNext()) {
u = links.next();
if ((complete || u.startsWith(path)) && !storedDocs.contains(u))
inboundLinks.add(u);
}
// collect referrer links
links = URIMetadataNode.getLinks(doc, false);
while (links.hasNext()) {
u = links.next();
try {
MultiProtocolURL mu = new MultiProtocolURL(u);
if (mu.getHost() != null) {
ReversibleScoreMap<String> lks = outboundHosts.get(mu.getHost());
if (lks == null) {
lks = new ClusteredScoreMap<String>(UTF8.insensitiveUTF8Comparator);
outboundHosts.put(mu.getHost(), lks);
}
lks.set(u, u.length());
}
} catch (final MalformedURLException e) {
}
}
}
if (System.currentTimeMillis() > timeoutList)
break;
}
if (deleteIDs.size() > 0)
sb.remove(deleteIDs);
if (reloadURLs.size() > 0) {
final Map<String, Pattern> cm = new LinkedHashMap<String, Pattern>();
for (String collection : reloadURLCollection)
cm.put(collection, QueryParams.catchall_pattern);
sb.reload(reloadURLs, cm.size() > 0 ? cm : CrawlProfile.collectionParser("user"), false);
}
// collect from crawler
List<Request> domainStackReferences = (authorized)
? sb.crawlQueues.noticeURL.getDomainStackReferences(StackType.LOCAL, host, 1000, 3000)
: new ArrayList<Request>(0);
Set<String> loadingLinks = new HashSet<String>();
for (Request crawlEntry : domainStackReferences)
loadingLinks.add(crawlEntry.url().toNormalform(true));
// now combine all lists into one
Map<String, StoreType> files = new HashMap<String, StoreType>();
for (String u : storedDocs)
files.put(u, StoreType.INDEX);
for (Map.Entry<String, FailType> e : errorDocs.entrySet())
files.put(e.getKey(), e.getValue() == FailType.fail ? StoreType.FAILED : StoreType.EXCLUDED);
for (String u : inboundLinks)
if (!files.containsKey(u))
files.put(u, StoreType.LINK);
for (String u : loadingLinks)
if (u.startsWith(path) && !files.containsKey(u))
files.put(u, StoreType.LINK);
ConcurrentLog.info("HostBrowser", "collected " + files.size() + " urls for path " + path);
// distinguish files and folders
Map<String, Object> list = new TreeMap<String, Object>(); // a directory list; if object is boolean, its a file; if its a int[], then its a folder
int pl = path.length();
String file;
for (Map.Entry<String, StoreType> entry : files.entrySet()) {
if (entry.getKey().length() < pl)
continue; // this is not inside the path
if (!entry.getKey().startsWith(path))
continue;
file = entry.getKey().substring(pl);
StoreType type = entry.getValue();
p = file.indexOf('/');
if (p < 0) {
// this is a file
list.put(entry.getKey(), type); // StoreType value: this is a file; true -> file is in index; false -> not in index, maybe in crawler
} else {
// this is a directory path or a file in a subdirectory
String remainingPath = file.substring(0, p + 1);
if (complete && remainingPath.indexOf('.') > 0) {
list.put(entry.getKey(), type); // StoreType value: this is a file
} else {
String dir = path + remainingPath;
Object c = list.get(dir);
if (c == null) {
int[] linkedStoredIncrawlerError = new int[] { 0, 0, 0, 0, 0 };
if (type == StoreType.LINK)
linkedStoredIncrawlerError[0]++;
if (type == StoreType.INDEX)
linkedStoredIncrawlerError[1]++;
if (loadingLinks.contains(entry.getKey()))
linkedStoredIncrawlerError[2]++;
if (errorDocs.containsKey(entry.getKey()))
linkedStoredIncrawlerError[errorDocs.get(entry.getKey()) == FailType.excl ? 3
: 4]++;
list.put(dir, linkedStoredIncrawlerError);
} else if (c instanceof int[]) {
if (type == StoreType.LINK)
((int[]) c)[0]++;
if (type == StoreType.INDEX)
((int[]) c)[1]++;
if (loadingLinks.contains(entry.getKey()))
((int[]) c)[2]++;
if (errorDocs.containsKey(entry.getKey()))
((int[]) c)[errorDocs.get(entry.getKey()) == FailType.excl ? 3 : 4]++;
}
}
}
}
int maxcount = 1000;
int c = 0;
// first list only folders
int filecounter = 0;
for (Map.Entry<String, Object> entry : list.entrySet()) {
if ((entry.getValue() instanceof StoreType)) {
filecounter++;
} else {
// this is a folder
prop.put("files_list_" + c + "_type", 1);
prop.putHTML("files_list_" + c + "_type_url", entry.getKey());
prop.putHTML("files_list_" + c + "_type_admin", admin ? "true" : "false");
int linked = ((int[]) entry.getValue())[0];
int stored = ((int[]) entry.getValue())[1];
int crawler = ((int[]) entry.getValue())[2];
int excl = ((int[]) entry.getValue())[3];
int error = ((int[]) entry.getValue())[4];
prop.put("files_list_" + c + "_type_stored", stored);
prop.put("files_list_" + c + "_type_linked", linked);
prop.put("files_list_" + c + "_type_pendingVisible", crawler > 0 ? 1 : 0);
prop.put("files_list_" + c + "_type_pending", crawler);
prop.put("files_list_" + c + "_type_excludedVisible", excl > 0 ? 1 : 0);
prop.put("files_list_" + c + "_type_excluded", excl);
prop.put("files_list_" + c + "_type_failedVisible", error > 0 ? 1 : 0);
prop.put("files_list_" + c + "_type_failed", error);
if (++c >= maxcount)
break;
}
}
// then list files
for (Map.Entry<String, Object> entry : list.entrySet()) {
if (entry.getValue() instanceof StoreType) {
// this is a file
prop.put("files_list_" + c + "_type", 0);
prop.putHTML("files_list_" + c + "_type_url", entry.getKey());
prop.putHTML("files_list_" + c + "_type_admin", admin ? "true" : "false");
StoreType type = (StoreType) entry.getValue();
try {
uri = new DigestURL(entry.getKey());
} catch (final MalformedURLException e) {
uri = null;
}
HarvestProcess process = uri == null ? null : sb.crawlQueues.exists(uri.hash()); // todo: cannot identify errors
boolean loading = load.equals(entry.getKey())
|| (process != null && process != HarvestProcess.ERRORS);
boolean error = process == HarvestProcess.ERRORS || type == StoreType.EXCLUDED
|| type == StoreType.FAILED;
boolean dc = type != StoreType.INDEX && !error && !loading
&& list.containsKey(entry.getKey() + "/");
if (!dc) {
prop.put("files_list_" + c + "_type_stored",
type == StoreType.INDEX ? 1 : error ? 3 : loading ? 2 : 0 /*linked*/);
if (type == StoreType.INDEX) {
String ids = ASCII.String(uri.hash());
InfoCacheEntry ice = infoCache.get(ids);
prop.put("files_list_" + c + "_type_stored_comment",
ice == null ? "" : ice.toString()); // ice.toString() contains html, therefore do not use putHTML here
}
prop.put("files_list_" + c + "_type_stored_load", loadRight ? 1 : 0);
if (error) {
FailType failType = errorDocs.get(entry.getKey());
if (failType == null) {
// maybe this is only in the errorURL
//Metadata faildoc = sb.index.fulltext().getDefaultConnector().getMetadata(ASCII.String(uri.hash()));
prop.putHTML("files_list_" + c + "_type_stored_error", "unknown error");
} else {
String ids = ASCII.String(uri.hash());
InfoCacheEntry ice = infoCache.get(ids);
prop.put("files_list_" + c + "_type_stored_error",
failType == FailType.excl ? "excluded from indexing"
: "load fail" + (ice == null ? "" : "; " + ice.toString()));
}
}
if (loadRight) {
prop.putHTML("files_list_" + c + "_type_stored_load_url", entry.getKey());
prop.putHTML("files_list_" + c + "_type_stored_load_path", path);
}
if (++c >= maxcount)
break;
}
}
}
prop.put("files_list", c);
prop.putHTML("files_path", path);
prop.put("files_hostsize", hostsize);
prop.put("files_subpathloadsize", storedDocs.size());
prop.put("files_subpathdetectedsize", filecounter - storedDocs.size());
prop.put("files", 1);
uri = new DigestURL(path);
if (post.containsKey("showlinkstructure")) {
sb.setConfig(SwitchboardConstants.DECORATION_GRAFICS_LINKSTRUCTURE, true);
}
prop.put("files_linkgraph", uri.getPath().length() <= 1 && hostsize > 0
&& sb.getConfigBool(SwitchboardConstants.DECORATION_GRAFICS_LINKSTRUCTURE, true));
prop.put("files_linkgraph_host", uri.getHost());
// generate inbound-links table
StructureEntry struct = sb.webStructure.incomingReferences(hosthash);
if (struct != null && struct.references.size() > 0) {
maxcount = 200;
ReversibleScoreMap<String> score = new ClusteredScoreMap<String>(
UTF8.insensitiveUTF8Comparator);
for (Map.Entry<String, Integer> entry : struct.references.entrySet())
score.set(entry.getKey(), entry.getValue());
c = 0;
Iterator<String> i = score.keys(false);
while (i.hasNext() && c < maxcount) {
host = i.next();
prop.put("inbound_list_" + c + "_admin", admin ? "true" : "false");
prop.putHTML("inbound_list_" + c + "_host", sb.webStructure.hostHash2hostName(host));
prop.put("inbound_list_" + c + "_count", score.get(host));
c++;
}
prop.put("inbound_list", c);
prop.put("inbound", 1);
} else {
prop.put("inbound", 0);
}
// generate outbound-links table
if (outboundHosts.size() > 0) {
maxcount = 200;
ReversibleScoreMap<String> score = new ClusteredScoreMap<String>(
UTF8.insensitiveUTF8Comparator);
for (Map.Entry<String, ReversibleScoreMap<String>> entry : outboundHosts.entrySet())
score.set(entry.getKey(), entry.getValue().size());
c = 0;
Iterator<String> i = score.keys(false);
while (i.hasNext() && c < maxcount) {
host = i.next();
prop.putHTML("outbound_list_" + c + "_host", host);
prop.put("outbound_list_" + c + "_count", score.get(host));
prop.put("outbound_list_" + c + "_link", outboundHosts.get(host).getMinKey());
prop.put("outbound_list_" + c + "_admin", admin ? "true" : "false");
c++;
}
prop.put("outbound_list", c);
prop.put("outbound", 1);
} else {
prop.put("outbound", 0);
}
} catch (final Throwable e) {
ConcurrentLog.logException(e);
}
}
// return rewrite properties
prop.putNum("ucount", fulltext.collectionSize());
return prop;
}
From source file:alba.solr.common.MySimpleFloatFunction.java
License:Apache License
/**
 * Doc transformer that reverses the value of the "label" field in place.
 *
 * @param doc the Solr result document to mutate; must carry a non-null "label"
 *            field (getFieldValue(...).toString() would NPE otherwise — TODO confirm
 *            the schema guarantees this)
 */
@DocTransformer(name = "reverse")
public void reverse(SolrDocument doc) {
    String label = doc.getFieldValue("label").toString();
    // StringBuilder, not StringBuffer: the buffer is method-local, so the
    // synchronization StringBuffer pays for is pure overhead here.
    doc.setField("label", new StringBuilder(label).reverse().toString());
}
From source file:alba.solr.common.MySimpleFloatFunction.java
License:Apache License
/**
 * Doc transformer that upper-cases the value of a caller-chosen field in place.
 *
 * @param doc   the Solr result document to mutate
 * @param field name of the field to upper-case; must be present and non-null
 *              (getFieldValue(...).toString() would NPE otherwise)
 */
@DocTransformer(name = "ucase")
public void ucase(SolrDocument doc, @Param(name = "field", description = "the field") String field) {
    // Locale.ROOT makes the transform locale-independent: the bare toUpperCase()
    // uses the JVM default locale and mangles e.g. 'i' under a Turkish locale.
    doc.setField(field, doc.getFieldValue(field).toString().toUpperCase(java.util.Locale.ROOT));
}
From source file:argendata.service.impl.AppServiceImpl.java
private List<App> searchApp(String myTerms, List<String> mySortByFields, List<String> myKeywords) throws MalformedURLException, SolrServerException { QueryResponse queryRsp = searchQueryApp(myTerms, mySortByFields, myKeywords); SolrDocumentList docs = queryRsp.getResults(); List<SolrDocument> list = docs.subList(0, docs.size()); List<App> resp = new ArrayList<App>(); for (SolrDocument sd : list) { App anApp = this.getApprovedAppByQName("Semanticapp:" + (String) sd.getFieldValue("titleId")); resp.add(anApp);//from ww w .j ava 2s . c om } return resp; }
From source file:argendata.service.impl.DatasetServiceImpl.java
/**
 * Maps every Solr hit in the response to a Dataset bean.
 *
 * The "keyword" field may come back single-valued (String) or multi-valued
 * (List&lt;String&gt;) depending on the document; both shapes are normalized
 * into a Set. Documents without a "keyword" field get an empty keyword set
 * (the original code NPE'd on them via addAll(null)).
 *
 * @param rsp the Solr query response to convert
 * @return one Dataset per result document, in result order
 */
@SuppressWarnings("unchecked")
private List<Dataset> getDatasets(QueryResponse rsp) {
    List<Dataset> datasets = new ArrayList<Dataset>();
    // SolrDocumentList is already a List<SolrDocument>; the former
    // docs.subList(0, docs.size()) view was redundant.
    SolrDocumentList docs = rsp.getResults();
    for (SolrDocument sd : docs) {
        Set<String> keywords = new HashSet<String>();
        Object obj = sd.getFieldValue("keyword");
        if (obj instanceof String) {
            keywords.add((String) obj);
        } else if (obj != null) {
            // unchecked: multi-valued Solr fields are returned as List<String>
            keywords.addAll((List<String>) obj);
        }
        datasets.add(new Dataset((String) sd.getFieldValue("title"),
                (String) sd.getFieldValue("license"), keywords,
                (String) sd.getFieldValue("dataQuality"),
                // "modified" is stored as a Date; its toString() is what the bean expects
                ((Date) sd.getFieldValue("modified")).toString(),
                (String) sd.getFieldValue("spatial"),
                (String) sd.getFieldValue("temporal"),
                (String) sd.getFieldValue("location"),
                (String) sd.getFieldValue("publisher"),
                (String) sd.getFieldValue("resourceType"),
                (String) sd.getFieldValue("titleId")));
    }
    return datasets;
}
From source file:at.pagu.soldockr.core.convert.MappingSolrConverterTest.java
License:Apache License
/**
 * Verifies that read(...) copies both a String-valued and an int-valued field
 * from a SolrDocument into the converted bean unchanged.
 */
@Test
public void testRead() {
    SolrDocument source = new SolrDocument();
    source.addField("stringProperty", "christoph");
    source.addField("intProperty", 32);

    ConvertableBean bean = converter.read(ConvertableBean.class, (Map<String, Object>) source);

    Assert.assertEquals(source.getFieldValue("stringProperty"), bean.getStringProperty());
    Assert.assertEquals(source.getFieldValue("intProperty"), bean.getIntProperty());
}
From source file:at.tugraz.kmi.medokyservice.rec.io.SolrDBClient.java
License:Open Source License
public List<ContentTags> getContentTagsByUser(String username) throws SolrServerException { List<ContentTags> tagList = new LinkedList<ContentTags>(); SolrQuery query = new SolrQuery("username:" + username).setFields("tags, timestamp") .setFilterQueries("tags:['' TO *]").setRows(10000000); QueryResponse rsp = this.query(query); SolrDocumentList docs = rsp.getResults(); for (SolrDocument doc : docs) { Date date = (Date) doc.getFieldValue("timestamp"); if (date == null || doc.getFieldValue("tags") == null) continue; ContentTags tagEntry = new ContentTags(date); tagEntry.tags = ((ArrayList<String>) doc.getFieldValue("tags")); tagList.add(tagEntry);//from w ww. j av a2 s .co m } return tagList; }