Example usage for org.apache.solr.common SolrDocument getFieldValue

List of usage examples for org.apache.solr.common SolrDocument getFieldValue

Introduction

On this page you can find example usages of org.apache.solr.common SolrDocument getFieldValue.

Prototype

@Override
public Object getFieldValue(String name) 

Source Link

Document

Get the value or collection of values for a given field.

Usage

From source file:IndexDeletion_p.java

License:Open Source License

/**
 * Servlet handler for the IndexDeletion_p page. Deletes documents from the
 * Solr fulltext (and optionally webgraph) index in one of four modes:
 * by URL match (sub-path stub or regular expression), by document age,
 * by collection assignment, or by a free-form Solr query. Each mode offers
 * a "simulate-*" action that only counts the matching documents and an
 * "engage-*" action that actually performs the deletion.
 *
 * @param header the HTTP request header (unused)
 * @param post   form parameters; null or empty on a plain page view
 * @param env    the YaCy Switchboard environment
 * @return substitution properties for the IndexDeletion_p.html template
 */
public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header,
        final serverObjects post, final serverSwitch env) {
    // return variable that accumulates replacements
    final Switchboard sb = (Switchboard) env;
    final serverObjects prop = new serverObjects();

    SolrConnector defaultConnector = sb.index.fulltext().getDefaultConnector();
    SolrConnector webgraphConnector = sb.index.fulltext().getWebgraphConnector();
    if (post == null || post.size() == 0)
        defaultConnector.commit(false); // we must do a commit here because the user cannot see a proper count.

    // target core; the "core" parameter may select the webgraph schema instead
    String schemaName = CollectionSchema.CORE_NAME;
    if (post != null)
        schemaName = post.get("core", schemaName);

    // Delete by URL Matching: read form state and pre-fill the template
    String urldelete = post == null ? "" : post.get("urldelete", "");
    boolean urldelete_mm_subpath_checked = post == null ? true
            : post.get("urldelete-mm", "subpath").equals("subpath");
    prop.putHTML("urldelete", urldelete);
    prop.put("urldelete-mm-subpath-checked", urldelete_mm_subpath_checked ? 1 : 0);
    prop.put("urldelete-mm-regexp-checked", urldelete_mm_subpath_checked ? 0 : 1);
    prop.put("urldelete-active", 0);

    // Delete by Age: read form state and pre-fill the template
    int timedelete_number = post == null ? 14 : post.getInt("timedelete-number", 14);
    String timedelete_unit = post == null ? "day" : post.get("timedelete-unit", "day");
    boolean timedelete_source_loaddate_checked = post == null ? true
            : post.get("timedelete-source", "loaddate").equals("loaddate");
    for (int i = 1; i <= 90; i++)
        prop.put("timedelete-n-" + i, 0);
    prop.put("timedelete-n-" + timedelete_number, timedelete_number);
    prop.put("timedelete-u-year", timedelete_unit.equals("year") ? 1 : 0);
    prop.put("timedelete-u-month", timedelete_unit.equals("month") ? 1 : 0);
    prop.put("timedelete-u-day", timedelete_unit.equals("day") ? 1 : 0);
    prop.put("timedelete-u-hour", timedelete_unit.equals("hour") ? 1 : 0);
    prop.put("timedelete-source-loaddate-checked", timedelete_source_loaddate_checked ? 1 : 0);
    prop.put("timedelete-source-lastmodified-checked", timedelete_source_loaddate_checked ? 0 : 1);
    prop.put("timedelete-active", 0);

    // Delete Collections: read form state and pre-fill the template
    boolean collectiondelete_mode_unassigned_checked = post == null ? true
            : post.get("collectiondelete-mode", "unassigned").equals("unassigned");
    String collectiondelete = post == null ? "" : post.get("collectiondelete", "");
    if (post != null && post.containsKey("collectionlist")) {
        collectiondelete_mode_unassigned_checked = false;
        prop.put("collectiondelete-select", 1);
        try { // enumerate all collection names (with document counts) for the selection list
            ScoreMap<String> collectionMap = defaultConnector
                    .getFacets("*:*", 1000, CollectionSchema.collection_sxt.getSolrFieldName())
                    .get(CollectionSchema.collection_sxt.getSolrFieldName());
            Iterator<String> i = collectionMap.iterator();
            int c = 0;
            while (i.hasNext()) {
                String collection = i.next();
                prop.put("collectiondelete-select_list_" + c + "_collection-name",
                        collection + "/" + collectionMap.get(collection));
                prop.put("collectiondelete-select_list_" + c + "_collection-value", collection);
                c++;
            }
            prop.put("collectiondelete-select_list", c);
        } catch (final IOException e1) {
            prop.put("collectiondelete-select", 0); // facet query failed: hide the selection list
        }
    } else {
        prop.put("collectiondelete-select", 0);
    }
    prop.put("collectiondelete-mode-unassigned-checked", collectiondelete_mode_unassigned_checked ? 1 : 0);
    prop.put("collectiondelete-mode-assigned-checked", collectiondelete_mode_unassigned_checked ? 0 : 1);
    prop.putHTML("collectiondelete-select_collectiondelete", collectiondelete);
    prop.put("collectiondelete-active", 0);

    // Delete by Solr Query: read form state and pre-fill the template
    prop.put("querydelete", "");
    String querydelete = post == null ? "" : post.get("querydelete", "");
    // simulate default search field if no field is given by adding text_t: as target field
    if (!querydelete.isEmpty() && !querydelete.contains(":"))
        querydelete = CollectionSchema.text_t.getSolrFieldName() + ":" + querydelete;
    prop.putHTML("querydelete", querydelete);
    prop.put("querydelete-active", 0);

    // result count of the last simulate action; -1 = unknown
    int count = post == null ? -1 : post.getInt("count", -1);

    // action: delete by URL matching (simulate counts only, engage deletes)
    if (post != null && (post.containsKey("simulate-urldelete") || post.containsKey("engage-urldelete"))) {
        boolean simulate = post.containsKey("simulate-urldelete");
        // parse the input
        urldelete = urldelete.trim();
        if (urldelete_mm_subpath_checked) {
            // collect using url stubs
            Set<String> ids = new HashSet<String>();
            // stubs may be separated either by line breaks or by '|'
            String[] stubURLs = urldelete.indexOf('\n') > 0 || urldelete.indexOf('\r') > 0
                    ? urldelete.split("[\\r\\n]+")
                    : urldelete.split(Pattern.quote("|"));
            for (String urlStub : stubURLs) {
                if (urlStub == null || urlStub.length() == 0)
                    continue;
                // prepend a protocol when the stub has none
                int pos = urlStub.indexOf("://", 0);
                if (pos == -1) {
                    if (urlStub.startsWith("ftp"))
                        urlStub = "ftp://" + urlStub;
                    else
                        urlStub = "http://" + urlStub;
                }
                try {
                    DigestURL u = new DigestURL(urlStub);
                    // stream all documents of the stub's host and prefix-match their urls below
                    BlockingQueue<SolrDocument> dq = defaultConnector.concurrentDocumentsByQuery(
                            CollectionSchema.host_s.getSolrFieldName() + ":\"" + u.getHost() + "\"", null, 0,
                            100000000, Long.MAX_VALUE, 100, 1, false, CollectionSchema.id.getSolrFieldName(),
                            CollectionSchema.sku.getSolrFieldName());
                    SolrDocument doc;
                    try {
                        // drain until the poison pill signals the end of the result stream
                        while ((doc = dq.take()) != AbstractSolrConnector.POISON_DOCUMENT) {
                            String url = (String) doc.getFieldValue(CollectionSchema.sku.getSolrFieldName());
                            if (url.startsWith(urlStub))
                                ids.add((String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName()));
                        }
                    } catch (final InterruptedException e) {
                        // NOTE(review): interrupt is swallowed without re-interrupting the
                        // thread; deletion proceeds with the ids collected so far
                    }
                } catch (final MalformedURLException e) {
                    // skip stubs that do not form a valid URL
                }
            }

            if (simulate) {
                count = ids.size();
                prop.put("urldelete-active", count == 0 ? 2 : 1);
            } else {
                sb.remove(ids);
                defaultConnector.commit(false);
                sb.tables.recordAPICall(post, "IndexDeletion_p.html", WorkTables.TABLE_API_TYPE_DELETION,
                        "deletion, docs matching with " + urldelete);
                prop.put("urldelete-active", 2);
            }
        } else {
            // collect using a regular expression on urls
            String regexquery = CollectionSchema.sku.getSolrFieldName() + ":/" + urldelete + "/";
            if (simulate) {
                try {
                    count = (int) defaultConnector.getCountByQuery("{!cache=false}" + regexquery);
                } catch (final IOException e) {
                    // count stays at its previous value (usually -1) when the query fails
                }
                prop.put("urldelete-active", count == 0 ? 2 : 1);
            } else {
                try {
                    defaultConnector.deleteByQuery(regexquery);
                    defaultConnector.commit(false);
                    sb.tables.recordAPICall(post, "IndexDeletion_p.html", WorkTables.TABLE_API_TYPE_DELETION,
                            "deletion, regex match = " + urldelete);
                } catch (final IOException e) {
                    // NOTE(review): deletion failure is silently ignored; the page still
                    // reports the action as done
                }
                prop.put("urldelete-active", 2);
            }
        }
        prop.put("urldelete-active_count", count);
    }

    // action: delete by age, using load date or last-modified date as reference
    if (post != null && (post.containsKey("simulate-timedelete") || post.containsKey("engage-timedelete"))) {
        boolean simulate = post.containsKey("simulate-timedelete");
        Date deleteageDate = null;
        long t = timeParser(timedelete_number, timedelete_unit); // year, month, day, hour
        if (t > 0)
            deleteageDate = new Date(t);
        // NOTE(review): if timeParser returns <= 0, deleteageDate stays null and the
        // format calls below will fail — confirm timeParser always yields a positive
        // epoch time for the units listed above.
        final String collection1Query = (timedelete_source_loaddate_checked ? CollectionSchema.load_date_dt
                : CollectionSchema.last_modified).getSolrFieldName() + ":[* TO "
                + ISO8601Formatter.FORMATTER.format(deleteageDate) + "]";
        final String webgraphQuery = (timedelete_source_loaddate_checked ? WebgraphSchema.load_date_dt
                : WebgraphSchema.last_modified).getSolrFieldName() + ":[* TO "
                + ISO8601Formatter.FORMATTER.format(deleteageDate) + "]";
        if (simulate) {
            try {
                count = (int) defaultConnector.getCountByQuery(collection1Query);
            } catch (final IOException e) {
                // count stays at its previous value when the query fails
            }
            prop.put("timedelete-active", count == 0 ? 2 : 1);
        } else {
            try {
                defaultConnector.deleteByQuery(collection1Query);
                defaultConnector.commit(false);
                if (webgraphConnector != null)
                    webgraphConnector.deleteByQuery(webgraphQuery);
                sb.tables.recordAPICall(post, "IndexDeletion_p.html", WorkTables.TABLE_API_TYPE_DELETION,
                        "deletion, docs older than " + timedelete_number + " " + timedelete_unit);
            } catch (final IOException e) {
                // NOTE(review): deletion failure is silently ignored
            }
            prop.put("timedelete-active", 2);
        }
        prop.put("timedelete-active_count", count);
    }

    // action: delete by collection — either all docs without a collection
    // assignment, or docs matching the given collection expression
    if (post != null
            && (post.containsKey("simulate-collectiondelete") || post.containsKey("engage-collectiondelete"))) {
        boolean simulate = post.containsKey("simulate-collectiondelete");
        collectiondelete = collectiondelete.replaceAll(" ", "").replaceAll(",", "|");
        String query = collectiondelete_mode_unassigned_checked
                ? "-" + CollectionSchema.collection_sxt + AbstractSolrConnector.CATCHALL_DTERM
                : collectiondelete.length() == 0 ? CollectionSchema.collection_sxt + ":\"\""
                        : QueryModifier.parseCollectionExpression(collectiondelete);
        if (simulate) {
            try {
                count = (int) defaultConnector.getCountByQuery(query);
            } catch (final IOException e) {
                // count stays at its previous value when the query fails
            }
            prop.put("collectiondelete-active", count == 0 ? 2 : 1);
        } else {
            try {
                defaultConnector.deleteByQuery(query);
                defaultConnector.commit(false);
                sb.tables.recordAPICall(post, "IndexDeletion_p.html", WorkTables.TABLE_API_TYPE_DELETION,
                        "deletion, collection " + collectiondelete);
            } catch (final IOException e) {
                // NOTE(review): deletion failure is silently ignored
            }
            prop.put("collectiondelete-active", 2);
        }
        prop.put("collectiondelete-active_count", count);
    }

    // action: delete by free-form Solr query against the selected core
    if (post != null && (post.containsKey("simulate-querydelete") || post.containsKey("engage-querydelete"))) {
        boolean simulate = post.containsKey("simulate-querydelete");

        SolrConnector connector = schemaName.equals(CollectionSchema.CORE_NAME) ? defaultConnector
                : sb.index.fulltext().getWebgraphConnector();
        if (simulate) {
            try {
                count = (int) connector.getCountByQuery(querydelete);
            } catch (final IOException e) {
                // count stays at its previous value when the query fails
            }
            prop.put("querydelete-active", count == 0 ? 2 : 1);
        } else {
            try {
                // log index sizes before and after so the effect of the query is traceable
                ConcurrentLog.info("IndexDeletion", "delete by query \"" + querydelete
                        + "\", size before deletion = " + connector.getSize());
                connector.deleteByQuery(querydelete);
                connector.commit(false);
                ConcurrentLog.info("IndexDeletion",
                        "delete by query \"" + querydelete + "\", size after commit = " + connector.getSize());
                sb.tables.recordAPICall(post, "IndexDeletion_p.html", WorkTables.TABLE_API_TYPE_DELETION,
                        "deletion, solr query, q = " + querydelete);
            } catch (final IOException e) {
                // NOTE(review): deletion failure is silently ignored
            }
            prop.put("querydelete-active", 2);
        }
        prop.put("querydelete-active_count", count);
    }
    prop.put("doccount", defaultConnector.getSize());

    // core selection list for the query-delete form
    prop.put("cores_" + 0 + "_name", CollectionSchema.CORE_NAME);
    prop.put("cores_" + 0 + "_selected", CollectionSchema.CORE_NAME.equals(schemaName) ? 1 : 0);
    prop.put("cores_" + 1 + "_name", WebgraphSchema.CORE_NAME);
    prop.put("cores_" + 1 + "_selected", WebgraphSchema.CORE_NAME.equals(schemaName) ? 1 : 0);
    prop.put("cores", 2);

    // return rewrite properties
    return prop;
}

From source file:SolrUpdate.java

License:Apache License

/**
 * Loads up to 10,000 documents matching the query {@code pmid:*} from the
 * local Solr server at {@code http://localhost:8983/solr}, converts each
 * document into a {@link Publication} and appends it to {@code solrrecords}.
 * Also derives the combined author full names and the two "Epub" summary
 * strings used for display.
 *
 * Any failure (connection, query, malformed field value) is caught broadly
 * and printed; records gathered before the failure are kept.
 */
public void getAllSolrRecords() {
    try {
        SolrServer solr = new HttpSolrServer("http://localhost:8983/solr");

        SolrQuery theq = new SolrQuery();
        theq.setQuery("pmid:*");
        theq.setStart(0);
        theq.setRows(10000); // hard upper bound; matches beyond this are silently ignored

        QueryResponse response = solr.query(theq);
        SolrDocumentList list = response.getResults();

        int docnum = 1; // 1-based display index of the record
        for (SolrDocument doc : list) {
            Publication currlist = new Publication();

            List<String> fullnames = new ArrayList<String>();
            String currepubsum1 = "", currepubsum2 = "";

            // single-valued string fields: copy only when present
            String s;
            if ((s = stringField(doc, "abstract")) != null) {
                currlist.setAbstract(s);
            }
            if ((s = stringField(doc, "ptitle")) != null) {
                currlist.setTitle(s);
            }
            if ((s = stringField(doc, "author_fullname_list")) != null) {
                currlist.setFirst5authors(s);
            }
            if ((s = stringField(doc, "pmid")) != null) {
                currlist.setPmid(Integer.valueOf(s));
            }
            if ((s = stringField(doc, "completion")) != null) {
                currlist.setCompletion(Boolean.valueOf(s));
            }
            if ((s = stringField(doc, "lruid")) != null) {
                currlist.setLruid(s);
            }
            if ((s = stringField(doc, "draftpoint")) != null) {
                currlist.setDraftpoint(Integer.valueOf(s));
            }
            if ((s = stringField(doc, "journalname")) != null) {
                currlist.setJournalname(s);
            }
            if ((s = stringField(doc, "journalyear")) != null) {
                currlist.setJournalyear(s);
            }
            if ((s = stringField(doc, "journalday")) != null) {
                currlist.setJournalday(s);
            }
            if ((s = stringField(doc, "journalmonth")) != null) {
                currlist.setJournalmonth(s);
            }
            if ((s = stringField(doc, "journalpage")) != null) {
                currlist.setJournalstartpg(s);
            }
            if ((s = stringField(doc, "journalissue")) != null) {
                currlist.setJournalissue(s);
            }
            if ((s = stringField(doc, "journalvolume")) != null) {
                currlist.setJournalvolume(s);
            }
            if ((s = stringField(doc, "publicationdate_year")) != null) {
                currlist.setYear(s);
            }
            if ((s = stringField(doc, "doi")) != null) {
                currlist.setDoi(s);
            }
            if ((s = stringField(doc, "epubmonth")) != null) {
                currlist.setEpubmonth(s);
            }
            if ((s = stringField(doc, "epubyear")) != null) {
                currlist.setEpubyear(s);
            }
            if ((s = stringField(doc, "epubday")) != null) {
                currlist.setEpubday(s);
            }

            // multi-valued field: file info entries are accumulated as strings
            if (doc.getFieldValues("pfileinfo") != null) {
                Collection<Object> currcoll = doc.getFieldValues("pfileinfo");
                for (Object currobj : currcoll) {
                    currlist.getFilesanddata().add(currobj.toString());
                }
            }
            // author name lists — assumes these fields are multi-valued string
            // lists in the schema; TODO confirm against the Solr schema
            if (doc.getFieldValue("author_firstname") != null) {
                currlist.setFauthors((List<String>) doc.getFieldValue("author_firstname"));
            }
            if (doc.getFieldValue("author_lastname") != null) {
                currlist.setLauthors((List<String>) doc.getFieldValue("author_lastname"));
            }

            // pair first and last names positionally. Guard against lists of
            // unequal length: the original code threw IndexOutOfBoundsException
            // here, which the outer catch swallowed, dropping all remaining records.
            List<String> firstNames = currlist.getFauthors();
            List<String> lastNames = currlist.getLauthors();
            int pairs = Math.min(firstNames.size(), lastNames.size());
            for (int i = 0; i < pairs; i++) {
                fullnames.add(firstNames.get(i) + " " + lastNames.get(i));
            }
            currlist.setFullnames(fullnames);

            // build the "volume(issue):page." journal summary
            if (currlist.getJournalvolume().length() > 0) {
                currepubsum2 += currlist.getJournalvolume();
            }
            if (currlist.getJournalissue().length() > 0) {
                currepubsum2 += "(" + currlist.getJournalissue() + ")" + ":";
            }
            if (currlist.getJournalstartpg().length() > 0) {
                currepubsum2 += currlist.getJournalstartpg() + ".";
            }

            // build the electronic-publication summary line
            if (currlist.getEpubday().length() < 1 && currlist.getEpubmonth().length() < 1
                    && currlist.getEpubyear().length() < 1) {
                currepubsum1 = "[Epub ahead of print]";
            } else if (currlist.getEpubyear().length() > 0) {
                currepubsum1 = "Epub " + currlist.getEpubyear() + " " + currlist.getEpubmonth() + " "
                        + currlist.getEpubday();
            } else {
                currepubsum1 = "";
            }

            currlist.setEpubsum(currepubsum1);
            currlist.setEpubsum2(currepubsum2);
            currlist.setIndex(docnum);

            // completion flag controls public visibility of the record
            if (currlist.getCompletion() == false) {
                currlist.setComp("Hidden");
            } else {
                currlist.setComp("Visible");
            }

            solrrecords.add(currlist);
            docnum++;
        }

    } catch (Exception ex) {
        // NOTE(review): broad catch keeps the original best-effort behavior;
        // consider logging the stack trace instead of toString only.
        System.out.println(ex);
    }

    System.out.println("There are a total of this many records gathered: " + solrrecords.size());
}

/**
 * Returns the named field's value as a String, or null when the field is
 * absent from the document.
 */
private static String stringField(SolrDocument doc, String name) {
    Object value = doc.getFieldValue(name);
    return value == null ? null : value.toString();
}

From source file:citation.java

License:Open Source License

/**
 * Servlet handler for the citation page. Given a URL (or URL hash), loads the
 * document's title and text from the Solr index, splits the text into
 * sentences, and searches the index for other documents containing the same
 * sentences. The template then shows which sentences are cited elsewhere,
 * which URLs cite them, and which documents from other hosts are similar
 * (at least {@code ch} shared sentences).
 *
 * @param header the HTTP request header (unused)
 * @param post   form parameters ("url", "hash", "ch", "filter"); may be null
 * @param env    the YaCy Switchboard environment
 * @return substitution properties for the citation template
 */
public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header,
        final serverObjects post, final serverSwitch env) {
    // return variable that accumulates replacements
    final Switchboard sb = (Switchboard) env;
    final serverObjects prop = new serverObjects();
    final Segment segment = sb.index;
    final SolrConnector connector = segment.fulltext().getDefaultConnector();

    // avoid UNRESOLVED PATTERN
    prop.put("url", "");
    prop.put("citations", 0);
    prop.put("sentences", 0);

    DigestURL uri = null;
    String url = "";
    String hash = "";
    int ch = 10; // minimum citation count for the "similar documents" list
    boolean filter = false; // show cited sentences only
    if (post != null) {
        if (post.containsKey("url")) {
            url = post.get("url");
            // default to http:// when no supported protocol prefix is given
            if (!url.startsWith("http://") && !url.startsWith("https://") && !url.startsWith("ftp://")
                    && !url.startsWith("smb://") && !url.startsWith("file://")) {
                url = "http://" + url;
            }
        }
        if (post.containsKey("hash")) {
            hash = post.get("hash");
        }
        if (post.containsKey("ch")) {
            ch = post.getInt("ch", ch);
        }
        filter = post.getBoolean("filter");
    }
    prop.put("filter", filter);
    // resolve the URL: prefer an explicit url parameter, fall back to the hash
    if (url.length() > 0) {
        try {
            uri = new DigestURL(url, null);
            hash = ASCII.String(uri.hash());
        } catch (final MalformedURLException e) {
            // ignore: fall through to hash-based resolution below
        }
    }
    if (uri == null && hash.length() > 0) {
        try {
            uri = sb.getURL(ASCII.getBytes(hash));
            if (uri == null) {
                connector.commit(true); // try again, that url can be fresh
                uri = sb.getURL(ASCII.getBytes(hash));
            }
        } catch (IOException e) {
            ConcurrentLog.logException(e);
        }
    }
    if (uri == null)
        return prop; // no proper url addressed
    url = uri.toNormalform(true);
    prop.put("url", url);

    // get the document from the index
    SolrDocument doc;
    try {
        doc = segment.fulltext().getDefaultConnector().getDocumentById(hash,
                CollectionSchema.title.getSolrFieldName(), CollectionSchema.text_t.getSolrFieldName());
    } catch (final IOException e1) {
        return prop;
    }
    // the document may not (or not yet) exist in the index; without this check
    // the getFieldValue calls below would throw a NullPointerException
    if (doc == null)
        return prop;
    @SuppressWarnings("unchecked")
    ArrayList<String> title = (ArrayList<String>) doc.getFieldValue(CollectionSchema.title.getSolrFieldName());
    String text = (String) doc.getFieldValue(CollectionSchema.text_t.getSolrFieldName());

    // collect all sentences: title lines first, then the text body
    ArrayList<String> sentences = new ArrayList<String>();
    if (title != null)
        for (String s : title)
            if (s.length() > 0)
                sentences.add(s);
    // guard against documents that were indexed without a text_t field
    SentenceReader sr = new SentenceReader(text == null ? "" : text);
    StringBuilder line;
    while (sr.hasNext()) {
        line = sr.next();
        if (line.length() > 0)
            sentences.add(line.toString());
    }

    // for each line make a statistic about the number of occurrences somewhere else
    OrderedScoreMap<String> scores = new OrderedScoreMap<String>(null); // accumulates scores for citating urls
    LinkedHashMap<String, Set<DigestURL>> sentenceOcc = new LinkedHashMap<String, Set<DigestURL>>();
    for (String sentence : sentences) {
        if (sentence == null || sentence.length() < 40) {
            // do not count the very short sentences
            sentenceOcc.put(sentence, null);
            continue;
        }
        try {
            // double quotes would break the phrase query below
            sentence = sentence.replace('"', '\'');
            SolrDocumentList doclist = connector.getDocumentListByQuery("text_t:\"" + sentence + "\"",
                    CollectionSchema.url_chars_i.getSolrFieldName() + " asc", 0, 100,
                    CollectionSchema.sku.getSolrFieldName());
            int count = (int) doclist.getNumFound();
            if (count > 0) {
                Set<DigestURL> list = new TreeSet<DigestURL>();
                for (SolrDocument d : doclist) {
                    String u = (String) d.getFieldValue(CollectionSchema.sku.getSolrFieldName());
                    if (u == null || u.equals(url))
                        continue; // skip self-citations
                    scores.inc(u);
                    try {
                        list.add(new DigestURL(u, null));
                    } catch (final MalformedURLException e) {
                        // skip urls that cannot be parsed
                    }
                }
                sentenceOcc.put(sentence, list);
            }
        } catch (final Throwable ee) {
            // best effort: a failing phrase query must not abort the whole page
        }
    }
    sentences.clear(); // we do not need this again

    // iterate the sentences and render each with its citing hosts
    int i = 0;
    int sentenceNr = 0;
    for (Map.Entry<String, Set<DigestURL>> se : sentenceOcc.entrySet()) {
        Set<DigestURL> app = se.getValue();
        if (filter) { // prepare list, only include sentence with citation
            if (app != null && app.size() > 0) {
                StringBuilder dd = new StringBuilder(se.getKey());
                prop.put("sentences_" + i + "_dt", sentenceNr);
                dd.append("<br/>appears in:");
                for (DigestURL u : app) {
                    if (u != null) {
                        dd.append(" <a href=\"").append(u.toNormalform(false)).append("\">").append(u.getHost())
                                .append("</a>");
                    }
                }
                prop.put("sentences_" + i + "_dd", dd.toString());
                i++;
            }
        } else { // prepare list, include all sentences
            StringBuilder dd = new StringBuilder(se.getKey());
            prop.put("sentences_" + i + "_dt", sentenceNr);
            if (app != null && app.size() > 0) {
                dd.append("<br/>appears in:");
                for (DigestURL u : app) {
                    if (u != null) {
                        dd.append(" <a href=\"").append(u.toNormalform(false)).append("\">").append(u.getHost())
                                .append("</a>");
                    }
                }
            }
            prop.put("sentences_" + i + "_dd", dd.toString());
            i++;
        }
        sentenceNr++;
    }
    prop.put("sentences", i);

    // iterate the citations in order of number of citations
    i = 0;
    for (String u : scores.keyList(false)) {
        try {
            DigestURL uu = new DigestURL(u, null);
            prop.put("citations_" + i + "_dt", "<a href=\"" + u + "\">" + u + "</a>");
            StringBuilder dd = new StringBuilder();
            dd.append("makes ").append(Integer.toString(scores.get(u))).append(" citations: of ").append(url);
            for (Map.Entry<String, Set<DigestURL>> se : sentenceOcc.entrySet()) {
                Set<DigestURL> occurls = se.getValue();
                if (occurls != null && occurls.contains(uu))
                    dd.append("<br/><a href=\"/solr/select?q=text_t:%22").append(se.getKey().replace('"', '\''))
                            .append("%22&rows=100&grep=&wt=grephtml\">").append(se.getKey()).append("</a>");
            }
            prop.put("citations_" + i + "_dd", dd.toString());
            i++;
        } catch (final MalformedURLException e) {
            // skip citing urls that cannot be parsed
        }
    }
    prop.put("citations", i);

    // find similar documents from different hosts (at least ch shared sentences)
    i = 0;
    for (String u : scores.keyList(false)) {
        if (scores.get(u) < ch)
            continue;
        try {
            DigestURL uu = new DigestURL(u, null);
            if (uu.getOrganization().equals(uri.getOrganization()))
                continue; // same host family: not "similar", just self-linking
            prop.put("similar_links_" + i + "_url", u);
            i++;
        } catch (final MalformedURLException e) {
            // skip urls that cannot be parsed
        }
    }
    prop.put("similar_links", i);
    prop.put("similar", i > 0 ? 1 : 0);

    // return rewrite properties
    return prop;
}

From source file:HostBrowser.java

License:Open Source License

/**
 * Servlet handler for the HostBrowser page: renders a browsable view of the
 * hosts and files known to the local Solr fulltext index, and — for authorized
 * users — triggers loading, deletion or re-crawling of URLs under a path.
 * Results are written into the returned property map, which the surrounding
 * framework merges into the HostBrowser HTML template.
 *
 * @param header incoming HTTP request header (used for authentication and Referer)
 * @param post   request parameters; may be null for a bare page request
 * @param env    the switchboard environment (cast to Switchboard below)
 * @return the template property map
 */
@SuppressWarnings({ "unchecked" })
public static serverObjects respond(final RequestHeader header, final serverObjects post,
        final serverSwitch env) {
    // return variable that accumulates replacements
    final Switchboard sb = (Switchboard) env;
    Fulltext fulltext = sb.index.fulltext();
    final boolean authorized = sb.verifyAuthentication(header);
    final boolean autoload = authorized && sb.getConfigBool("browser.autoload", true);
    final boolean load4everyone = sb.getConfigBool("browser.load4everyone", false);
    final boolean loadRight = autoload || load4everyone; // add config later
    final boolean searchAllowed = sb.getConfigBool(SwitchboardConstants.PUBLIC_SEARCHPAGE, true) || authorized;

    final serverObjects prop = new serverObjects();

    // set default values
    prop.put("path", "");
    prop.put("result", "");
    prop.put("hosts", 0);
    prop.put("files", 0);
    prop.put("hostanalysis", 0);

    prop.put("admin", "false");
    boolean admin = false;

    // admin mode is requested either by the "admin" parameter or carried over via the Referer
    String referer = header.get("Referer", "");
    if ((post != null && post.getBoolean("admin")) || referer.contains("HostBrowser.html?admin=true")) {
        prop.put("topmenu", 2);
        prop.put("admin", "true");
        admin = true;
    } else if (authorized) { // show top nav to admins
        prop.put("topmenu", 1);
    } else { // for other respect setting in Search Design Configuration
        prop.put("topmenu", sb.getConfigBool("publicTopmenu", true) ? 1 : 0);
    }
    final String promoteSearchPageGreeting = (env.getConfigBool(SwitchboardConstants.GREETING_NETWORK_NAME,
            false)) ? env.getConfig("network.unit.description", "")
                    : env.getConfig(SwitchboardConstants.GREETING, "");
    prop.put("topmenu_promoteSearchPageGreeting", promoteSearchPageGreeting);

    if (!searchAllowed) {
        prop.put("result", "You are not allowed to use this page. Please ask an administrator for permission.");
        prop.putNum("ucount", 0);
        return prop;
    }

    String path = post == null ? "" : post.get("path", "").trim();
    // commit pending index changes so admins see up-to-date counts
    if (authorized)
        sb.index.fulltext().commit(true);
    if (post == null || env == null) {
        prop.putNum("ucount", fulltext.collectionSize());
        return prop;
    }

    // normalize the path: make it end with "/" and carry a protocol prefix
    int p = path.lastIndexOf('/');
    if (p < 0 && path.length() > 0)
        path = path + "/";
    else if (p > 7)
        path = path.substring(0, p + 1); // the search path shall always end with "/"
    if (path.length() > 0 && (!path.startsWith("http://") && !path.startsWith("https://")
            && !path.startsWith("ftp://") && !path.startsWith("smb://") && !path.startsWith("file://"))) {
        path = "http://" + path;
    }
    prop.putHTML("path", path);
    prop.put("delete", authorized && path.length() > 0 ? 1 : 0);

    DigestURL pathURI = null;
    try {
        pathURI = new DigestURL(path);
    } catch (final MalformedURLException e) {
        // ignored: pathURI stays null and autoload is skipped below
    }

    String load = post.get("load", "");
    boolean wait = false;
    try {
        if (loadRight && autoload && path.length() != 0 && pathURI != null && load.length() == 0
                && sb.index.getLoadTime(ASCII.String(pathURI.hash())) < 0) {
            // in case that the url does not exist and loading is wanted turn this request into a loading request
            load = path;
            wait = true;
        }
    } catch (IOException e1) {
        load = path;
        wait = true;
    }
    if (load.length() > 0 && loadRight) {
        // stack URL
        DigestURL url;
        // do not wait if the crawler is already busy
        if (sb.crawlStacker.size() > 2)
            wait = false;
        try {
            url = new DigestURL(load);
            String reasonString = sb.crawlStacker.stackCrawl(new Request(sb.peers.mySeed().hash.getBytes(), url,
                    null, load, new Date(), sb.crawler.defaultProxyProfile.handle(), 0,
                    sb.crawler.defaultProxyProfile.timezoneOffset()));
            prop.putHTML("result", reasonString == null ? ("added url to indexer: " + load)
                    : ("not indexed url '" + load + "': " + reasonString));
            // poll (up to ~3s) until the freshly stacked url appears in the index
            if (wait)
                waitloop: for (int i = 0; i < 30; i++) {
                    try {
                        if (sb.index.getLoadTime(ASCII.String(url.hash())) >= 0)
                            break;
                    } catch (IOException e1) {
                        e1.printStackTrace();
                        break waitloop;
                    }
                    try {
                        Thread.sleep(100);
                    } catch (final InterruptedException e) {
                    }
                }
        } catch (final MalformedURLException e) {
            prop.putHTML("result", "bad url '" + load + "'");
        }
    }

    // admin action: purge all error documents (non-200 http status, fail/excl failtypes)
    if (authorized && post.containsKey("deleteLoadErrors")) {
        try {
            fulltext.getDefaultConnector()
                    .deleteByQuery("-" + CollectionSchema.httpstatus_i.getSolrFieldName() + ":200 AND "
                            + CollectionSchema.httpstatus_i.getSolrFieldName()
                            + AbstractSolrConnector.CATCHALL_DTERM); // make sure field exists
            ConcurrentLog.info("HostBrowser:", "delete documents with httpstatus_i <> 200");
            fulltext.getDefaultConnector().deleteByQuery(
                    CollectionSchema.failtype_s.getSolrFieldName() + ":\"" + FailType.fail.name() + "\"");
            ConcurrentLog.info("HostBrowser:", "delete documents with failtype_s = fail");
            fulltext.getDefaultConnector().deleteByQuery(
                    CollectionSchema.failtype_s.getSolrFieldName() + ":\"" + FailType.excl.name() + "\"");
            ConcurrentLog.info("HostBrowser:", "delete documents with failtype_s = excl");
            prop.putNum("ucount", fulltext.collectionSize());
            return prop;
        } catch (final IOException ex) {
            ConcurrentLog.logException(ex);
        }
    }

    if (post.containsKey("hosts")) {
        // generate host list
        try {
            boolean onlyCrawling = "crawling".equals(post.get("hosts", ""));
            boolean onlyErrors = "error".equals(post.get("hosts", ""));

            int maxcount = authorized ? 2 * 3 * 2 * 5 * 7 * 2 * 3 : 360; // which makes nice matrixes for 2, 3, 4, 5, 6, 7, 8, 9 rows/colums

            // collect hosts from index
            ReversibleScoreMap<String> hostscore = fulltext.getDefaultConnector()
                    .getFacets(CollectionSchema.httpstatus_i.getSolrFieldName() + ":200", maxcount,
                            CollectionSchema.host_s.getSolrFieldName())
                    .get(CollectionSchema.host_s.getSolrFieldName());
            if (hostscore == null)
                hostscore = new ClusteredScoreMap<String>(true);

            // collect hosts from crawler
            final Map<String, Integer[]> crawler = (authorized)
                    ? sb.crawlQueues.noticeURL.getDomainStackHosts(StackType.LOCAL, sb.robots)
                    : new HashMap<String, Integer[]>();

            // collect the errorurls
            Map<String, ReversibleScoreMap<String>> exclfacets = authorized ? fulltext.getDefaultConnector()
                    .getFacets(CollectionSchema.failtype_s.getSolrFieldName() + ":" + FailType.excl.name(),
                            maxcount, CollectionSchema.host_s.getSolrFieldName())
                    : null;
            ReversibleScoreMap<String> exclscore = exclfacets == null ? new ClusteredScoreMap<String>(true)
                    : exclfacets.get(CollectionSchema.host_s.getSolrFieldName());
            Map<String, ReversibleScoreMap<String>> failfacets = authorized ? fulltext.getDefaultConnector()
                    .getFacets(CollectionSchema.failtype_s.getSolrFieldName() + ":" + FailType.fail.name(),
                            maxcount, CollectionSchema.host_s.getSolrFieldName())
                    : null;
            ReversibleScoreMap<String> failscore = failfacets == null ? new ClusteredScoreMap<String>(true)
                    : failfacets.get(CollectionSchema.host_s.getSolrFieldName());

            // merge index, crawler and error information into one host table
            int c = 0;
            Iterator<String> i = hostscore.keys(false);
            String host;
            while (i.hasNext() && c < maxcount) {
                host = i.next();
                prop.put("hosts_list_" + c + "_admin", admin ? "true" : "false");
                prop.putHTML("hosts_list_" + c + "_host", host);
                boolean inCrawler = crawler.containsKey(host);
                int exclcount = exclscore.get(host);
                int failcount = failscore.get(host);
                int errors = exclcount + failcount;
                prop.put("hosts_list_" + c + "_count", hostscore.get(host));
                prop.put("hosts_list_" + c + "_crawler", inCrawler ? 1 : 0);
                if (inCrawler)
                    prop.put("hosts_list_" + c + "_crawler_pending", crawler.get(host)[0]);
                prop.put("hosts_list_" + c + "_errors", errors > 0 ? 1 : 0);
                if (errors > 0) {
                    prop.put("hosts_list_" + c + "_errors_exclcount", exclcount);
                    prop.put("hosts_list_" + c + "_errors_failcount", failcount);
                }
                prop.put("hosts_list_" + c + "_type", inCrawler ? 2 : errors > 0 ? 1 : 0);
                // the counter only advances for rows matching the requested filter
                if (onlyCrawling) {
                    if (inCrawler)
                        c++;
                } else if (onlyErrors) {
                    if (errors > 0)
                        c++;
                } else {
                    c++;
                }
            }
            prop.put("hosts_list", c);
            prop.put("hosts", 1);
        } catch (final IOException e) {
            ConcurrentLog.logException(e);
        }
    }

    if (path.length() > 0) {
        try {
            DigestURL uri = new DigestURL(path);
            String host = uri.getHost();

            // write host analysis if path after host is empty
            if (uri.getPath().length() <= 1 && host != null && host.length() > 0
                    && sb.getConfigBool("decoration.hostanalysis", false)) {
                //how many documents per crawldepth_i; get crawldepth_i facet for host
                ArrayList<String> ff = new ArrayList<>();
                // facet over all single-valued integer/long fields of the schema
                for (CollectionSchema csf : CollectionSchema.values()) {
                    if ((csf.getType() != SolrType.num_integer && csf.getType() != SolrType.num_long)
                            || csf.isMultiValued())
                        continue;
                    String facetfield = csf.getSolrFieldName();
                    if (!fulltext.getDefaultConfiguration().contains(facetfield))
                        continue;
                    ff.add(csf.getSolrFieldName());
                }
                // add also vocabulary counters
                Map<String, ReversibleScoreMap<String>> vocabularyFacet = sb.index.fulltext()
                        .getDefaultConnector().getFacets(
                                CollectionSchema.vocabularies_sxt.getSolrFieldName()
                                        + AbstractSolrConnector.CATCHALL_DTERM,
                                100, CollectionSchema.vocabularies_sxt.getSolrFieldName());
                if (vocabularyFacet.size() > 0) {
                    Collection<String> vocnames = vocabularyFacet.values().iterator().next().keyList(true);
                    for (String vocname : vocnames) {
                        ff.add(CollectionSchema.VOCABULARY_PREFIX + vocname
                                + CollectionSchema.VOCABULARY_LOGCOUNT_SUFFIX);
                        ff.add(CollectionSchema.VOCABULARY_PREFIX + vocname
                                + CollectionSchema.VOCABULARY_LOGCOUNTS_SUFFIX);
                    }
                }
                // list the facets
                String[] facetfields = ff.toArray(new String[ff.size()]);
                Map<String, ReversibleScoreMap<String>> facets = fulltext.getDefaultConnector().getFacets(
                        CollectionSchema.host_s.getSolrFieldName() + ":\"" + host + "\"", 100, facetfields);
                int fc = 0;
                for (Map.Entry<String, ReversibleScoreMap<String>> facetentry : facets.entrySet()) {
                    ReversibleScoreMap<String> facetfieldmap = facetentry.getValue();
                    if (facetfieldmap.size() == 0)
                        continue;
                    // sort facet keys numerically (all selected fields are numeric)
                    TreeMap<Long, Integer> statMap = new TreeMap<>();
                    for (String k : facetfieldmap)
                        statMap.put(Long.parseLong(k), facetfieldmap.get(k));
                    prop.put("hostanalysis_facets_" + fc + "_facetname", facetentry.getKey());
                    int c = 0;
                    for (Entry<Long, Integer> entry : statMap.entrySet()) {
                        prop.put("hostanalysis_facets_" + fc + "_facet_" + c + "_key", entry.getKey());
                        prop.put("hostanalysis_facets_" + fc + "_facet_" + c + "_count", entry.getValue());
                        prop.put("hostanalysis_facets_" + fc + "_facet_" + c + "_a",
                                "http://localhost:" + sb.getConfigInt("port", 8090)
                                        + "/solr/collection1/select?q=host_s:" + host + " AND "
                                        + facetentry.getKey() + ":" + entry.getKey()
                                        + "&defType=edismax&start=0&rows=1000&fl=sku,crawldepth_i");
                        c++;
                    }
                    prop.put("hostanalysis_facets_" + fc + "_facet", c);
                    fc++;
                }
                prop.put("hostanalysis_facets", fc);
                prop.put("hostanalysis", 1);
            }

            // write file list for subpath
            boolean delete = false;
            boolean reload404 = false;
            if (authorized && post.containsKey("delete")) {
                // delete the complete path!! That includes everything that matches with this prefix.
                delete = true;
            }
            if (authorized && post.containsKey("reload404")) {
                // try to re-load all urls that have load errors and matches with this prefix.
                reload404 = true;
            }
            int facetcount = post.getInt("facetcount", 0);
            boolean complete = post.getBoolean("complete");
            if (complete) { // we want only root paths for complete lists
                p = path.indexOf('/', 10);
                if (p > 0)
                    path = path.substring(0, p + 1);
            }
            prop.put("files_complete", complete ? 1 : 0);
            prop.put("files_complete_admin", admin ? "true" : "false");
            prop.putHTML("files_complete_path", path);
            p = path.substring(0, path.length() - 1).lastIndexOf('/');
            if (p < 8) {
                prop.put("files_root", 1);
            } else {
                prop.put("files_root", 0);
                prop.putHTML("files_root_path", path.substring(0, p + 1));
                prop.put("files_root_admin", admin ? "true" : "false");
            }
            // generate file list from path
            prop.putHTML("outbound_host", host);
            if (authorized)
                prop.putHTML("outbound_admin_host", host); //used for WebStructurePicture_p link
            prop.putHTML("inbound_host", host);
            String hosthash = ASCII.String(uri.hash(), 6, 6);
            String[] pathparts = uri.getPaths();

            // get all files for a specific host from the index
            StringBuilder q = new StringBuilder();
            if (host == null) {
                if (path.startsWith("file://")) {
                    q.append(CollectionSchema.url_protocol_s.getSolrFieldName()).append(":file");
                }
            } else {
                q.append(CollectionSchema.host_s.getSolrFieldName()).append(":\"").append(host).append("\"");
            }
            if (pathparts.length > 0 && pathparts[0].length() > 0) {
                for (String pe : pathparts) {
                    if (pe.length() > 0)
                        q.append(" AND ").append(CollectionSchema.url_paths_sxt.getSolrFieldName())
                                .append(":\"").append(pe).append('\"');
                }
            } else {
                if (facetcount > 1000 || post.containsKey("nepr")) {
                    q.append(" AND ").append(CollectionSchema.url_paths_sxt.getSolrFieldName())
                            .append(AbstractSolrConnector.CATCHALL_DTERM);
                }
            }
            BlockingQueue<SolrDocument> docs = fulltext.getDefaultConnector().concurrentDocumentsByQuery(
                    q.toString(), CollectionSchema.url_chars_i.getSolrFieldName() + " asc", 0, 100000, TIMEOUT,
                    100, 1, false, CollectionSchema.id.getSolrFieldName(),
                    CollectionSchema.sku.getSolrFieldName(), CollectionSchema.failreason_s.getSolrFieldName(),
                    CollectionSchema.failtype_s.getSolrFieldName(),
                    CollectionSchema.inboundlinks_protocol_sxt.getSolrFieldName(),
                    CollectionSchema.inboundlinks_urlstub_sxt.getSolrFieldName(),
                    CollectionSchema.outboundlinks_protocol_sxt.getSolrFieldName(),
                    CollectionSchema.outboundlinks_urlstub_sxt.getSolrFieldName(),
                    CollectionSchema.crawldepth_i.getSolrFieldName(),
                    CollectionSchema.references_i.getSolrFieldName(),
                    CollectionSchema.references_internal_i.getSolrFieldName(),
                    CollectionSchema.references_external_i.getSolrFieldName(),
                    CollectionSchema.references_exthosts_i.getSolrFieldName(),
                    CollectionSchema.cr_host_chance_d.getSolrFieldName(),
                    CollectionSchema.cr_host_norm_i.getSolrFieldName());
            SolrDocument doc;
            Set<String> storedDocs = new HashSet<String>();
            Map<String, FailType> errorDocs = new HashMap<String, FailType>();
            Set<String> inboundLinks = new HashSet<String>();
            Map<String, ReversibleScoreMap<String>> outboundHosts = new HashMap<String, ReversibleScoreMap<String>>();
            Map<String, InfoCacheEntry> infoCache = new HashMap<String, InfoCacheEntry>();
            int hostsize = 0;
            final List<String> deleteIDs = new ArrayList<String>();
            final Collection<String> reloadURLs = new ArrayList<String>();
            final Set<String> reloadURLCollection = new HashSet<String>();
            // NOTE(review): TIMEOUT is a constant declared elsewhere in this file
            long timeoutList = System.currentTimeMillis() + TIMEOUT;
            long timeoutReferences = System.currentTimeMillis() + 6000;
            ReferenceReportCache rrCache = sb.index.getReferenceReportCache();
            // drain the result queue until the poison document signals the end of the stream
            while ((doc = docs.take()) != AbstractSolrConnector.POISON_DOCUMENT) {
                String u = (String) doc.getFieldValue(CollectionSchema.sku.getSolrFieldName());
                String errortype = (String) doc.getFieldValue(CollectionSchema.failtype_s.getSolrFieldName());
                FailType error = errortype == null ? null : FailType.valueOf(errortype);
                String ids = (String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName());
                infoCache.put(ids, new InfoCacheEntry(sb.index.fulltext(), rrCache, doc, ids,
                        System.currentTimeMillis() < timeoutReferences));
                if (u.startsWith(path)) {
                    if (delete) {
                        deleteIDs.add(ids);
                    } else {
                        if (error == null)
                            storedDocs.add(u);
                        else {
                            if (reload404 && error == FailType.fail) {
                                ArrayList<String> collections = (ArrayList<String>) doc
                                        .getFieldValue(CollectionSchema.collection_sxt.getSolrFieldName());
                                if (collections != null)
                                    reloadURLCollection.addAll(collections);
                                reloadURLs.add(u);
                            }
                            if (authorized)
                                errorDocs.put(u, error);
                        }
                    }
                } else if (complete) {
                    if (error == null)
                        storedDocs.add(u);
                    else {
                        if (authorized)
                            errorDocs.put(u, error);
                    }
                }
                if ((complete || u.startsWith(path)) && !storedDocs.contains(u))
                    inboundLinks.add(u); // add the current link
                if (error == null) {
                    hostsize++;
                    // collect inboundlinks to browse the host
                    Iterator<String> links = URIMetadataNode.getLinks(doc, true);
                    while (links.hasNext()) {
                        u = links.next();
                        if ((complete || u.startsWith(path)) && !storedDocs.contains(u))
                            inboundLinks.add(u);
                    }

                    // collect referrer links
                    links = URIMetadataNode.getLinks(doc, false);
                    while (links.hasNext()) {
                        u = links.next();
                        try {
                            MultiProtocolURL mu = new MultiProtocolURL(u);
                            if (mu.getHost() != null) {
                                ReversibleScoreMap<String> lks = outboundHosts.get(mu.getHost());
                                if (lks == null) {
                                    lks = new ClusteredScoreMap<String>(UTF8.insensitiveUTF8Comparator);
                                    outboundHosts.put(mu.getHost(), lks);
                                }
                                lks.set(u, u.length());
                            }
                        } catch (final MalformedURLException e) {
                        }
                    }
                }
                // stop draining when the listing deadline has passed
                if (System.currentTimeMillis() > timeoutList)
                    break;
            }
            if (deleteIDs.size() > 0)
                sb.remove(deleteIDs);
            if (reloadURLs.size() > 0) {
                final Map<String, Pattern> cm = new LinkedHashMap<String, Pattern>();
                for (String collection : reloadURLCollection)
                    cm.put(collection, QueryParams.catchall_pattern);
                sb.reload(reloadURLs, cm.size() > 0 ? cm : CrawlProfile.collectionParser("user"), false);
            }

            // collect from crawler
            List<Request> domainStackReferences = (authorized)
                    ? sb.crawlQueues.noticeURL.getDomainStackReferences(StackType.LOCAL, host, 1000, 3000)
                    : new ArrayList<Request>(0);
            Set<String> loadingLinks = new HashSet<String>();
            for (Request crawlEntry : domainStackReferences)
                loadingLinks.add(crawlEntry.url().toNormalform(true));

            // now combine all lists into one
            Map<String, StoreType> files = new HashMap<String, StoreType>();
            for (String u : storedDocs)
                files.put(u, StoreType.INDEX);
            for (Map.Entry<String, FailType> e : errorDocs.entrySet())
                files.put(e.getKey(), e.getValue() == FailType.fail ? StoreType.FAILED : StoreType.EXCLUDED);
            for (String u : inboundLinks)
                if (!files.containsKey(u))
                    files.put(u, StoreType.LINK);
            for (String u : loadingLinks)
                if (u.startsWith(path) && !files.containsKey(u))
                    files.put(u, StoreType.LINK);
            ConcurrentLog.info("HostBrowser", "collected " + files.size() + " urls for path " + path);

            // distinguish files and folders
            Map<String, Object> list = new TreeMap<String, Object>(); // a directory list; if object is boolean, its a file; if its a int[], then its a folder
            int pl = path.length();
            String file;
            for (Map.Entry<String, StoreType> entry : files.entrySet()) {
                if (entry.getKey().length() < pl)
                    continue; // this is not inside the path
                if (!entry.getKey().startsWith(path))
                    continue;
                file = entry.getKey().substring(pl);
                StoreType type = entry.getValue();
                p = file.indexOf('/');
                if (p < 0) {
                    // this is a file
                    list.put(entry.getKey(), type); // StoreType value: this is a file; true -> file is in index; false -> not in index, maybe in crawler
                } else {
                    // this is a directory path or a file in a subdirectory
                    String remainingPath = file.substring(0, p + 1);
                    if (complete && remainingPath.indexOf('.') > 0) {
                        list.put(entry.getKey(), type); // StoreType value: this is a file
                    } else {
                        String dir = path + remainingPath;
                        Object c = list.get(dir);
                        if (c == null) {
                            // int[] slots: 0=linked, 1=stored, 2=in crawler, 3=excluded, 4=failed
                            int[] linkedStoredIncrawlerError = new int[] { 0, 0, 0, 0, 0 };
                            if (type == StoreType.LINK)
                                linkedStoredIncrawlerError[0]++;
                            if (type == StoreType.INDEX)
                                linkedStoredIncrawlerError[1]++;
                            if (loadingLinks.contains(entry.getKey()))
                                linkedStoredIncrawlerError[2]++;
                            if (errorDocs.containsKey(entry.getKey()))
                                linkedStoredIncrawlerError[errorDocs.get(entry.getKey()) == FailType.excl ? 3
                                        : 4]++;
                            list.put(dir, linkedStoredIncrawlerError);
                        } else if (c instanceof int[]) {
                            if (type == StoreType.LINK)
                                ((int[]) c)[0]++;
                            if (type == StoreType.INDEX)
                                ((int[]) c)[1]++;
                            if (loadingLinks.contains(entry.getKey()))
                                ((int[]) c)[2]++;
                            if (errorDocs.containsKey(entry.getKey()))
                                ((int[]) c)[errorDocs.get(entry.getKey()) == FailType.excl ? 3 : 4]++;
                        }
                    }
                }
            }

            int maxcount = 1000;
            int c = 0;
            // first list only folders
            int filecounter = 0;
            for (Map.Entry<String, Object> entry : list.entrySet()) {
                if ((entry.getValue() instanceof StoreType)) {
                    filecounter++;
                } else {
                    // this is a folder
                    prop.put("files_list_" + c + "_type", 1);
                    prop.putHTML("files_list_" + c + "_type_url", entry.getKey());
                    prop.putHTML("files_list_" + c + "_type_admin", admin ? "true" : "false");
                    int linked = ((int[]) entry.getValue())[0];
                    int stored = ((int[]) entry.getValue())[1];
                    int crawler = ((int[]) entry.getValue())[2];
                    int excl = ((int[]) entry.getValue())[3];
                    int error = ((int[]) entry.getValue())[4];
                    prop.put("files_list_" + c + "_type_stored", stored);
                    prop.put("files_list_" + c + "_type_linked", linked);
                    prop.put("files_list_" + c + "_type_pendingVisible", crawler > 0 ? 1 : 0);
                    prop.put("files_list_" + c + "_type_pending", crawler);
                    prop.put("files_list_" + c + "_type_excludedVisible", excl > 0 ? 1 : 0);
                    prop.put("files_list_" + c + "_type_excluded", excl);
                    prop.put("files_list_" + c + "_type_failedVisible", error > 0 ? 1 : 0);
                    prop.put("files_list_" + c + "_type_failed", error);
                    if (++c >= maxcount)
                        break;
                }
            }
            // then list files
            for (Map.Entry<String, Object> entry : list.entrySet()) {
                if (entry.getValue() instanceof StoreType) {
                    // this is a file
                    prop.put("files_list_" + c + "_type", 0);
                    prop.putHTML("files_list_" + c + "_type_url", entry.getKey());
                    prop.putHTML("files_list_" + c + "_type_admin", admin ? "true" : "false");
                    StoreType type = (StoreType) entry.getValue();
                    try {
                        uri = new DigestURL(entry.getKey());
                    } catch (final MalformedURLException e) {
                        uri = null;
                    }
                    HarvestProcess process = uri == null ? null : sb.crawlQueues.exists(uri.hash()); // todo: cannot identify errors
                    boolean loading = load.equals(entry.getKey())
                            || (process != null && process != HarvestProcess.ERRORS);
                    boolean error = process == HarvestProcess.ERRORS || type == StoreType.EXCLUDED
                            || type == StoreType.FAILED;
                    boolean dc = type != StoreType.INDEX && !error && !loading
                            && list.containsKey(entry.getKey() + "/");
                    if (!dc) {
                        prop.put("files_list_" + c + "_type_stored",
                                type == StoreType.INDEX ? 1 : error ? 3 : loading ? 2 : 0 /*linked*/);
                        if (type == StoreType.INDEX) {
                            String ids = ASCII.String(uri.hash());
                            InfoCacheEntry ice = infoCache.get(ids);
                            prop.put("files_list_" + c + "_type_stored_comment",
                                    ice == null ? "" : ice.toString()); // ice.toString() contains html, therefore do not use putHTML here
                        }
                        prop.put("files_list_" + c + "_type_stored_load", loadRight ? 1 : 0);
                        if (error) {
                            FailType failType = errorDocs.get(entry.getKey());
                            if (failType == null) {
                                // maybe this is only in the errorURL
                                //Metadata faildoc = sb.index.fulltext().getDefaultConnector().getMetadata(ASCII.String(uri.hash()));
                                prop.putHTML("files_list_" + c + "_type_stored_error", "unknown error");
                            } else {
                                String ids = ASCII.String(uri.hash());
                                InfoCacheEntry ice = infoCache.get(ids);
                                prop.put("files_list_" + c + "_type_stored_error",
                                        failType == FailType.excl ? "excluded from indexing"
                                                : "load fail" + (ice == null ? "" : "; " + ice.toString()));
                            }
                        }
                        if (loadRight) {
                            prop.putHTML("files_list_" + c + "_type_stored_load_url", entry.getKey());
                            prop.putHTML("files_list_" + c + "_type_stored_load_path", path);
                        }
                        if (++c >= maxcount)
                            break;
                    }
                }
            }
            prop.put("files_list", c);
            prop.putHTML("files_path", path);
            prop.put("files_hostsize", hostsize);
            prop.put("files_subpathloadsize", storedDocs.size());
            prop.put("files_subpathdetectedsize", filecounter - storedDocs.size());
            prop.put("files", 1);
            uri = new DigestURL(path);
            if (post.containsKey("showlinkstructure")) {
                sb.setConfig(SwitchboardConstants.DECORATION_GRAFICS_LINKSTRUCTURE, true);
            }
            prop.put("files_linkgraph", uri.getPath().length() <= 1 && hostsize > 0
                    && sb.getConfigBool(SwitchboardConstants.DECORATION_GRAFICS_LINKSTRUCTURE, true));
            prop.put("files_linkgraph_host", uri.getHost());

            // generate inbound-links table
            StructureEntry struct = sb.webStructure.incomingReferences(hosthash);
            if (struct != null && struct.references.size() > 0) {
                maxcount = 200;
                ReversibleScoreMap<String> score = new ClusteredScoreMap<String>(
                        UTF8.insensitiveUTF8Comparator);
                for (Map.Entry<String, Integer> entry : struct.references.entrySet())
                    score.set(entry.getKey(), entry.getValue());
                c = 0;
                Iterator<String> i = score.keys(false);
                while (i.hasNext() && c < maxcount) {
                    host = i.next();
                    prop.put("inbound_list_" + c + "_admin", admin ? "true" : "false");
                    prop.putHTML("inbound_list_" + c + "_host", sb.webStructure.hostHash2hostName(host));
                    prop.put("inbound_list_" + c + "_count", score.get(host));
                    c++;
                }
                prop.put("inbound_list", c);
                prop.put("inbound", 1);
            } else {
                prop.put("inbound", 0);
            }

            // generate outbound-links table
            if (outboundHosts.size() > 0) {
                maxcount = 200;
                ReversibleScoreMap<String> score = new ClusteredScoreMap<String>(
                        UTF8.insensitiveUTF8Comparator);
                for (Map.Entry<String, ReversibleScoreMap<String>> entry : outboundHosts.entrySet())
                    score.set(entry.getKey(), entry.getValue().size());
                c = 0;
                Iterator<String> i = score.keys(false);
                while (i.hasNext() && c < maxcount) {
                    host = i.next();
                    prop.putHTML("outbound_list_" + c + "_host", host);
                    prop.put("outbound_list_" + c + "_count", score.get(host));
                    prop.put("outbound_list_" + c + "_link", outboundHosts.get(host).getMinKey());
                    prop.put("outbound_list_" + c + "_admin", admin ? "true" : "false");
                    c++;
                }
                prop.put("outbound_list", c);
                prop.put("outbound", 1);
            } else {
                prop.put("outbound", 0);
            }

        } catch (final Throwable e) {
            ConcurrentLog.logException(e);
        }
    }

    // return rewrite properties
    prop.putNum("ucount", fulltext.collectionSize());
    return prop;
}

From source file:alba.solr.common.MySimpleFloatFunction.java

License:Apache License

@DocTransformer(name = "reverse")
public void reverse(SolrDocument doc) {
    // Reverses the value of the "label" field in place.
    // Guard against documents without a label: the original code would NPE
    // on getFieldValue("label").toString() when the field is absent.
    Object label = doc.getFieldValue("label");
    if (label == null) {
        return;
    }
    // StringBuilder over StringBuffer: no shared mutation here, so the
    // synchronized buffer buys nothing.
    doc.setField("label", new StringBuilder(label.toString()).reverse().toString());
}

From source file:alba.solr.common.MySimpleFloatFunction.java

License:Apache License

@DocTransformer(name = "ucase")
public void ucase(SolrDocument doc, @Param(name = "field", description = "the field") String field) {
    // Upper-cases the named field's value in place; no-op when the field is
    // absent (the original would NPE on a missing field).
    Object value = doc.getFieldValue(field);
    if (value != null) {
        doc.setField(field, value.toString().toUpperCase());
    }
}

From source file:argendata.service.impl.AppServiceImpl.java

/**
 * Runs an app search against Solr and resolves each hit to an approved App.
 *
 * @param myTerms        free-text search terms forwarded to searchQueryApp
 * @param mySortByFields fields to sort the Solr results by
 * @param myKeywords     keyword filters forwarded to searchQueryApp
 * @return one App per Solr hit, looked up by its qualified "Semanticapp:" name
 * @throws MalformedURLException if the Solr endpoint URL is invalid
 * @throws SolrServerException   if the Solr query fails
 */
private List<App> searchApp(String myTerms, List<String> mySortByFields, List<String> myKeywords)
        throws MalformedURLException, SolrServerException {

    QueryResponse queryRsp = searchQueryApp(myTerms, mySortByFields, myKeywords);

    SolrDocumentList docs = queryRsp.getResults();
    // SolrDocumentList is itself iterable; the former subList(0, docs.size())
    // view was a no-op. Pre-size the result to avoid resizes.
    List<App> resp = new ArrayList<App>(docs.size());
    for (SolrDocument sd : docs) {
        // "titleId" is the stable identifier; the "Semanticapp:" prefix forms
        // the qualified name used by getApprovedAppByQName.
        resp.add(this.getApprovedAppByQName("Semanticapp:" + (String) sd.getFieldValue("titleId")));
    }

    return resp;
}

From source file:argendata.service.impl.DatasetServiceImpl.java

/**
 * Converts a Solr query response into Dataset objects.
 *
 * Fixes over the original: a missing "keyword" field no longer NPEs
 * (addAll(null) threw), a null "modified" date no longer NPEs, the
 * redundant subList(0, size()) copy and the redundant (String) cast on
 * toString() are gone.
 *
 * @param rsp the Solr response whose result documents are mapped
 * @return one Dataset per result document
 */
@SuppressWarnings("unchecked")
private List<Dataset> getDatasets(QueryResponse rsp) {

    SolrDocumentList docs = rsp.getResults();
    List<Dataset> datasets = new ArrayList<Dataset>(docs.size());

    for (SolrDocument sd : docs) {
        // "keyword" may be stored single- or multi-valued; normalize both
        // shapes into a set, and tolerate its absence.
        Set<String> keywords = new HashSet<String>();
        Object obj = sd.getFieldValue("keyword");
        if (obj instanceof String) {
            keywords.add((String) obj);
        } else if (obj != null) {
            keywords.addAll((List<String>) obj);
        }

        // assumes "modified" is indexed as a date field — TODO confirm schema
        Date modified = (Date) sd.getFieldValue("modified");

        datasets.add(new Dataset((String) sd.getFieldValue("title"), (String) sd.getFieldValue("license"),
                keywords, (String) sd.getFieldValue("dataQuality"),
                modified == null ? null : modified.toString(),
                (String) sd.getFieldValue("spatial"), (String) sd.getFieldValue("temporal"),
                (String) sd.getFieldValue("location"), (String) sd.getFieldValue("publisher"),
                (String) sd.getFieldValue("resourceType"), (String) sd.getFieldValue("titleId")));
    }

    return datasets;
}

From source file:at.pagu.soldockr.core.convert.MappingSolrConverterTest.java

License:Apache License

@Test
public void testRead() {
    // Populate a raw SolrDocument, run it through the converter, and check
    // that the resulting bean carries the same field values as the source.
    SolrDocument source = new SolrDocument();
    source.addField("stringProperty", "christoph");
    source.addField("intProperty", 32);

    ConvertableBean bean = converter.read(ConvertableBean.class, (Map<String, Object>) source);

    Assert.assertEquals(source.getFieldValue("stringProperty"), bean.getStringProperty());
    Assert.assertEquals(source.getFieldValue("intProperty"), bean.getIntProperty());
}

From source file:at.tugraz.kmi.medokyservice.rec.io.SolrDBClient.java

License:Open Source License

/**
 * Fetches every (timestamp, tags) pair indexed for the given user.
 *
 * Documents missing either the timestamp or the tags field are skipped.
 *
 * @param username the user whose tagged content is queried
 * @return the tag entries found, in Solr result order
 * @throws SolrServerException if the Solr query fails
 */
@SuppressWarnings("unchecked")
public List<ContentTags> getContentTagsByUser(String username) throws SolrServerException {

    List<ContentTags> tagList = new ArrayList<ContentTags>();

    // Pass the field list as separate varargs: the original single argument
    // "tags, timestamp" produced fl="tags, timestamp" with a leading space in
    // the second field name. Rows is effectively "all matches".
    SolrQuery query = new SolrQuery("username:" + username).setFields("tags", "timestamp")
            .setFilterQueries("tags:['' TO *]").setRows(10000000);

    QueryResponse rsp = this.query(query);

    for (SolrDocument doc : rsp.getResults()) {
        Date date = (Date) doc.getFieldValue("timestamp");
        Object tags = doc.getFieldValue("tags");
        if (date == null || tags == null)
            continue; // incomplete document — nothing to record

        ContentTags tagEntry = new ContentTags(date);
        // assumes multi-valued "tags" is materialized as ArrayList<String> by
        // SolrJ — TODO confirm against the ContentTags.tags field type
        tagEntry.tags = (ArrayList<String>) tags;
        tagList.add(tagEntry);
    }

    return tagList;
}