Java tutorial
/* * semanticcms-core-sitemap - Automatic sitemaps for SemanticCMS. * Copyright (C) 2016, 2017, 2018 AO Industries, Inc. * support@aoindustries.com * 7262 Bull Pen Cir * Mobile, AL 36695 * * This file is part of semanticcms-core-sitemap. * * semanticcms-core-sitemap is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * semanticcms-core-sitemap is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with semanticcms-core-sitemap. If not, see <http://www.gnu.org/licenses/>. */ package com.semanticcms.core.sitemap; import static com.aoindustries.encoding.TextInXhtmlEncoder.encodeTextInXhtml; import static com.aoindustries.encoding.TextInXhtmlEncoder.textInXhtmlEncoder; import com.aoindustries.servlet.http.ServletUtil; import com.aoindustries.tempfiles.TempFileContext; import com.aoindustries.tempfiles.servlet.ServletTempFileContext; import com.aoindustries.util.Tuple2; import com.semanticcms.core.controller.Book; import com.semanticcms.core.controller.CapturePage; import com.semanticcms.core.controller.CountConcurrencyFilter; import com.semanticcms.core.controller.SemanticCMS; import com.semanticcms.core.controller.subrequest.HttpServletSubRequest; import com.semanticcms.core.controller.subrequest.HttpServletSubResponse; import com.semanticcms.core.controller.subrequest.UnmodifiableCopyHttpServletRequest; import com.semanticcms.core.controller.subrequest.UnmodifiableCopyHttpServletResponse; import com.semanticcms.core.model.ChildRef; import com.semanticcms.core.model.Page; import com.semanticcms.core.model.PageRef; import com.semanticcms.core.pages.CaptureLevel; import com.semanticcms.core.renderer.html.HtmlRenderer; import com.semanticcms.core.renderer.html.View; import java.io.IOException; import java.io.PrintWriter; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Set; import java.util.SortedSet; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; import java.util.logging.Level; import java.util.logging.Logger; import javax.servlet.ServletContext; import javax.servlet.ServletException; import javax.servlet.annotation.WebServlet; import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import org.joda.time.ReadableInstant; import org.joda.time.format.DateTimeFormatter; import org.joda.time.format.ISODateTimeFormat; /** * Creates a site map index of all per-book sitemaps. */ @WebServlet(SiteMapIndexServlet.SERVLET_PATH) public class SiteMapIndexServlet extends HttpServlet { private static final long serialVersionUID = 1L; private static final Logger logger = Logger.getLogger(SiteMapIndexServlet.class.getName()); public static final String SERVLET_PATH = "/sitemap-index.xml"; private static final String CONTENT_TYPE = "application/xml"; private static final String ENCODING = "UTF-8"; private static void writeSitemap(HttpServletRequest req, HttpServletResponse resp, PrintWriter out, Book book, ReadableInstant lastmod, DateTimeFormatter iso8601) throws IOException { out.println(" <sitemap>"); out.print(" <loc>"); ServletUtil.getAbsoluteURL(req, resp.encodeURL(book.getBookRef().getPrefix() + SiteMapServlet.SERVLET_PATH), textInXhtmlEncoder, out); out.println("</loc>"); if (lastmod != null) { out.print(" <lastmod>"); encodeTextInXhtml(iso8601.print(lastmod), out); out.println("</lastmod>"); } out.println(" </sitemap>"); } /** * The response is not given to getLastModified, but we need it for captures to get * the last modified. */ private static final String RESPONSE_IN_REQUEST_ATTRIBUTE = SiteMapIndexServlet.class.getName() + ".responseInRequest"; @Override protected void service(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException { Object old = req.getAttribute(RESPONSE_IN_REQUEST_ATTRIBUTE); try { req.setAttribute(RESPONSE_IN_REQUEST_ATTRIBUTE, resp); super.service(req, resp); } finally { req.setAttribute(RESPONSE_IN_REQUEST_ATTRIBUTE, old); } } /** * Gets all the books that contain at least one accessible view/page combo. * Also provides the last modified time, if known, for the book. */ private static List<Tuple2<Book, ReadableInstant>> getSitemapBooks(final ServletContext servletContext, HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException { SemanticCMS semanticCMS = SemanticCMS.getInstance(servletContext); HtmlRenderer htmlRenderer = HtmlRenderer.getInstance(servletContext); final SortedSet<View> views = htmlRenderer.getViews(); List<Book> books; { // Filter published and accessible only Collection<Book> values = semanticCMS.getPublishedBooks().values(); books = new ArrayList<Book>(values.size()); for (Book book : values) if (book.isAccessible()) books.add(book); } int numBooks = books.size(); if (numBooks > 1 && CountConcurrencyFilter.useConcurrentSubrequests(req)) { // Concurrent implementation final HttpServletRequest threadSafeReq = new UnmodifiableCopyHttpServletRequest(req); final HttpServletResponse threadSafeResp = new UnmodifiableCopyHttpServletResponse(resp); final TempFileContext tempFileContext = ServletTempFileContext.getTempFileContext(req); List<Book> booksWithSiteMapUrl; { List<Callable<Boolean>> tasks = new ArrayList<Callable<Boolean>>(numBooks); { for (final Book book : books) { tasks.add(new Callable<Boolean>() { @Override public Boolean call() throws ServletException, IOException { HttpServletRequest subrequest = new HttpServletSubRequest(threadSafeReq); HttpServletResponse subresponse = new HttpServletSubResponse(threadSafeResp, tempFileContext); if (logger.isLoggable(Level.FINE)) logger.log(Level.FINE, "called, subrequest={0}, book={1}", new Object[] { subrequest, book }); return hasSiteMapUrl(servletContext, subrequest, subresponse, views, book, book.getContentRoot() //new HashSet<PageRef>() ); } }); } } List<Boolean> results; try { results = semanticCMS.getExecutors().getPerProcessor().callAll(tasks); } catch (InterruptedException e) { throw new ServletException(e); } catch (ExecutionException e) { Throwable cause = e.getCause(); if (cause instanceof RuntimeException) throw (RuntimeException) cause; if (cause instanceof ServletException) throw (ServletException) cause; if (cause instanceof IOException) throw (IOException) cause; throw new ServletException(cause); } // Now find the last modified with concurrency booksWithSiteMapUrl = new ArrayList<Book>(numBooks); { int i = 0; for (Book book : books) { if (results.get(i++)) booksWithSiteMapUrl.add(book); } assert i == numBooks; } } int booksWithSiteMapUrlSize = booksWithSiteMapUrl.size(); if (booksWithSiteMapUrlSize > 0) { if (booksWithSiteMapUrlSize > 1) { // Concurrent implementation List<Callable<ReadableInstant>> lastModifiedTasks = new ArrayList<Callable<ReadableInstant>>( booksWithSiteMapUrlSize); { for (final Book book : booksWithSiteMapUrl) { lastModifiedTasks.add(new Callable<ReadableInstant>() { @Override public ReadableInstant call() throws ServletException, IOException { HttpServletRequest subrequest = new HttpServletSubRequest(threadSafeReq); HttpServletResponse subresponse = new HttpServletSubResponse(threadSafeResp, tempFileContext); if (logger.isLoggable(Level.FINE)) logger.log(Level.FINE, "called, subrequest={0}, book={1}", new Object[] { subrequest, book }); return SiteMapServlet.getLastModified(servletContext, subrequest, subresponse, views, book); } }); } } List<ReadableInstant> lastModifieds; try { lastModifieds = semanticCMS.getExecutors().getPerProcessor().callAll(lastModifiedTasks); } catch (InterruptedException e) { throw new ServletException(e); } catch (ExecutionException e) { Throwable cause = e.getCause(); if (cause instanceof RuntimeException) throw (RuntimeException) cause; if (cause instanceof ServletException) throw (ServletException) cause; if (cause instanceof IOException) throw (IOException) cause; throw new ServletException(cause); } List<Tuple2<Book, ReadableInstant>> sitemapBooks = new ArrayList<Tuple2<Book, ReadableInstant>>( booksWithSiteMapUrlSize); for (int i = 0; i < booksWithSiteMapUrlSize; i++) { sitemapBooks.add(new Tuple2<Book, ReadableInstant>(booksWithSiteMapUrl.get(i), lastModifieds.get(i))); } return sitemapBooks; } else { // Single implementation Book book = booksWithSiteMapUrl.get(0); return Collections.singletonList(new Tuple2<Book, ReadableInstant>(book, SiteMapServlet.getLastModified(servletContext, req, resp, views, book))); } } else { return Collections.emptyList(); } } else { // Sequential implementation List<Tuple2<Book, ReadableInstant>> sitemapBooks = new ArrayList<Tuple2<Book, ReadableInstant>>( numBooks); for (Book book : books) { if (hasSiteMapUrl(servletContext, req, resp, views, book, book.getContentRoot())) { sitemapBooks.add(new Tuple2<Book, ReadableInstant>(book, SiteMapServlet.getLastModified(servletContext, req, resp, views, book))); } } return sitemapBooks; } } /** * Last modified is known only when the last modified is known for all books, * and it is the most recent of all the per-book last modified. */ @Override protected long getLastModified(HttpServletRequest req) { try { ReadableInstant mostRecent = null; for (Tuple2<Book, ReadableInstant> sitemapBook : getSitemapBooks(getServletContext(), req, (HttpServletResponse) req.getAttribute(RESPONSE_IN_REQUEST_ATTRIBUTE))) { ReadableInstant lastModified = sitemapBook.getElement2(); // If any single book is unknown, the overall result is unknown if (lastModified == null) { mostRecent = null; break; } if (mostRecent == null || (lastModified.compareTo(mostRecent) > 0)) { mostRecent = lastModified; } } return mostRecent == null ? -1 : mostRecent.getMillis(); } catch (ServletException e) { log("getLastModified failed", e); return -1; } catch (IOException e) { log("getLastModified failed", e); return -1; } } @Override protected void doGet(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException { List<Tuple2<Book, ReadableInstant>> sitemapBooks = getSitemapBooks(getServletContext(), req, resp); final DateTimeFormatter iso8601 = ISODateTimeFormat.dateTime(); resp.resetBuffer(); resp.setContentType(CONTENT_TYPE); resp.setCharacterEncoding(ENCODING); PrintWriter out = resp.getWriter(); out.println("<?xml version=\"1.0\" encoding=\"" + ENCODING + "\"?>"); out.println("<sitemapindex xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">"); for (Tuple2<Book, ReadableInstant> sitemapBook : sitemapBooks) { writeSitemap(req, resp, out, sitemapBook.getElement1(), sitemapBook.getElement2(), iso8601); } out.println("</sitemapindex>"); } /** * Checks if the sitemap has at least one page. * This version implemented as a traversal. */ private static boolean hasSiteMapUrl(final ServletContext servletContext, final HttpServletRequest req, final HttpServletResponse resp, final SortedSet<View> views, final Book book, PageRef pageRef) throws ServletException, IOException { Boolean result = CapturePage.traversePagesAnyOrder(servletContext, req, resp, pageRef, CaptureLevel.META, new CapturePage.PageHandler<Boolean>() { @Override public Boolean handlePage(Page page) throws ServletException, IOException { // TODO: Chance for more concurrency here by view? for (View view : views) { if (view.getAllowRobots(servletContext, req, resp, page) && view.isApplicable(servletContext, req, resp, page)) { return true; } } return null; } }, new CapturePage.TraversalEdges() { @Override public Set<ChildRef> getEdges(Page page) { return page.getChildRefs(); } }, new CapturePage.EdgeFilter() { @Override public boolean applyEdge(PageRef childPage) { return book.getBookRef().equals(childPage.getBookRef()); } }); assert result == null || result : "Should always be null or true"; return result != null; } }