Example usage for org.apache.lucene.document DateTools stringToDate

List of usage examples for org.apache.lucene.document DateTools stringToDate

Introduction

On this page you can find example usages of org.apache.lucene.document DateTools stringToDate.

Prototype

public static Date stringToDate(String dateString) throws ParseException 

Source Link

Document

Converts a string produced by timeToString or dateToString back to a time, represented as a Date object.

Usage

From source file:com.appeligo.alerts.KeywordAlertChecker.java

License:Apache License

/**
 * Records a keyword match for the given alert and program, unless one is already stored.
 *
 * @param keywordAlert the alert whose id is used to look up an existing match
 * @param doc the Lucene document; its "programID" field and the live lineup's
 *            end-time field are read
 * @return {@code true} if a new {@code KeywordMatch} was inserted; {@code false} if a
 *         match already existed or the end time was missing or unparsable
 */
public boolean isNewMatch(KeywordAlert keywordAlert, Document doc) {
    final String programId = doc.get("programID");

    final boolean alreadyMatched = KeywordMatch.getKeywordMatch(keywordAlert.getId(), programId) != null;
    if (alreadyMatched) {
        if (log.isDebugEnabled()) {
            log.debug("This is not a new match, so keyword alert has already been sent.");
        }
        return false;
    }

    // The end time is stored per lineup; use the live lineup's value.
    final String rawEndTime = doc.get("lineup-" + liveLineup + "-endTime");
    if (rawEndTime == null) {
        log.error("Software bug that 'endTime' for lineup " + liveLineup + " was not found for program id "
                + programId);
        return false;
    }

    final Date endTime;
    try {
        endTime = DateTools.stringToDate(rawEndTime);
    } catch (ParseException e) {
        log.error(
                "Software bug resulted in exception with document 'endTime' format in lucene document for program id "
                        + programId,
                e);
        return false;
    }

    new KeywordMatch(keywordAlert, programId, endTime).insert();
    return true;
}

From source file:com.appeligo.alerts.KeywordAlertChecker.java

License:Apache License

/**
 * Builds the template context for a keyword-alert notification and inserts the
 * resulting email and/or SMS {@code Message} for the alert's user.
 *
 * <p>The method returns without sending anything when the alert has no user, or when
 * the document has no start time for the user's lineup. {@code NumberFormatException},
 * {@code MessageContextException} and {@code ParseException} raised while building the
 * messages are logged and not propagated.
 *
 * @param keywordAlert the alert that matched; supplies the user, the query text, the
 *                     alert id, the per-day alert counters and the delivery flags
 * @param fragments text placed in the "Relevant Dialogue" section; when {@code null}
 *                  or blank the section is left empty
 * @param doc the Lucene document for the program; program, station, start/end time
 *            and description fields are read from it
 * @param messagePrefix prefix to which "_email" or "_sms" is appended before being
 *                      passed to the {@code Message} constructor
 */
public void sendMessages(KeywordAlert keywordAlert, String fragments, Document doc, String messagePrefix) {

    User user = keywordAlert.getUser();
    if (user == null) {
        return;
    }
    String programId = doc.get("programID");
    String programTitle = doc.get("programTitle");

    // NOTE(review): user cannot be null here (see the early return above), so the
    // null check inside the log expression is redundant.
    if (log.isDebugEnabled())
        log.debug("keywordAlert: " + keywordAlert.getUserQuery() + ", sending message to "
                + (user == null ? null : user.getUsername()));

    try {
        // Use the user's lineup to determine the start time of this program which might air at different times for diff timezones
        String startTimeString = doc.get("lineup-" + user.getLineupId() + "-startTime");
        if (startTimeString == null) {
            // This user doesn't have the channel or program that our local feed has
            if (log.isDebugEnabled()) {
                String station = doc.get("lineup-" + liveLineup + "-stationName");
                log.debug("No startTime for station " + station + ", program " + programTitle + ", lineup="
                        + user.getLineupId() + ", start time from live lineup="
                        + doc.get("lineup-" + liveLineup + "-startTime"));
            }
            return;
        }
        Date startTime = DateTools.stringToDate(startTimeString);
        // NOTE(review): unlike startTime, the endTime field is not null-checked before parsing.
        Date endTime = DateTools.stringToDate(doc.get("lineup-" + user.getLineupId() + "-endTime"));
        long durationMinutes = (endTime.getTime() - startTime.getTime()) / (60 * 1000);

        // Classify the airing relative to the current time.
        Date now = new Date();
        boolean future = endTime.after(now);
        boolean onAirNow = startTime.before(now) && future;
        // NOTE(review): 'past' is computed but not read anywhere else in this method.
        boolean past = !(future || onAirNow);

        ProgramType programType = ProgramType.fromProgramID(programId);

        // Episodes, sports events and movies are treated as "unique" programs below.
        boolean uniqueProgram = false;
        if (programType == ProgramType.EPISODE || programType == ProgramType.SPORTS
                || programType == ProgramType.MOVIE) {
            uniqueProgram = true;
        }

        Map<String, String> context = new HashMap<String, String>();

        // Within 12 hours of now (before or after) show only the time of day;
        // otherwise include the weekday and date as well.
        boolean includeDate;
        DateFormat format;
        if (Math.abs(startTime.getTime() - System.currentTimeMillis()) < 12 * 60 * 60 * 1000) {
            format = DateFormat.getTimeInstance(DateFormat.SHORT);
            includeDate = false;
        } else {
            format = new SimpleDateFormat("EEEE, MMMM d 'at' h:mm a");
            includeDate = true;
        }
        format.setTimeZone(user.getTimeZone());
        context.put("startTime", format.format(startTime));
        // The short form reuses the time-only format unless a date is needed.
        if (includeDate) {
            format = DateFormat.getDateTimeInstance(DateFormat.SHORT, DateFormat.SHORT);
            format.setTimeZone(user.getTimeZone());
        }
        context.put("shortStartTime", format.format(startTime));

        context.put("durationMinutes", Long.toString(durationMinutes));
        // Use the SDTW-C lineup because this is how we know the right channel (station callsign) where we caught the
        // keyword.
        String stationName = doc.get("lineup-" + liveLineup + "-stationName");
        context.put("stationName", stationName);
        // NOTE(review): stationName is dereferenced without a null check.
        boolean sameStation = false;
        if (stationName.equals(doc.get("lineup-" + user.getLineupId() + "-stationName"))) {
            sameStation = true;
        }
        context.put("stationCallSign", doc.get("lineup-" + liveLineup + "-stationCallSign"));

        // Introductory sentence: wording depends on whether the user's lineup carries the
        // same station as the live lineup, and on when the program airs for the user.
        if (sameStation) {
            if (onAirNow) {
                context.put("timeChannelIntro", "We have been monitoring <b>" + stationName
                        + "</b>, and your topic was recently mentioned on the following program:");
            } else if (future) {
                if (uniqueProgram) {
                    context.put("timeChannelIntro", "We are monitoring <b>" + stationName
                            + "</b>, and your topic will be mentioned on the following program:");
                } else {
                    context.put("timeChannelIntro",
                            "We are monitoring <b>" + stationName + "</b>, and your topic was mentioned on <b>"
                                    + programTitle + "</b>. It may be mentioned when this program airs again:");
                }
            } else {
                context.put("timeChannelIntro", "We have been monitoring <b>" + stationName
                        + "</b>, and your topic was mentioned on a program that aired in your area in the past. "
                        + "You may have an opportunity to see this program in the future:");
            }
        } else {
            if (onAirNow) {
                context.put("timeChannelIntro", "We have been monitoring <b>" + programTitle
                        + "</b>, and your topic was recently mentioned:");
            } else if (future) {
                if (uniqueProgram) {
                    context.put("timeChannelIntro", "We have been monitoring <b>" + programTitle
                            + "</b>, and your topic was mentioned.  You may have an opportunity to catch this program when it airs again according to the following schedule:");
                } else {
                    context.put("timeChannelIntro", "We have been monitoring <b>" + programTitle
                            + "</b>, and your topic was mentioned.  This program will air again as follows, but the topics may or may not be the same:");
                }
            } else {
                context.put("timeChannelIntro", "We have been monitoring <b>" + programTitle
                        + "</b>, and your topic was mentioned.  However, this program aired in your area in the past. "
                        + "You may have an opportunity to see this program in the future:");
            }
        }
        // Label that precedes the start time; "on" is used when a date is shown, "at" otherwise.
        if (onAirNow) {
            context.put("startsAt", "Started at");
        } else if (future) {
            if (includeDate) {
                context.put("startsAt", "Starts on");
            } else {
                context.put("startsAt", "Starts at");
            }
        } else {
            if (includeDate) {
                context.put("startsAt", "Last aired on");
            } else {
                context.put("startsAt", "Previously aired at");
            }
        }
        context.put("lcStartsAt", context.get("startsAt").toLowerCase());

        // Program details: prefer the values stored in the document and fall back to the EPG.
        String webPath = doc.get("webPath");
        if (webPath == null) {
            webPath = DefaultEpg.getInstance().getProgram(programId).getWebPath();
        }
        // Strip a leading slash; webPath is appended directly to 'url' further down.
        if (webPath.charAt(0) == '/') {
            webPath = webPath.substring(1);
        }
        String reducedTitle40 = doc.get("reducedTitle40");
        if (reducedTitle40 == null) {
            reducedTitle40 = DefaultEpg.getInstance().getProgram(programId).getReducedTitle40();
        }
        String programLabel = doc.get("programLabel");
        if (programLabel == null) {
            programLabel = DefaultEpg.getInstance().getProgram(programId).getLabel();
        }
        context.put("programId", programId);
        context.put("webPath", webPath);
        context.put("programLabel", programLabel);
        context.put("reducedTitle40", reducedTitle40);
        // NOTE(review): the "description" field is dereferenced without a null check.
        if (doc.get("description").trim().length() > 0) {
            context.put("description", "Description: " + doc.get("description") + "<br/>");
        } else {
            context.put("description", "");
        }
        if (fragments == null || fragments.trim().length() == 0) {
            context.put("fragments", "");
        } else {
            context.put("fragments", "Relevant Dialogue: <i>" + fragments + "</i><br/>");
        }
        context.put("query", keywordAlert.getUserQuery());
        context.put("keywordAlertId", Long.toString(keywordAlert.getId()));
        // Greet by first name when one is set, otherwise by username.
        String greeting = user.getUsername();
        context.put("username", greeting);
        String firstName = user.getFirstName();
        if (firstName != null && firstName.trim().length() > 0) {
            greeting = firstName;
        }
        context.put("greeting", greeting);

        format = DateFormat.getTimeInstance(DateFormat.SHORT);
        format.setTimeZone(user.getTimeZone());
        context.put("now", format.format(new Date()));

        // Rerun / reminder paragraph, based on the next showing in the user's lineup (if any).
        ScheduledProgram futureProgram = DefaultEpg.getInstance().getNextShowing(user.getLineupId(), programId,
                false, false);
        if (uniqueProgram) {
            String typeString = null;
            if (programType == ProgramType.EPISODE) {
                typeString = "episode";
            } else if (programType == ProgramType.SPORTS) {
                typeString = "game";
            } else {
                typeString = "movie";
            }
            if (futureProgram != null) {
                // Less than 12 hours ahead: time only ("at ..."); otherwise weekday and date ("on ...").
                // Unlike the start-time check above, no absolute value is taken here.
                String timePreposition = null;
                if ((futureProgram.getStartTime().getTime() - System.currentTimeMillis()) < 12 * 60 * 60
                        * 1000) {
                    timePreposition = "at ";
                    format = DateFormat.getTimeInstance(DateFormat.SHORT);
                } else {
                    timePreposition = "on ";
                    format = new SimpleDateFormat("EEEE, MMMM d 'at' h:mm a");
                }
                format.setTimeZone(user.getTimeZone());
                context.put("rerunInfo", "You can still catch this " + typeString
                        + " in its entirety!  It's scheduled to replay " + timePreposition
                        + format.format(futureProgram.getStartTime()) + " on "
                        + futureProgram.getNetwork().getStationName() + ". Do you want to <a href=\"" + url
                        + webPath + "#addreminder\">set a reminder</a> to be notified the next time this "
                        + typeString + " airs?");
            } else {
                // No upcoming showing: sports get no rerun text, other types get a reminder link.
                if (programType == ProgramType.SPORTS) {
                    context.put("rerunInfo", "");
                } else {
                    if (onAirNow) {
                        context.put("rerunInfo",
                                "If it's too late to flip on the program now, you can <a href=\"" + url
                                        + webPath
                                        + "#addreminder\">set a reminder</a> to be notified the next time this "
                                        + typeString + " airs.");
                    } else {
                        context.put("rerunInfo",
                                "You can <a href=\"" + url + webPath
                                        + "#addreminder\">set a reminder</a> to be notified the next time this "
                                        + typeString + " airs.");
                    }
                }
            }
        } else {
            if ((futureProgram != null) && futureProgram.isNewEpisode()) {
                context.put("rerunInfo",
                        "The next airing of this show will be new content, and is <i>not a rerun</i>,"
                                + " so these same topics may or may not be discussed."
                                + "  You may still be interested in catching future airings, and you can"
                                + " <a href=\"" + url + webPath
                                + "#addreminder\">set a Flip.TV reminder for this show</a>.");
            } else {
                context.put("rerunInfo",
                        "The broadcaster did not provide enough information to know which future airings,"
                                + " if any, are identical reruns with the same topics mentioned."
                                + "  You may still be interested in catching future airings, and you can"
                                + " <a href=\"" + url + webPath
                                + "#addreminder\">set a Flip.TV reminder for this show</a>.");
            }
        }

        // Add the limit notice only when today's count is exactly equal to the daily maximum.
        if (keywordAlert.getTodaysAlertCount() == keywordAlert.getMaxAlertsPerDay()) {
            context.put("maxAlertsExceededSentence",
                    "You asked to stop receiving alerts for this topic after receiving "
                            + keywordAlert.getMaxAlertsPerDay()
                            + " alerts in a single day. That limit has been reached. You can change this setting"
                            + " at any time.  Otherwise, we will resume sending alerts"
                            + " for this topic tomorrow.");
        } else {
            context.put("maxAlertsExceededSentence", "");
        }

        // Deliver through each channel enabled on the alert; both use the same context map.
        if (keywordAlert.isUsingPrimaryEmailRealtime()) {
            Message message = new Message(messagePrefix + "_email", context);
            message.setUser(user);
            message.setTo(user.getPrimaryEmail());
            if (log.isDebugEnabled())
                log.debug("Sending email message to: " + user.getPrimaryEmail());
            message.insert();
        }
        // NOTE(review): getSmsEmail() is dereferenced without a null check.
        if (keywordAlert.isUsingSMSRealtime() && user.getSmsEmail().trim().length() > 0) {
            Message message = new Message(messagePrefix + "_sms", context);
            message.setTo(user.getSmsEmail());
            message.setUser(user);
            message.setSms(true);
            if (log.isDebugEnabled())
                log.debug("Sending sms message to: " + user.getSmsEmail());
            message.insert();
        }
    } catch (NumberFormatException e) {
        log.error("Couldn't process lucene document for program " + programId, e);
    } catch (MessageContextException e) {
        log.error("Software bug resulted in exception with email message context or configuration", e);
    } catch (ParseException e) {
        log.error(
                "Software bug resulted in exception with document 'startTime' or 'endTime' format in lucene document for program id "
                        + programId,
                e);
    }
}

From source file:com.appeligo.lucene.DocumentWrapper.java

License:Apache License

/**
 * Reads the named field from the wrapped document and parses it as a Lucene date string.
 *
 * @param name the field name to look up
 * @return the parsed date, or {@code null} if the field is absent or cannot be parsed
 *         (a parse failure is logged at warn level)
 */
public Date getDate(String name) {
    final String raw = doc.get(name);
    if (raw == null) {
        return null;
    }
    try {
        return DateTools.stringToDate(raw);
    } catch (ParseException e) {
        log.warn("Error parsing date.", e);
        return null;
    }
}

From source file:com.appeligo.search.actions.SearchResult.java

License:Apache License

/**
 * Creates a search result from an indexed program document, reading the station and
 * airing fields that belong to the given lineup.
 *
 * <p>If the start or end time cannot be parsed, the error is logged and the
 * timing-related fields that had not yet been assigned are left at their defaults.
 *
 * @param lineup lineup id used to build the per-lineup field names
 * @param doc wrapped Lucene document for the program
 * @param programInfo program details to attach to this result
 * @param lastShowing previous showing to attach to this result
 * @param nextShowing upcoming showing to attach to this result
 */
public SearchResult(String lineup, DocumentWrapper doc, Program programInfo, ScheduledProgram lastShowing,
        ScheduledProgram nextShowing) {
    this.lineup = lineup;
    this.doc = doc;
    this.programInfo = programInfo;
    this.lastShowing = lastShowing;
    this.nextShowing = nextShowing;

    // Program-level fields.
    programId = doc.get("programID");
    programTitle = doc.get("programTitle");
    episodeTitle = doc.get("episodeTitle");
    description = doc.get("description");

    // Lineup-specific fields are stored as "lineup-<id>-<field>".
    final String lineupField = "lineup-" + lineup + "-";
    stationName = doc.get(lineupField + "stationName");
    stationCallSign = doc.get(lineupField + "stationCallSign");

    try {
        airing = DateTools.stringToDate(doc.get(lineupField + "startTime"));
        endTime = DateTools.stringToDate(doc.get(lineupField + "endTime"));

        final Date currentTime = new Date();
        future = endTime.after(currentTime);
        onAir = future && airing.before(currentTime);
    } catch (ParseException e) {
        log.error("Couldn't parse start or end time for " + programId, e);
    }
}

From source file:com.gitblit.LuceneExecutor.java

License:Apache License

/**
 * Copies the stored fields of a Lucene hit into a new {@code SearchResult}.
 *
 * @param doc the matching document
 * @param score the hit's score
 * @param hitId the id assigned to this hit
 * @param totalHits the total number of hits for the query
 * @return the populated result; tags and labels are set only when their fields are present
 * @throws ParseException if the stored date field cannot be parsed
 */
private SearchResult createSearchResult(Document doc, float score, int hitId, int totalHits)
        throws ParseException {
    final SearchResult hit = new SearchResult();

    // Ranking information supplied by the caller.
    hit.hitId = hitId;
    hit.totalHits = totalHits;
    hit.score = score;

    // Stored document fields.
    hit.date = DateTools.stringToDate(doc.get(FIELD_DATE));
    hit.type = SearchObjectType.fromName(doc.get(FIELD_OBJECT_TYPE));
    hit.summary = doc.get(FIELD_SUMMARY);
    hit.author = doc.get(FIELD_AUTHOR);
    hit.committer = doc.get(FIELD_COMMITTER);
    hit.branch = doc.get(FIELD_BRANCH);
    hit.commitId = doc.get(FIELD_COMMIT);
    hit.issueId = doc.get(FIELD_ISSUE);
    hit.path = doc.get(FIELD_PATH);

    // Optional multi-valued fields.
    final String tagValue = doc.get(FIELD_TAG);
    if (tagValue != null) {
        hit.tags = StringUtils.getStringsFromValue(tagValue);
    }
    final String labelValue = doc.get(FIELD_LABEL);
    if (labelValue != null) {
        hit.labels = StringUtils.getStringsFromValue(labelValue);
    }
    return hit;
}

From source file:com.gitblit.service.LuceneService.java

License:Apache License

/**
 * Copies the stored fields of a Lucene hit into a new {@code SearchResult}.
 *
 * @param doc the matching document
 * @param score the hit's score
 * @param hitId the id assigned to this hit
 * @param totalHits the total number of hits for the query
 * @return the populated result; tags are set only when the tag field is present
 * @throws ParseException if the stored date field cannot be parsed
 */
private SearchResult createSearchResult(Document doc, float score, int hitId, int totalHits)
        throws ParseException {
    final SearchResult hit = new SearchResult();

    // Ranking information supplied by the caller.
    hit.hitId = hitId;
    hit.totalHits = totalHits;
    hit.score = score;

    // Stored document fields.
    hit.date = DateTools.stringToDate(doc.get(FIELD_DATE));
    hit.type = SearchObjectType.fromName(doc.get(FIELD_OBJECT_TYPE));
    hit.summary = doc.get(FIELD_SUMMARY);
    hit.author = doc.get(FIELD_AUTHOR);
    hit.committer = doc.get(FIELD_COMMITTER);
    hit.branch = doc.get(FIELD_BRANCH);
    hit.commitId = doc.get(FIELD_COMMIT);
    hit.path = doc.get(FIELD_PATH);

    // Optional multi-valued field.
    final String tagValue = doc.get(FIELD_TAG);
    if (tagValue != null) {
        hit.tags = StringUtils.getStringsFromValue(tagValue);
    }
    return hit;
}

From source file:com.github.lucene.store.CreateTestIndex.java

License:Apache License

/**
 * Builds a Lucene document from a book description stored as a properties file.
 *
 * <p>The category is the file's parent directory relative to {@code rootDir}, with the
 * platform separator replaced by '/'. The properties read are "isbn", "title",
 * "author" (comma-separated), "url", "subject" and "pubmonth".
 *
 * @param rootDir base directory; its length is stripped from the file's parent path
 * @param file the properties file describing one book
 * @return the populated document
 * @throws IOException if the properties file cannot be read
 * @throws ParseException if "pubmonth" is not a date string {@code DateTools} can parse
 */
private static Document getDocument(final String rootDir, final File file) throws IOException, ParseException {
    final Properties props = new Properties();
    // Properties.load does not close the stream it reads from, so close it here;
    // otherwise a file handle is leaked for every file that is indexed.
    final FileInputStream in = new FileInputStream(file);
    try {
        props.load(in);
    } finally {
        in.close();
    }

    String category = file.getParent().substring(rootDir.length());
    category = category.replace(File.separatorChar, '/');

    final String isbn = props.getProperty("isbn");
    final String title = props.getProperty("title");
    final String authors = props.getProperty("author");
    final String url = props.getProperty("url");
    final String subject = props.getProperty("subject");
    final String pubmonth = props.getProperty("pubmonth");
    final Date date = DateTools.stringToDate(pubmonth);
    final String[] contents = new String[] { title, subject, authors, category };

    System.out.println(
            title + "\n" + authors + "\n" + subject + "\n" + pubmonth + "\n" + category + "\n---------");

    final Document doc = new Document();
    doc.add(new StringField("isbn", isbn, Field.Store.YES));
    doc.add(new StringField("category", category, Field.Store.YES));
    doc.add(new TextField("title", title, Field.Store.YES));
    doc.add(new StringField("title2", title.toLowerCase(), Field.Store.YES));
    // One "author" field instance per comma-separated author.
    for (final String author : authors.split(",")) {
        doc.add(new StringField("author", author, Field.Store.YES));
    }
    doc.add(new StringField("url", url, Field.Store.YES));
    doc.add(new TextField("subject", subject, Field.Store.YES));
    doc.add(new IntField("pubmonth", Integer.parseInt(pubmonth), Field.Store.YES));
    // Publication date expressed as whole days since the epoch.
    doc.add(new IntField("pubmonthAsDay", (int) (date.getTime() / (1000 * 3600 * 24)), Field.Store.NO));
    // Catch-all field that indexes title, subject, authors and category together.
    for (final String text : contents) {
        doc.add(new TextField("contents", text, Field.Store.NO));
    }
    return doc;
}

From source file:com.knowgate.lucene.NewsMessageSearcher.java

License:Open Source License

/**
 * Compose a Lucene query based on given parameters
 * @param sLuceneIndexPath String Base path for Lucene indexes excluding WorkArea and table name
 * @param sWorkArea String GUID of WorkArea to be searched, cannot be null
 * @param sGroup sNewsGroupCategoryName String GUID or Category Name of NewsGroup to which message belongs (optional, may be null)
 * @param sAuthor String/*  w  w w . j  a  v a  2  s . co  m*/
 * @param sTitle String
 * @param sText String
 * @param iLimit int
 * @param oSortBy Comparator
 * @return NewsMessageRecord[] An Array of NewsMessageRecord objects or <b>null</b> if no messages where found matching the given criteria
 * @throws ParseException
 * @throws IOException
 * @throws NullPointerException
 */
public static NewsMessageRecord[] search(String sLuceneIndexPath, String sWorkArea,
        String sNewsGroupCategoryName, String sAuthor, String sTitle, Date dtFromDate, Date dtToDate,
        String sText, int iLimit, Comparator oSortBy) throws ParseException, IOException, NullPointerException {

    if (null == sLuceneIndexPath)
        throw new NullPointerException("NewsMessageSearcher.search() luceneindex parameter cannot be null");

    if (null == sWorkArea)
        throw new NullPointerException("NewsMessageSearcher.search() workarea parameter cannot be null");

    if (DebugFile.trace) {
        DebugFile.writeln("Begin NewsMessageSearcher.search(" + sLuceneIndexPath + "," + sWorkArea + ","
                + sNewsGroupCategoryName + "," + sAuthor + "," + sTitle + "," + dtFromDate + "," + dtToDate
                + "," + sText + "," + String.valueOf(iLimit) + ")");
        DebugFile.incIdent();
    }

    NewsMessageRecord[] aRetArr;

    BooleanQuery oQrx = new BooleanQuery();

    oQrx.add(new TermQuery(new Term("workarea", sWorkArea)), BooleanClause.Occur.MUST);

    if (null != sNewsGroupCategoryName)
        oQrx.add(new TermQuery(new Term("container", sNewsGroupCategoryName)), BooleanClause.Occur.MUST);

    if (dtFromDate != null && dtToDate != null)
        oQrx.add(
                new TermRangeQuery("created", DateTools.dateToString(dtFromDate, DateTools.Resolution.DAY),
                        DateTools.dateToString(dtToDate, DateTools.Resolution.DAY), true, true),
                BooleanClause.Occur.MUST);
    else if (dtFromDate != null)
        oQrx.add(new TermRangeQuery("created", DateTools.dateToString(dtFromDate, DateTools.Resolution.DAY),
                null, true, false), BooleanClause.Occur.MUST);

    else if (dtToDate != null)
        oQrx.add(new TermRangeQuery("created", null, DateTools.dateToString(dtToDate, DateTools.Resolution.DAY),
                false, true), BooleanClause.Occur.MUST);

    BooleanQuery oQry = new BooleanQuery();

    if (null != sAuthor)
        oQry.add(new TermQuery(new Term("author", sAuthor)), BooleanClause.Occur.SHOULD);

    if (null != sTitle)
        oQry.add(new TermQuery(new Term("title", sTitle)), BooleanClause.Occur.SHOULD);

    if (null != sText)
        oQry.add(new TermQuery(new Term("text", escape(Gadgets.ASCIIEncode(sText).toLowerCase()))),
                BooleanClause.Occur.SHOULD);

    oQrx.add(oQry, BooleanClause.Occur.MUST);

    String sSegments = Gadgets.chomp(sLuceneIndexPath, File.separator) + "k_newsmsgs" + File.separator
            + sWorkArea;
    if (DebugFile.trace)
        DebugFile.writeln("new IndexSearcher(" + sSegments + ")");
    Directory oDir = Indexer.openDirectory(sSegments);
    IndexSearcher oSearch = new IndexSearcher(oDir);

    Document oDoc;

    if (DebugFile.trace)
        DebugFile.writeln("IndexSearcher.search(" + oQrx.toString() + ")");
    TopDocs oTopSet = oSearch.search(oQrx, null, iLimit > 0 ? iLimit : 2147483647);
    if (oTopSet.scoreDocs != null) {
        ScoreDoc[] oTopDoc = oTopSet.scoreDocs;
        final int iDocCount = oTopDoc.length <= iLimit ? oTopDoc.length : iLimit;
        aRetArr = new NewsMessageRecord[iDocCount];
        for (int d = 0; d < iDocCount; d++) {
            oDoc = oSearch.doc(oTopDoc[d].doc);
            try {
                aRetArr[d] = new NewsMessageRecord(oTopDoc[d].score, oDoc.get("workarea"), oDoc.get("guid"),
                        oDoc.get("thread"), oDoc.get("container"), oDoc.get("title"), oDoc.get("author"),
                        DateTools.stringToDate(oDoc.get("created")), oDoc.get("abstract"));
            } catch (java.text.ParseException neverthrown) {
                throw new ParseException("NewsMessageSearcher.search() Error parsing date "
                        + oDoc.get("created") + " of document " + oDoc.get("guid"));
            }
        } // next
    } else {
        aRetArr = null;
    }

    oSearch.close();
    oDir.close();

    if (oSortBy != null) {
        Arrays.sort(aRetArr, oSortBy);
    }

    if (DebugFile.trace) {
        DebugFile.decIdent();
        if (null == aRetArr)
            DebugFile.writeln("End NewsMessageSearcher.search() : no records found");
        else
            DebugFile.writeln("End NewsMessageSearcher.search() : " + String.valueOf(aRetArr.length));
    }
    return aRetArr;
}

From source file:com.leavesfly.lia.commom.CreateTestIndex.java

License:Apache License

/**
 * Builds a Lucene document from a book description stored as a properties file.
 *
 * <p>The category is the file's parent directory relative to {@code rootDir}, with the
 * platform separator replaced by '/'. The properties read are "isbn", "title",
 * "author" (comma-separated), "url", "subject" and "pubmonth".
 *
 * @param rootDir base directory; its length is stripped from the file's parent path
 * @param file the properties file describing one book
 * @return the populated document
 * @throws IOException if the properties file cannot be read
 * @throws RuntimeException wrapping the {@code ParseException} if "pubmonth" cannot be
 *         parsed by {@code DateTools}
 */
public static Document getDocument(String rootDir, File file) throws IOException {
    Properties props = new Properties();
    // Properties.load does not close the stream it reads from, so close it here;
    // otherwise a file handle is leaked for every file that is indexed.
    FileInputStream in = new FileInputStream(file);
    try {
        props.load(in);
    } finally {
        in.close();
    }

    Document doc = new Document();

    // category comes from relative path below the base directory
    String category = file.getParent().substring(rootDir.length());
    category = category.replace(File.separatorChar, '/');

    // pull the book attributes out of the properties file
    String isbn = props.getProperty("isbn");
    String title = props.getProperty("title");
    String author = props.getProperty("author");
    String url = props.getProperty("url");
    String subject = props.getProperty("subject");

    String pubmonth = props.getProperty("pubmonth");

    System.out.println(
            title + "\n" + author + "\n" + subject + "\n" + pubmonth + "\n" + category + "\n---------");

    doc.add(new Field("isbn",
            isbn,
            Field.Store.YES,
            Field.Index.NOT_ANALYZED));
    doc.add(new Field("category",
            category,
            Field.Store.YES,
            Field.Index.NOT_ANALYZED));
    doc.add(new Field("title",
            title,
            Field.Store.YES,
            Field.Index.ANALYZED,
            Field.TermVector.WITH_POSITIONS_OFFSETS));
    doc.add(new Field("title2",
            title.toLowerCase(),
            Field.Store.YES,
            Field.Index.NOT_ANALYZED_NO_NORMS,
            Field.TermVector.WITH_POSITIONS_OFFSETS));

    // split multiple authors into unique field instances
    String[] authors = author.split(",");
    for (String a : authors) {
        doc.add(new Field("author",
                a,
                Field.Store.YES,
                Field.Index.NOT_ANALYZED,
                Field.TermVector.WITH_POSITIONS_OFFSETS));
    }

    doc.add(new Field("url",
            url,
            Field.Store.YES,
            Field.Index.NOT_ANALYZED_NO_NORMS));
    doc.add(new Field("subject",
            subject,
            Field.Store.YES,
            Field.Index.ANALYZED,
            Field.TermVector.WITH_POSITIONS_OFFSETS));

    doc.add(new NumericField("pubmonth",
            Field.Store.YES,
            true).setIntValue(Integer.parseInt(pubmonth)));

    // publication date expressed as whole days since the epoch
    Date d;
    try {
        d = DateTools.stringToDate(pubmonth);
    } catch (ParseException pe) {
        throw new RuntimeException(pe);
    }
    doc.add(new NumericField("pubmonthAsDay")
            .setIntValue((int) (d.getTime() / (1000 * 3600 * 24))));

    // catch-all field that indexes title, subject, author and category together
    for (String text : new String[] { title, subject, author, category }) {
        doc.add(new Field("contents", text,
                Field.Store.NO, Field.Index.ANALYZED,
                Field.TermVector.WITH_POSITIONS_OFFSETS));
    }

    return doc;
}

From source file:com.lucid.touchstone.data.BasicDocMaker.java

License:Apache License

/**
 * Creates one Lucene document from the given doc data, consuming up to {@code size}
 * characters of the body.
 *
 * <p>The portion of the body that is used is removed from {@code docData.body}; the
 * remainder (possibly empty) is left there.
 *
 * @param docData source data; {@code name}, {@code date}, {@code title} and
 *                {@code body} are each added only when non-null
 * @param size number of body characters to use; a value &lt;= 0, or one that is at
 *             least the body length, uses the whole body
 * @param cnt when non-negative, appended to the name as "_" + cnt
 * @return the new document
 * @throws Exception if the date cannot be parsed or the body cannot be encoded
 */
private Document createDocument(DocData docData, int size, int cnt) throws Exception {
    final int sequence = incrNumDocsCreated();
    final Document created = new Document();
    created.add(new Field(ID_FIELD, "doc" + sequence, storeVal, indexVal, termVecVal));

    if (docData.name != null) {
        final String docName;
        if (cnt < 0) {
            docName = docData.name;
        } else {
            docName = docData.name + "_" + cnt;
        }
        created.add(new Field(NAME_FIELD, docName, storeVal, indexVal, termVecVal));
    }

    if (docData.date != null) {
        // Round-trip through DateTools so the stored value has second resolution.
        final String normalizedDate = DateTools.dateToString(DateTools.stringToDate(docData.date),
                DateTools.Resolution.SECOND);
        created.add(new Field(DATE_FIELD, normalizedDate, storeVal, indexVal, termVecVal));
    }

    if (docData.title != null) {
        created.add(new Field(TITLE_FIELD, docData.title, storeVal, indexVal, termVecVal));
    }

    final String available = docData.body;
    if (available != null && available.length() > 0) {
        final String chunk;
        if (size > 0 && size < available.length()) {
            // Try not to split a word: cut at the first whitespace found between
            // index size - 1 and size + 19, otherwise cut exactly at 'size'.
            int cut = size;
            final int scanLimit = Math.min(size + 20, available.length());
            for (int pos = size - 1; pos < scanLimit; pos++) {
                if (Character.isWhitespace(available.charAt(pos))) {
                    cut = pos;
                    break;
                }
            }
            chunk = available.substring(0, cut);
            docData.body = available.substring(cut);
        } else {
            // Whole body is consumed; nothing is left for a later call.
            chunk = available;
            docData.body = "";
        }
        created.add(new Field(BODY_FIELD, chunk, storeVal, indexVal, termVecVal));
        if (storeBytes) {
            created.add(new Field(BYTES_FIELD, chunk.getBytes("UTF-8")));
        }
    }

    return created;
}