/*jslint
browser: true,
nomen: false,
debug: true,
forin: true,
undef: true,
white: false,
onevar: false
 */
var sc;

/**
 * A library to shorten text.
 *
 * On construction the default long -> short tables are generated and
 * compiled into this.map, an object of { short, regex } entries that
 * shorten() applies in order.
 *
 * @constructor
 */
function SpazShortText() {

	this.map = {};

	this.genBaseMaps();
	this.processBaseMaps();

}



/**
 * internal function to generate the default long -> short maps
 *
 * Fills this.basemap (plain words/phrases, later wrapped in word boundaries)
 * and this.baserawmap (regex source strings that are used exactly as written).
 */
SpazShortText.prototype.genBaseMaps = function() {

	this.basemap = {

		/*
			txtspeak type stuff
		*/
		'about': 'abt',
		'account': 'acct',
		'address': 'addy',
		'anyone': 'ne1',
		'and': '&',
		'at': '@',
		'at the moment': 'atm',
		'back': 'bk',
		'be right back': 'brb',
		'be back later': 'bbl',
		'be back soon': 'bbs',
		'because': 'b/c',
		'boyfriend': 'bf',
		'but': 'but',
		'girlfriend': 'gf',
		'between': 'b/t',
		'by the way': 'btw',
		'definitely': 'def',
		'everyone': 'evr1',
		'favorite': 'fav',
		'for': 'fr',
		'from': 'frm',
		'for example': 'Fr ex',
		'follow': 'fllw',
		'follower': 'fllwr',
		'followers': 'fllwrs',
		'following': 'fllwng',
		'good': 'gd',
		'got': 'gt',
		'having': 'hvg',
		'hours': 'hrs',
		'i don\'t know': 'idk',
		'if i recall correctly': 'iirc',
		'in my opinion': 'imo',
		'in my humble opinion': 'imho',
		'just': 'jst',
		'little': 'lttl',
		'love': '<3',
		'message': 'msg',
		'midnight': '12am',
		'never mind': 'nm',
		'no problem': 'np',
		'not much': 'nm',
		'pages': 'pgs',
		'pictures': 'pics',
		'obviously': 'obvs',
		'please': 'pls',
		'seriously': 'srsly',
		'something': 's/t',
		'sorry': 'sry',
		'text': 'txt',
		'thanks': 'thx',
		'think': 'thk',
		'to be honest': 'tbh',
		'though': 'tho',
		'through': 'thru',
		'weeks': 'wks',
		'with': 'w',
		'without': 'w/o',

		'that': 'tht',
		'what': 'wht',
		'have': 'hv',
		'don\'t': 'dnt',
		'was': 'ws',
		'well': 'wll',
		'right': 'rt',
		'here': 'hr',
		'going': 'gng',
		'like': 'lk',
		'can': 'cn',
		'want': 'wnt',
		'that\'s': 'thts',
		'there': 'thr',
		'come': 'cme',
		'really': 'rly',
		'would': 'wld',
		'look': 'lk',
		'when': 'whn',
		'okay': 'ok',
		'can\'t': 'cnt',
		'tell': 'tll',
		'I\'ll': 'Ill',
		'could': 'cl',
		'didn\'t': 'ddnt',
		'yes': 'y',
		'had': 'hd',
		'then': 'thn',
		'take': 'tke',
		'make': 'mk',
		'gonna': 'gna',
		'never': 'nvr',
		'them': 'thm',
		'more': 'mr',
		'over': 'ovr',
		'where': 'whr',
		'what\'s': 'whts',
		'thing': 'thg',
		'maybe': 'mybe',
		'down': 'dwn',
		'very': 'very',
		'should': 'shld',
		'anything': 'nethg',
		'said': 'sd',
		'any': 'ne',
		'even': 'evn',
		'thank': 'thk',
		'give': 'gve',
		'thought': 'thot',
		'help': 'hlp',
		'talk': 'tlk',
		'people': 'ppl',
		'find': 'fnd',
		'nothing': 'nthg',
		'again': 'agn',
		'things': 'thgs',
		'call': 'cll',
		'told': 'tld',
		'great': 'grt',
		'before': 'b4',
		'better': 'bttr',
		'ever': 'evr',
		'night': 'nite',
		'than': 'thn',
		'away': 'awy',
		'first': '1st',
		'believe': 'blve',
		'other': 'othr',
		'everything': 'evrythg',
		'work': 'wrk',
		'fine': 'fne',
		'home': 'hme',
		'after': 'aftr',
		'last': 'lst',
		'keep': 'kp',
		'around': 'arnd',
		'stop': 'stp',
		'long': 'lng',
		'always': 'alwys',
		'listen': 'lstn',
		'wanted': 'wntd',
		'happened': 'hppnd',
		'won\'t': 'wnt',
		'trying': 'tryng',
		'kind': 'knd',
		'wrong': 'wrng',
		'talking': 'tlkg',
		'being': 'bng',
		'bad': 'bd',
		'remember': 'rmbr',
		'getting': 'gttg',
		'together': 'togthr',
		'mother': 'mom',
		'understand': 'undrstd',
		'wouldn\'t': 'wldnt',
		'actually': 'actly',
		'baby': 'bby',
		'father': 'dad',
		'done': 'dne',
		'wasn\'t': 'wsnt',
		'might': 'mite',
		'every': 'evry',
		'enough': 'engh',
		'someone': 'sm1',
		'family': 'fmly',
		'whole': 'whl',
		'another': 'anthr',
		'jack': 'jck',
		'yourself': 'yrslf',
		'best': 'bst',
		'must': 'mst',
		'coming': 'cmg',
		'looking': 'lkg',
		'woman': 'wmn',
		'which': 'whch',
		'years': 'yrs',
		'room': 'rm',
		'left': 'lft',
		'tonight': '2nte',
		'real': 'rl',
		'hmm': 'hm',
		'happy': 'hpy',
		'pretty': 'prty',
		'girl': 'grl',
		'show': 'shw',
		'friend': 'frnd',
		'already': 'alrdy',
		'saying': 'syng',
		'next': 'nxt',
		'job': 'jb',
		'problem': 'prblm',
		'minute': 'min',
		'found': 'fnd',
		'world': 'wrld',
		'thinking': 'thkg',
		'haven\'t': 'hvnt',
		'heard': 'hrd',
		'honey': 'hny',
		'matter': 'mttr',
		'myself': 'myslf',
		'couldn\'t': 'cldnt',
		'exactly': 'xctly',
		'probably': 'prob',
		'happen': 'hppn',
		'we\'ve': 'wve',
		'hurt': 'hrt',
		'both': 'bth',
		'gotta': 'gtta',
		'alone': 'alne',
		'excuse': 'xcse',
		'start': 'strt',
		'today': '2dy',
		'ready': 'rdy',
		'until': 'untl',
		'whatever': 'wtevr',
		'wants': 'wnts',
		'hold': 'hld',
		'yet': 'yt',
		'took': 'tk',
		'once': '1ce',
		'gone': 'gne',
		'called': 'clld',
		'morning': 'morn',
		'supposed': 'sppsd',
		'friends': 'frnds',
		'stuff': 'stff',
		'most': 'mst',
		'used': 'usd',
		'worry': 'wrry',
		'second': '2nd',
		'part': 'prt',
		'truth': 'trth',
		'school': 'schl',
		'forget': 'frgt',
		'business': 'biz',
		'cause': 'cuz',
		'telling': 'tllg',
		'chance': 'chnce',
		'move': 'mv',
		'person': 'prsn',
		'somebody': 'smbdy',
		'heart': 'hrt',
		'point': 'pt',
		'later': 'ltr',
		'making': 'makg',
		'anyway': 'nywy',
		'many': 'mny',
		'phone': 'phn',
		'reason': 'rsn',
		'looks': 'lks',
		'bring': 'brng',
		'turn': 'trn',
		'tomorrow': 'tmrw',
		'trust': 'trst',
		'check': 'chk',
		'change': 'chng',
		'anymore': 'anymr',
		'town': 'twn',
		'aren\'t': 'rnt',
		'working': 'wrkg',
		'year': 'yr',
		'taking': 'tkg',
		'means': 'mns',
		'brother': 'bro',
		'play': 'ply',
		'hate': 'h8',
		'says': 'sez',
		'beautiful': 'btfl',
		'crazy': 'crzy',
		'party': 'prty',
		'afraid': 'afrd',
		'important': 'imptnt',
		'rest': 'rst',
		'word': 'wrd',
		'watch': 'wtch',
		'glad': 'gld',
		'sister': 'sistr',
		'minutes': 'min',
		'everybody': 'evrybdy',
		'couple': 'cpl',
		'either': 'ethr',
		'feeling': 'flg',
		'under': 'undr',
		'break': 'brk',
		'promise': 'prmse',
		'easy': 'ez',
		'question': 'q',
		'doctor': 'doc',
		'walk': 'wlk',
		'trouble': 'trbl',
		'different': 'diff',
		'hospital': 'hsptl',
		'anybody': 'anybdy',
		'wedding': 'wddg',
		'perfect': 'prfct',
		'police': 'cops',
		'waiting': 'wtng',
		'dinner': 'din',
		'against': 'agst',
		'funny': 'fny',
		'husband': 'hsbnd',
		'child': 'kid',
		'shouldn\'t': 'shldnt',
		'half': '1/2',
		'moment': 'mmnt',
		'sleep': 'slp',
		'started': 'strtd',
		'young': 'yng',
		'sounds': 'snds',
		'lucky': 'lky',
		'sometimes': 'smtimes',
		'plan': 'pln',
		'serious': 'srs',
		'ahead': 'ahd',
		'week': 'wk',
		'wonderful': 'wndfl',
		'past': 'pst',
		'number': '#',
		'nobody': 'nbdy',
		'along': 'alng',
		'finally': 'fnly',
		'worried': 'wrrd',
		'book': 'bk',
		'sort': 'srt',
		'safe': 'sfe',
		'living': 'livg',
		'children': 'kids',
		'weren\'t': 'wrnt',
		'front': 'frnt',
		'loved': 'luvd',
		'asking': 'askg',
		'running': 'rnng',
		'clear': 'clr',
		'figure': 'fgr',
		'felt': 'flt',
		'parents': 'prnts',
		'absolutely': 'abs',
		'alive': 'alve',
		'meant': 'mnt',
		'happens': 'hppns',
		'kidding': 'kddg',
		'full': 'fl',
		'meeting': 'mtg',
		'coffee': 'cffe',
		'sound': 'snd',
		'women': 'wmn',
		'welcome': 'wlcm',
		'months': 'mnths',
		'hour': 'hr',
		'speak': 'spk',
		'thinks': 'thks',
		'Christmas': 'Xmas',
		'possible': 'pssble',
		'worse': 'wrs',
		'company': 'co',
		'mistake': 'mstk',
		'handle': 'hndl',
		'spend': 'spnd',
		'totally': 'ttly',
		'giving': 'gvg',
		'control': 'ctrl',
		'realize': 'rlze',
		'power': 'pwr',
		'president': 'pres',
		'girls': 'grls',
		'taken': 'tkn',
		'picture': 'pic',
		'talked': 'tlkd',
		'hundred': 'hndrd',
		'changed': 'chgd',
		'completely': 'cmpltly',
		'explain': 'exp',
		'playing': 'plyg',
		'relationship': 'rlshp',
		'loves': 'lvs',
		'fucking': 'fkg',
		'anywhere': 'newhr',
		'questions': 'qs',
		'wonder': 'wndr',
		'calling': 'cllg',
		'somewhere': 'smwhr',
		'straight': 'str8',
		'fast': 'fst',
		'words': 'wrds',
		'worked': 'wrkd',
		'light': 'lite',
		'cannot': 'can\'t',
		'protect': 'prtct',
		'class': 'cls',
		'surprise': 'sprise',
		'sweetheart': 'swthrt',
		'looked': 'lkd',
		'except': 'xcpt',
		'takes': 'tks',
		'situation': 'sitn',
		'besides': 'bsds',
		'pull': 'pll',
		'himself': 'hmslf',
		'hasn\'t': 'hsnt',
		'worth': 'wrth',
		'amazing': 'amzg',
		'given': 'gvn',
		'expect': 'xpct',
		'rather': 'rthr',
		'black': 'blk',
		'movie': 'film',
		'country': 'cntry',
		'perhaps': 'prhps',
		'watching': 'wtchg',
		'darling': 'darlg',
		'honor': 'hnr',
		'personal': 'prsnl',
		'moving': 'movg',
		'till': 'til',
		'admit': 'admt',
		'problems': 'prbs',
		'information': 'info',
		'honest': 'hnst',
		'missed': 'mssd',
		'longer': 'lngr',
		'dollars': '$s',
		'evening': 'eve',
		'starting': 'strtg',
		'suppose': 'spps',
		'street': 'st',
		'sitting': 'sttg',
		'favor': 'fvr',
		'apartment': 'apt',
		'court': 'crt',
		'terrible': 'trrbl',
		'clean': 'cln',
		'learn': 'lrn',
		'works': 'wks',
		'relax': 'rlx',
		'million': 'mil',
		'prove': 'prv',
		'smart': 'smrt',
		'missing': 'missg',
		'forgot': 'frgt',
		'small': 'sm',
		'interested': 'intrstd',
		'table': 'tbl',
		'become': 'bcm',
		'pregnant': 'preg',
		'middle': 'mddl',
		'ring': 'rng',
		'careful': 'crfl',
		'figured': 'fgrd',
		'stick': 'stk',
		'stopped': 'stppd',
		'standing': 'stndg',
		'forgive': 'frgv',
		'wearing': 'wearg',
		'hoping': 'hopg',
		'thousand': 'k',
		'paper': 'ppr',
		'tough': 'tuff',
		'count': 'cnt',
		'birthday': 'bday',
		'history': 'hstry',
		'share': 'shr',
		'offer': 'offr',
		'hurry': 'hrry',
		'feet': 'ft',
		'wondering': 'wonderg',
		'building': 'buildg',
		'ones': '1s',
		'finish': 'fin',
		'would\'ve': 'wldve',
		'interesting': 'intrstg',
		'enjoy': 'njoy',
		'road': 'rd',
		'staying': 'stayg',
		'short': 'shrt',
		'finished': 'fin',
		'respect': 'rspct',
		'spent': 'spnt',
		'attention': 'attn',
		'holding': 'hldg',
		'surprised': 'srprsd',
		'keeping': 'kpg',
		'putting': 'puttg',
		'dark': 'drk',
		'self': 'slf',
		'using': 'usg',
		'helping': 'helpg',
		'normal': 'nrml',
		'lawyer': 'atty',
		'floor': 'flr',
		'whether': 'whthr',
		'everything\'s': 'evrthg\'s',
		'present': 'prsnt',
		'private': 'priv',
		'cover': 'cvr',
		'judge': 'jdg',
		'upstairs': 'upstrs',
		'mommy': 'mom',
		'possibly': 'pssbly',
		'worst': 'wrst',


		/*
			contractions
		*/
		'I am': 'I\'m',
		'I will': 'I\'ll',
		'I had': 'I\'d',
		'I would': 'I\'d',
		'I have': 'I\'ve',

		'You are': 'You\'re',
		'You will': 'You\'ll',
		'You had': 'You\'d',
		'You would': 'You\'d',
		'You have': 'You\'ve',

		'He is': 'He\'s',
		'He has': 'He\'s',
		'He will': 'He\'ll',
		'He had': 'He\'d',
		'He would': 'He\'d',

		'She is': 'She\'s',
		'She has': 'She\'s',
		'She will': 'She\'ll',
		'She had': 'She\'d',
		'She would': 'She\'d',

		'It is': 'It\'s',
		'It has': 'It\'s',
		'It will': 'It\'ll',
		'It would': 'It\'d',
		'It had': 'It\'d',

		'We are': 'We\'re',
		'We will': 'We\'ll',
		'We had': 'We\'d',
		'We would': 'We\'d',
		'We have': 'We\'ve',

		'They are': 'They\'re',
		'They will': 'They\'ll',
		'They had': 'They\'d',
		'They would': 'They\'d',
		'They have': 'They\'ve',

		'There is': 'There\'s',
		'There has': 'There\'s',
		'There will': 'There\'ll',
		'There had': 'There\'d',
		'There would': 'There\'d',

		'That is': 'That\'s',
		'That has': 'That\'s',
		'That will': 'That\'ll',
		'That had': 'That\'d',
		'That would': 'That\'d',

		'are not': 'aren\'t',
		'can not': 'can\'t',
		'could not': 'couldn\'t',
		'did not': 'didn\'t',
		'does not': 'doesn\'t',
		'do not': 'don\'t',
		'had not': 'hadn\'t',
		'has not': 'hasn\'t',
		'is not': 'isn\'t',
		'must not': 'mustn\'t',
		'need not': 'needn\'t',
		'should not': 'shouldn\'t',
		'was not': 'wasn\'t',
		'were not': 'weren\'t',
		'will not': 'won\'t',
		'would not': 'wouldn\'t',

		/*
			numbers
		*/
		'one': '1',
		'two': '2',
		'three': '3',
		'four': '4',
		'five': '5',
		'six': '6',
		'seven': '7',
		'eight': '8',
		'nine': '9',
		'ten': '10',
		'eleven': '11',
		'twelve': '12',
		'twenty': '20'

	};


	/*
		these mappings aren't to be altered at all when processed into regexes.
		They are applied in the order written here, after every basemap entry.
	*/
	this.baserawmap = {
		'--': '–',
		'-\\s+': '-',
		'\\s+-': '-',
		'\\s+': ' ',
		'\\s+$': '', // trim right
		'^\\s+': '', // trim left
		'\\s?\\.\\.\\.': '…', // ellipses
		'\\.\\s+': '. ', // one space only after periods
		'\\.\\s*$': '', // remove end period
		'RT:? @[a-z0-9_]+:? RT:? @([a-z0-9_]+):?': 'RT @$1' //remove extra RTs
	};
};


/**
 * This processes the base maps into the this.map object of regexes and replacements
 *
 * this.map is emptied in place and rebuilt on every call, so a reference
 * previously obtained from getMaps() stays valid and up to date.
 *
 * Order of the rebuilt entries (which is the order shorten() applies them):
 *   1. basemap entries, longest key first (ties keep their basemap order), so
 *      a phrase such as 'be right back' is replaced before its component words
 *      ('back', 'right') get a chance to rewrite it;
 *   2. baserawmap entries, in the order they appear in baserawmap.
 *
 * basemap keys are inserted into the pattern as written (not escaped) and
 * wrapped in captured word boundaries; baserawmap keys are used untouched.
 * Every regex is compiled with the 'gi' flags.
 */
SpazShortText.prototype.processBaseMaps = function() {
	var hasOwn = Object.prototype.hasOwnProperty,
		ordered = [],
		key,
		i;

	// Start from a clean slate without replacing the object itself.
	for (key in this.map) {
		if (hasOwn.call(this.map, key)) {
			delete this.map[key];
		}
	}

	// Remember each key's original position so the sort below is stable
	// even on engines whose Array.prototype.sort is not.
	for (key in this.basemap) {
		if (hasOwn.call(this.basemap, key)) {
			ordered.push({ 'key': key, 'pos': ordered.length });
		}
	}

	ordered.sort(function(a, b) {
		return (b.key.length - a.key.length) || (a.pos - b.pos);
	});

	for (i = 0; i < ordered.length; i++) {
		key = ordered[i].key;
		this.map[key] = {
			'short': '$1' + this.basemap[key] + '$2',
			'regex': new RegExp('(\\b)' + key + '(\\b)', 'gi')
		};
	}

	/*
		take the rawmap stuff and glob it into this.map, so we only have one to worry about
	*/
	for (key in this.baserawmap) {
		if (hasOwn.call(this.baserawmap, key)) {
			this.map[key] = {
				'short': this.baserawmap[key],
				'regex': new RegExp(key, 'gi')
			};
		}
	}

};


/**
 * shortens the given text according to the map
 *
 * Every entry of this.map is applied once, in the map's own order.
 * null/undefined yield an empty string; any other non-string value is
 * converted with String() before processing.
 *
 * @param {string} text
 * @return {string}
 */
SpazShortText.prototype.shorten = function(text) {
	var hasOwn = Object.prototype.hasOwnProperty,
		key,
		entry;

	if (text === null || text === undefined) {
		return '';
	}
	text = String(text);

	for (key in this.map) {
		if (hasOwn.call(this.map, key)) {
			entry = this.map[key];
			text = text.replace(entry.regex, entry["short"]);
		}
	}

	return text;

};


/**
 * this adds a new mapping to the basemaps and processes the base maps into regexes again
 * @param {string} search
 * @param {string} replace
 * @param {boolean} israw if true, this mapping won't be altered at all when processed into a regex
 */
SpazShortText.prototype.addMap = function(search, replace, israw) {
	if (israw) {
		this.baserawmap[search] = replace;
	} else {
		this.basemap[search] = replace;
	}

	this.processBaseMaps();
};



/**
 * returns the map
 *
 * Keys are the search strings; each value is an object holding the compiled
 * 'regex' and its 'short' replacement string.
 *
 * @return {object}
 */
SpazShortText.prototype.getMaps = function() {
	return this.map;
};