1 /*jslint 
  2 browser: true,
  3 nomen: false,
  4 debug: true,
  5 forin: true,
  6 undef: true,
  7 white: false,
  8 onevar: false 
  9  */
// NOTE(review): `sc` is declared here but never read or written in the visible
// source; presumably a shared namespace populated elsewhere — confirm.
var sc;
 11 
 12 /**
 13  * A library to shorten text 
 14  * @constructor
 15  */
 16 function SpazShortText() {
 17 
 18 	this.map = {};
 19 		
 20 	this.genBaseMaps();
 21 	this.processBaseMaps();
 22 
 23 	
 24 }
 25 
 26 
 27 
 28 /**
 29  * internal function to generate the default long -> short maps 
 30  */
 31 SpazShortText.prototype.genBaseMaps = function() {
 32 	
 33 	this.basemap = {
 34 		
 35 		/*
 36 			txtspeak type stuff
 37 		*/
 38 		'about'                 :'abt',
 39 		'account'               :'acct',
 40 		'address'               :'addy',
 41 		'anyone'				:'ne1',
 42 		'and'                   :'&',
 43 		'at'					:'@',
 44 		'at the moment'         :'atm',
 45 		'back'					:'bk',
 46 		'be right back' 	    :'brb',
 47 		'be back later' 	    :'bbl',
 48 		'be back soon' 		    :'bbs',
 49 		'because' 			    :'b/c',
 50 		'boyfriend'			    :'bf',
 51 		'but'					:'but',
 52 		'girlfriend'		    :'gf',
 53 		'between'			    :'b/t',
 54 		'by the way'		    :'btw',
 55 		'definitely'		    :'def',
 56 		'everyone'				:'evr1',
 57 		'favorite'				:'fav',
 58 		'for'					:'fr',
 59 		'from'					:'frm',
 60 		'for example'			:'Fr ex',
 61 		'follow'				:'fllw',
 62 		'follower'				:'fllwr',
 63 		'followers'				:'fllwrs',
 64 		'following'				:'fllwng',
 65 		'good'					:'gd',
 66 		'got'					:'gt',
 67 		'having'				:'hvg',
 68 		'hours'					:'hrs',
 69 		'i don\'t know'		    :'idk',
 70 		'if i recall correctly' :'iirc',
 71 		'in my opinion'		    :'imo',
 72 		'in my humble opinion'  :'imho',
 73 		'just'					:'jst',
 74 		'little'				:'lttl',
 75 		'love'				    :'<3',
 76 		'message'			    :'msg',
 77 		'midnight'				:'12am',
 78 		'never mind'		    :'nm',
 79 		'no problem'		    :'np',
 80 		'not much'			    :'nm',
 81 		'pages'					:'pgs',
 82 		'pictures'			    :'pics',
 83 		'obviously'			    :'obvs',
 84 		'please'			    :'pls',
 85 		'seriously'			    :'srsly',
 86 		'something'			    :'s/t',
 87 		'sorry'				    :'sry',
 88 		'text'				    :'txt',
 89 		'thanks'			    :'thx',
 90 		'think'				    :'thk',
 91 		'to be honest'		    :'tbh',
 92 		'though'				:'tho',
 93 		'through'				:'thru',
 94 		'weeks'					:'wks',
 95 		'with'					:'w',
 96 		'without'				:'w/o',
 97 		
 98 		'that'			:'tht',
 99 		'what'			:'wht',
100 		'have'			:'hv',
101 		'don\'t'			:'dnt',
102 		'was'			:'ws',
103 		'well'			:'wll',
104 		'right'			:'rt',
105 		'here'			:'hr',
106 		'going'			:'gng',
107 		'like'			:'lk',
108 		'can'			:'cn',
109 		'want'			:'wnt',
110 		'that\'s'			:'thts',
111 		'there'			:'thr',
112 		'come'			:'cme',
113 		'really'			:'rly',
114 		'would'			:'wld',
115 		'look'			:'lk',
116 		'when'			:'whn',
117 		'okay'			:'ok',
118 		'can\'t'			:'cnt',
119 		'tell'			:'tll',
120 		'I\'ll'			:'Ill',
121 		'could'			:'cl',
122 		'didn\'t'			:'ddnt',
123 		'yes'			:'y',
124 		'had'			:'hd',
125 		'then'			:'thn',
126 		'take'			:'tke',
127 		'make'			:'mk',
128 		'gonna'			:'gna',
129 		'never'			:'nvr',
130 		'them'			:'thm',
131 		'more'			:'mr',
132 		'over'			:'ovr',
133 		'where'			:'whr',
134 		'what\'s'			:'whts',
135 		'thing'			:'thg',
136 		'maybe'			:'mybe',
137 		'down'			:'dwn',
138 		'very'			:'very',
139 		'should'			:'shld',
140 		'anything'			:'nethg',
141 		'said'			:'sd',
142 		'any'			:'ne',
143 		'even'			:'evn',
144 		'thank'			:'thk',
145 		'give'			:'gve',
146 		'thought'			:'thot',
147 		'help'			:'hlp',
148 		'talk'			:'tlk',
149 		'people'			:'ppl',
150 		'find'			:'fnd',
151 		'nothing'			:'nthg',
152 		'again'			:'agn',
153 		'things'			:'thgs',
154 		'call'			:'cll',
155 		'told'			:'tld',
156 		'great'			:'grt',
157 		'before'			:'b4',
158 		'better'			:'bttr',
159 		'ever'			:'evr',
160 		'night'			:'nite',
161 		'than'			:'thn',
162 		'away'			:'awy',
163 		'first'			:'1st',
164 		'believe'			:'blve',
165 		'other'			:'othr',
166 		'everything'			:'evrythg',
167 		'work'			:'wrk',
168 		'fine'			:'fne',
169 		'home'			:'hme',
170 		'after'			:'aftr',
171 		'last'			:'lst',
172 		'keep'			:'kp',
173 		'around'			:'arnd',
174 		'stop'			:'stp',
175 		'long'			:'lng',
176 		'always'			:'alwys',
177 		'listen'			:'lstn',
178 		'wanted'			:'wntd',
179 		'happened'			:'hppnd',
180 		'won\'t'			:'wnt',
181 		'trying'			:'tryng',
182 		'kind'			:'knd',
183 		'wrong'			:'wrng',
184 		'talking'			:'tlkg',
185 		'being'			:'bng',
186 		'bad'			:'bd',
187 		'remember'			:'rmbr',
188 		'getting'			:'gttg',
189 		'together'			:'togthr',
190 		'mother'			:'mom',
191 		'understand'			:'undrstd',
192 		'wouldn\'t'			:'wldnt',
193 		'actually'			:'actly',
194 		'baby'			:'bby',
195 		'father'			:'dad',
196 		'done'			:'dne',
197 		'wasn\'t'			:'wsnt',
198 		'might'			:'mite',
199 		'every'			:'evry',
200 		'enough'			:'engh',
201 		'someone'			:'sm1',
202 		'family'			:'fmly',
203 		'whole'			:'whl',
204 		'another'			:'anthr',
205 		'jack'			:'jck',
206 		'yourself'			:'yrslf',
207 		'best'			:'bst',
208 		'must'			:'mst',
209 		'coming'			:'cmg',
210 		'looking'			:'lkg',
211 		'woman'			:'wmn',
212 		'which'			:'whch',
213 		'years'			:'yrs',
214 		'room'			:'rm',
215 		'left'			:'lft',
216 		'tonight'			:'2nte',
217 		'real'			:'rl',
218 		'hmm'			:'hm',
219 		'happy'			:'hpy',
220 		'pretty'			:'prty',
221 		'girl'			:'grl',
222 		'show'			:'shw',
223 		'friend'			:'frnd',
224 		'already'			:'alrdy',
225 		'saying'			:'syng',
226 		'next'			:'nxt',
227 		'job'			:'jb',
228 		'problem'			:'prblm',
229 		'minute'			:'min',
230 		'found'			:'fnd',
231 		'world'			:'wrld',
232 		'thinking'			:'thkg',
233 		'haven\'t'			:'hvnt',
234 		'heard'			:'hrd',
235 		'honey'			:'hny',
236 		'matter'			:'mttr',
237 		'myself'			:'myslf',
238 		'couldn\'t'			:'cldnt',
239 		'exactly'			:'xctly',
240 		'probably'			:'prob',
241 		'happen'			:'hppn',
242 		'we\'ve'			:'wve',
243 		'hurt'			:'hrt',
244 		'both'			:'bth',
245 		'gotta'			:'gtta',
246 		'alone'			:'alne',
247 		'excuse'			:'xcse',
248 		'start'			:'strt',
249 		'today'			:'2dy',
250 		'ready'			:'rdy',
251 		'until'			:'untl',
252 		'whatever'			:'wtevr',
253 		'wants'			:'wnts',
254 		'hold'			:'hld',
255 		'yet'			:'yt',
256 		'took'			:'tk',
257 		'once'			:'1ce',
258 		'gone'			:'gne',
259 		'called'			:'clld',
260 		'morning'			:'morn',
261 		'supposed'			:'sppsd',
262 		'friends'			:'frnds',
263 		'stuff'			:'stff',
264 		'most'			:'mst',
265 		'used'			:'usd',
266 		'worry'			:'wrry',
267 		'second'			:'2nd',
268 		'part'			:'prt',
269 		'truth'			:'trth',
270 		'school'			:'schl',
271 		'forget'			:'frgt',
272 		'business'			:'biz',
273 		'cause'			:'cuz',
274 		'telling'			:'tllg',
275 		'chance'			:'chnce',
276 		'move'			:'mv',
277 		'person'			:'prsn',
278 		'somebody'			:'smbdy',
279 		'heart'			:'hrt',
280 		'point'			:'pt',
281 		'later'			:'ltr',
282 		'making'			:'makg',
283 		'anyway'			:'nywy',
284 		'many'			:'mny',
285 		'phone'			:'phn',
286 		'reason'			:'rsn',
287 		'looks'			:'lks',
288 		'bring'			:'brng',
289 		'turn'			:'trn',
290 		'tomorrow'			:'tmrw',
291 		'trust'			:'trst',
292 		'check'			:'chk',
293 		'change'			:'chng',
294 		'anymore'			:'anymr',
295 		'town'			:'twn',
296 		'aren\'t'			:'rnt',
297 		'working'			:'wrkg',
298 		'year'			:'yr',
299 		'taking'			:'tkg',
300 		'means'			:'mns',
301 		'brother'			:'bro',
302 		'play'			:'ply',
303 		'hate'			:'h8',
304 		'says'			:'sez',
305 		'beautiful'			:'btfl',
306 		'crazy'			:'crzy',
307 		'party'			:'prty',
308 		'afraid'			:'afrd',
309 		'important'			:'imptnt',
310 		'rest'			:'rst',
311 		'word'			:'wrd',
312 		'watch'			:'wtch',
313 		'glad'			:'gld',
314 		'sister'			:'sistr',
315 		'minutes'			:'min',
316 		'everybody'			:'evrybdy',
317 		'couple'			:'cpl',
318 		'either'			:'ethr',
319 		'feeling'			:'flg',
320 		'under'			:'undr',
321 		'break'			:'brk',
322 		'promise'			:'prmse',
323 		'easy'			:'ez',
324 		'question'			:'q',
325 		'doctor'			:'doc',
326 		'walk'			:'wlk',
327 		'trouble'			:'trbl',
328 		'different'			:'diff',
329 		'hospital'			:'hsptl',
330 		'anybody'			:'anybdy',
331 		'wedding'			:'wddg',
332 		'perfect'			:'prfct',
333 		'police'			:'cops',
334 		'waiting'			:'wtng',
335 		'dinner'			:'din',
336 		'against'			:'agst',
337 		'funny'			:'fny',
338 		'husband'			:'hsbnd',
339 		'child'			:'kid',
340 		'shouldn\'t'			:'shldnt',
341 		'half'			:'1/2',
342 		'moment'			:'mmnt',
343 		'sleep'			:'slp',
344 		'started'			:'strtd',
345 		'young'			:'yng',
346 		'sounds'			:'snds',
347 		'lucky'			:'lky',
348 		'sometimes'			:'smtimes',
349 		'plan'			:'pln',
350 		'serious'			:'srs',
351 		'ahead'			:'ahd',
352 		'week'			:'wk',
353 		'wonderful'			:'wndfl',
354 		'past'			:'pst',
355 		'number'			:'#',
356 		'nobody'			:'nbdy',
357 		'along'			:'alng',
358 		'finally'			:'fnly',
359 		'worried'			:'wrrd',
360 		'book'			:'bk',
361 		'sort'			:'srt',
362 		'safe'			:'sfe',
363 		'living'			:'livg',
364 		'children'			:'kids',
365 		'weren\'t'			:'wrnt',
366 		'front'			:'frnt',
367 		'loved'			:'luvd',
368 		'asking'			:'askg',
369 		'running'			:'rnng',
370 		'clear'			:'clr',
371 		'figure'			:'fgr',
372 		'felt'			:'flt',
373 		'parents'			:'prnts',
374 		'absolutely'			:'abs',
375 		'alive'			:'alve',
376 		'meant'			:'mnt',
377 		'happens'			:'hppns',
378 		'kidding'			:'kddg',
379 		'full'			:'fl',
380 		'meeting'			:'mtg',
381 		'coffee'			:'cffe',
382 		'sound'			:'snd',
383 		'women'			:'wmn',
384 		'welcome'			:'wlcm',
385 		'months'			:'mnths',
386 		'hour'			:'hr',
387 		'speak'			:'spk',
388 		'thinks'			:'thks',
389 		'Christmas'			:'Xmas',
390 		'possible'			:'pssble',
391 		'worse'			:'wrs',
392 		'company'			:'co',
393 		'mistake'			:'mstk',
394 		'handle'			:'hndl',
395 		'spend'			:'spnd',
396 		'totally'			:'ttly',
397 		'giving'			:'gvg',
398 		'control'			:'ctrl',
399 		'realize'			:'rlze',
400 		'power'			:'pwr',
401 		'president'			:'pres',
402 		'girls'			:'grls',
403 		'taken'			:'tkn',
404 		'picture'			:'pic',
405 		'talked'			:'tlkd',
406 		'hundred'			:'hndrd',
407 		'changed'			:'chgd',
408 		'completely'		:'cmpltly', 
409 		'explain'			:'exp',
410 		'playing'			:'plyg',
411 		'relationship'			:'rlshp',
412 		'loves'			:'lvs',
413 		'fucking'			:'fkg',
414 		'anywhere'			:'newhr',
415 		'questions'			:'qs',
416 		'wonder'			:'wndr',
417 		'calling'			:'cllg',
418 		'somewhere'			:'smwhr',
419 		'straight'			:'str8',
420 		'fast'			:'fst',
421 		'words'			:'wrds',
422 		'worked'			:'wrkd',
423 		'light'			:'lite',
424 		'cannot'			:'can\'t',
425 		'protect'			:'prtct',
426 		'class'			:'cls',
427 		'surprise'			:'sprise',
428 		'sweetheart'			:'swthrt',
429 		'looked'			:'lkd',
430 		'except'			:'xcpt',
431 		'takes'			:'tks',
432 		'situation'			:'sitn',
433 		'besides'			:'bsds',
434 		'pull'			:'pll',
435 		'himself'			:'hmslf',
436 		'hasn\'t'			:'hsnt',
437 		'worth'			:'wrth',
438 		'amazing'			:'amzg',
439 		'given'			:'gvn',
440 		'expect'			:'xpct',
441 		'rather'			:'rthr',
442 		'black'			:'blk',
443 		'movie'			:'film',
444 		'country'			:'cntry',
445 		'perhaps'			:'prhps',
446 		'watching'			:'wtchg',
447 		'darling'			:'darlg',
448 		'honor'			:'hnr',
449 		'personal'			:'prsnl',
450 		'moving'			:'movg',
451 		'till'			:'til',
452 		'admit'			:'admt',
453 		'problems'			:'prbs',
454 		'information'			:'info',
455 		'honest'			:'hnst',
456 		'missed'			:'mssd',
457 		'longer'			:'lngr',
458 		'dollars'			:'$s',
459 		'evening'			:'eve',
460 		'starting'			:'strtg',
461 		'suppose'			:'spps',
462 		'street'			:'st',
463 		'sitting'			:'sttg',
464 		'favor'			:'fvr',
465 		'apartment'			:'apt',
466 		'court'			:'crt',
467 		'terrible'			:'trrbl',
468 		'clean'			:'cln',
469 		'learn'			:'lrn',
470 		'works'			:'wks',
471 		'relax'			:'rlx',
472 		'million'			:'mil',
473 		'prove'			:'prv',
474 		'smart'			:'smrt',
475 		'missing'			:'missg',
476 		'forgot'			:'frgt',
477 		'small'			:'sm',
478 		'interested'			:'intrstd',
479 		'table'			:'tbl',
480 		'become'			:'bcm',
481 		'pregnant'			:'preg',
482 		'middle'			:'mddl',
483 		'ring'			:'rng',
484 		'careful'			:'crfl',
485 		'figured'			:'fgrd',
486 		'stick'			:'stk',
487 		'stopped'			:'stppd',
488 		'standing'			:'stndg',
489 		'forgive'			:'frgv',
490 		'wearing'			:'wearg',
491 		'hoping'			:'hopg',
492 		'thousand'			:'k',
493 		'paper'			:'ppr',
494 		'tough'			:'tuff',
495 		'count'			:'cnt',
496 		'birthday'			:'bday',
497 		'history'			:'hstry',
498 		'share'			:'shr',
499 		'offer'			:'offr',
500 		'hurry'			:'hrry',
501 		'feet'			:'ft',
502 		'wondering'			:'wonderg',
503 		'building'			:'buildg',
504 		'ones'			:'1s',
505 		'finish'			:'fin',
506 		'would\'ve'			:'wldve',
507 		'interesting'			:'intrstg',
508 		'enjoy'			:'njoy',
509 		'road'			:'rd',
510 		'staying'			:'stayg',
511 		'short'			:'shrt',
512 		'finished'			:'fin',
513 		'respect'			:'rspct',
514 		'spent'			:'spnt',
515 		'attention'			:'attn',
516 		'holding'			:'hldg',
517 		'surprised'			:'srprsd',
518 		'keeping'			:'kpg',
519 		'putting'			:'puttg',
520 		'dark'			:'drk',
521 		'self'			:'slf',
522 		'using'			:'usg',
523 		'helping'			:'helpg',
524 		'normal'			:'nrml',
525 		'lawyer'			:'atty',
526 		'floor'			:'flr',
527 		'whether'			:'whthr',
528 		'everything\'s'			:'evrthg\'s',
529 		'present'			:'prsnt',
530 		'private'			:'priv',
531 		'cover'			:'cvr',
532 		'judge'			:'jdg',
533 		'upstairs'			:'upstrs',
534 		'mommy'			:'mom',
535 		'possibly'			:'pssbly',
536 		'worst'			:'wrst',
537 		
538 		
539 		/*
540 			contractions
541 		*/
542 		'I am'				:'I\'m',
543 		'I will'			:'I\'ll',
544 		'I had'				:'I\'d',
545 		'I would'			:'I\'d',
546 		'I have'			:'I\'ve',
547 
548 		'You are'			:'You\'re',
549 		'You will'			:'You\'ll',
550 		'You had'			:'You\'d',
551 		'You would'			:'You\'d',
552 		'You have'			:'You\'ve',
553 
554 		'He is'				:'He\'s',
555 		'He has'			:'He\'s',
556 		'He will'			:'He\'ll',
557 		'He had'			:'He\'d',
558 		'He would'			:'He\'d',
559 
560 		'She is'			:'She\'s',
561 		'She has'			:'She\'s',
562 		'She will'			:'She\'ll',
563 		'She had'			:'She\'d',
564 		'She would'			:'She\'d',
565 
566 		'It is'				:'It\'s',
567 		'It has'			:'It\'s',
568 		'It will'			:'It\'ll',
569 		'It would'			:'It\'d',
570 		'It had'			:'It\'d',
571 
572 		'We are'			:'We\'re',
573 		'We will'			:'We\'ll',
574 		'We had'			:'We\'d',
575 		'We would'			:'We\'d',
576 		'We have'			:'We\'ve',
577 
578 		'They are'			:'They\'re',
579 		'They will'			:'They\'ll',
580 		'They had'			:'They\'d',
581 		'They would'		:'They\'d',
582 		'They have'			:'They\'ve',
583 
584 		'There is'			:'There\'s',
585 		'There has'			:'There\'s',
586 		'There will'		:'There\'ll',
587 		'There had'			:'There\'d',
588 		'There would'		:'There\'d',
589 
590 		'That is'			:'That\'s',
591 		'That has'			:'That\'s',
592 		'That will'			:'That\'ll',
593 		'That had'			:'That\'d',
594 		'That would'		:'That\'d',
595 		
596 		'are not'			:'aren\'t',
597 		'can not'			:'can\'t',
598 		'could not'			:'couldn\'t',
599 		'did not'			:'didn\'t',
600 		'does not'			:'doesn\'t',
601 		'do not'			:'don\'t',
602 		'had not'			:'hadn\'t',
603 		'has not'			:'hasn\'t',
604 		'is not'			:'isn\'t',
605 		'must not'			:'mustn\'t',
606 		'need not'			:'needn\'t',
607 		'should not'		:'shouldn\'t',
608 		'was not'			:'wasn\'t',
609 		'were not'			:'weren\'t',
610 		'will not'			:'won\'t',
611 		'would not'			:'wouldn\'t',
612 		
613 		/*
614 			numbers
615 		*/
616 		'one'					:'1',
617 		'two'					:'2',
618 		'three'					:'3',
619 		'four'					:'4',
620 		'five'					:'5',
621 		'six'					:'6',
622 		'seven'					:'7',
623 		'eight'					:'8',
624 		'nine'					:'9',
625 		'ten'					:'10',
626 		'eleven'				:'11',
627 		'twelve'				:'12',
628 		'twenty'				:'20'
629 		
630 	};
631 	
632 	
633 	/*
634 		these mappings aren't to be altered at all when processed into regexes
635 	*/
636 	this.baserawmap = {
637 		'--'					:'–',
638 		'-\\s+'					:'-',
639 		'\\s+-'					:'-',
640 		'\\s+'					:' ',
641 		'\\s+$'					:'',  // trim right
642 		'^\\s+'					:'',  // trim left
643 		'\\s?\\.\\.\\.'				:'…',  // ellipses
644 		'\\.\\s+'				:'. ', // one space only after periods
645 		'\\.\\s*$'				:'',   // remove end period
646 		'RT:? @[a-z0-9_]+:? RT:? @([a-z0-9_]+):?' : 'RT @$1' //remove extra RTs
647 	};
648 };
649 
650 
/**
 * This processes the base maps into the this.map object of regexes and
 * replacements.
 *
 * this.map is emptied in place (so references previously handed out stay
 * valid) and refilled in the order the entries should be applied:
 *
 *   1. every this.basemap entry, longest key first, so that a phrase such as
 *      'at the moment' or a contraction such as "can't" is matched before a
 *      shorter key ('at', 'can') can rewrite part of it. Keys of equal length
 *      keep their original relative order. Each key is wrapped in word
 *      boundaries and matched case-insensitively; the replacement value is
 *      treated as literal text.
 *   2. every this.baserawmap entry, in its original order, with key and value
 *      used verbatim as regex source and replacement string.
 *
 * NOTE(review): object keys that look like array indices (e.g. '12') are
 * enumerated first by JavaScript engines regardless of insertion order.
 */
SpazShortText.prototype.processBaseMaps = function() {
	var hasOwn = Object.prototype.hasOwnProperty;
	var basemap = this.basemap || {};
	var rawmap = this.baserawmap || {};
	var map = this.map || (this.map = {});
	var entries = [];
	var key, i;

	// start from a clean slate so the enumeration order reflects this run only
	for (key in map) {
		if (hasOwn.call(map, key)) {
			delete map[key];
		}
	}

	for (key in basemap) {
		if (hasOwn.call(basemap, key)) {
			entries.push({ key: key, order: entries.length });
		}
	}

	// longest key first; the index tie-break keeps the sort stable everywhere
	entries.sort(function(a, b) {
		return (b.key.length - a.key.length) || (a.order - b.order);
	});

	for (i = 0; i < entries.length; i += 1) {
		key = entries[i].key;
		map[key] = {
			// '$' is doubled so values such as '$s' or '12' are inserted
			// literally rather than being read as replacement patterns
			'short': String(basemap[key]).replace(/\$/g, '$$$$'),
			// the non-capturing group keeps both boundaries attached to the
			// whole key even if it contains an alternation
			'regex': new RegExp('\\b(?:' + key + ')\\b', 'gi')
		};
	}

	/*
		take the rawmap stuff and glob it into this.map, so we only have one to worry about
	*/
	for (key in rawmap) {
		if (hasOwn.call(rawmap, key)) {
			map[key] = {
				'short': rawmap[key],
				'regex': new RegExp(key, 'gi')
			};
		}
	}
};
680 
681 
/**
 * Shortens the given text according to the map.
 *
 * Every entry of this.map is applied in enumeration order; each entry's
 * `regex` is replaced with its `short` string, and the output of one
 * replacement is the input of the next.
 *
 * @param {string} text the text to shorten; null/undefined yield an empty
 *                      string and any other non-string value is converted
 *                      with String() first
 * @return {string} the shortened text
 */
SpazShortText.prototype.shorten = function(text) {
	var hasOwn = Object.prototype.hasOwnProperty;
	var map = this.map;
	var result, key, entry;

	// nothing to shorten; avoids calling .replace on a non-string
	if (text === null || text === undefined) {
		return '';
	}
	result = String(text);

	for (key in map) {
		// skip anything inherited through the prototype chain
		if (hasOwn.call(map, key)) {
			entry = map[key];
			result = result.replace(entry.regex, entry['short']);
		}
	}

	return result;

};
699 
700 
/**
 * This adds a new mapping to the base maps and processes the base maps into
 * regexes again.
 *
 * The mapping is stored in this.baserawmap when israw is truthy and in
 * this.basemap otherwise. Any mapping for the same search key in the other
 * table is removed first: both tables are compiled into one map keyed by the
 * search string, so a leftover entry there could shadow the one just added.
 *
 * @param {string} search the text (or, when israw, the regex source) to look for
 * @param {string} replace the replacement
 * @param {boolean} israw if true, this mapping won't be altered at all when processed into a regex
 */
SpazShortText.prototype.addMap = function(search, replace, israw) {
	var target = israw ? this.baserawmap : this.basemap;
	var other = israw ? this.basemap : this.baserawmap;

	// keep a single definition per search key across both tables
	if (Object.prototype.hasOwnProperty.call(other, search)) {
		delete other[search];
	}

	target[search] = replace;

	this.processBaseMaps();
};
718 
719 
720 
/**
 * Accessor for the compiled mapping table.
 * @return {object} the object currently stored in this.map (the same
 *                  reference, not a copy)
 */
SpazShortText.prototype.getMaps = function() {
	var compiled = this.map;
	return compiled;
};