00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #include <fcntl.h>
00021 #include <stddef.h>
00022 #include <stdio.h>
00023 #include <stdlib.h>
00024 #include <string.h>
00025 #include <unistd.h>
00026
00027 #include <expat.h>
00028
00029 #include <ccn/coding.h>
00030 #include <ccn/charbuf.h>
00031 #include <ccn/extend_dict.h>
00032
/**
 * Print the command-line synopsis to stderr and terminate the process.
 *
 * @param progname name shown in the "usage:" line.
 *
 * Does not return; always exits with status 1.
 */
static void
usage(const char *progname)
{
    /* Only the first line needs formatting; the rest is fixed text. */
    fprintf(stderr, "usage: %s [-h] [-w] [-d dict]* file ...\n", progname);
    fputs(" Utility to convert XML into ccn binary encoded data (ccnb)\n"
          " -h print usage and exit\n"
          " -w toss UDATA content consisting of only whitespace\n"
          " -d dict additional csv format dictionary file(s)\n"
          " use - for file to specify filter mode (stdin, stdout)\n"
          " otherwise output files get .ccnb extension\n",
          stderr);
    exit(1);
}
00047
/**
 * Node of a singly linked list carrying a start/end pair.
 *
 * NOTE(review): nothing in the visible part of this file references this
 * type; it may be a leftover from an earlier design.
 */
struct ccn_encoder_stack_item {
    size_t start;                           /* first offset of the pair */
    size_t end;                             /* second offset of the pair */
    struct ccn_encoder_stack_item *link;    /* next node, or NULL */
};
00053
/**
 * State shared by the expat callbacks while one XML document is being
 * converted to ccnb.
 */
struct ccn_encoder {
    /* Character data collected by do_character_data() and not yet written;
     * flushed by finish_openudata(). */
    struct ccn_charbuf *openudata;
    /* Pending-data interpretation flags, set by do_start_element() from the
     * element's "ccnbencoding" attribute and cleared by finish_openudata(). */
    int is_base64binary;
    int is_hexBinary;
    int is_text;
    /* Non-zero: drop character data that is entirely whitespace. */
    int toss_white;
    /* Tag-name dictionary consulted by emit_name(), and its entry count. */
    const struct ccn_dict_entry *tagdict;
    int tagdict_count;
    /* Destination stream for the encoded bytes. */
    FILE *outfile;
};
00064
/**
 * Incremental base64 decoder state.
 *
 * Zero-initialize, then set output and output_size before the first call
 * to base64_decode_bytes().
 */
struct base64_decoder {
    size_t input_processed;  /* not updated by base64_decode_bytes() */
    size_t result_size;      /* decoded bytes produced so far; may exceed
                                output_size if the buffer was too small */
    unsigned char *output;   /* destination buffer */
    size_t output_size;      /* capacity of output, in bytes */
    unsigned partial;        /* low 6 bits: leftover data bits;
                                bit 0x100: padding ('=') has been seen */
    int phase;               /* number of valid leftover bits in partial;
                                negative once the input is found invalid */
};

/**
 * Feed a chunk of base64 text to the decoder.
 *
 * Space, tab and newline are skipped.  Decoded bytes are stored in
 * d->output while there is room; d->result_size keeps counting past the
 * end of the buffer so the caller can detect truncation by comparing it
 * with d->output_size.  After the final chunk, d->phase == 0 indicates
 * well-formed input; any other value indicates malformed or incomplete
 * input.
 *
 * Once an invalid character or a misplaced '=' is seen, d->phase is set
 * to -1 and that chunk and all later chunks are ignored.
 *
 * @param d      decoder state, updated in place.
 * @param p      input characters.
 * @param count  number of input characters at p.
 */
static void
base64_decode_bytes(struct base64_decoder *d, const void *p, size_t count)
{
    size_t i;
    size_t oi = d->result_size;
    const char *s = p;
    unsigned partial = d->partial & 0x3f;
    unsigned endgame = d->partial & 0x100;
    int phase = d->phase;
    char ch;

    if (phase < 0)
        return;
    for (i = 0; i < count; i++) {
        ch = s[i];
        if ('A' <= ch && ch <= 'Z')
            ch -= 'A';
        else if ('a' <= ch && ch <= 'z')
            ch -= 'a' - 26;
        else if ('0' <= ch && ch <= '9')
            ch -= '0' - 52;
        else if (ch == '+')
            ch = 62;
        else if (ch == '/')
            ch = 63;
        else if (ch == ' ' || ch == '\t' || ch == '\n')
            continue;
        else if (ch == '=') {
            if (phase > 4 || (partial & 3) != 0) {
                /*
                 * Padding is only legal when the two bits it stands for
                 * are present and zero.  Stop here so the '=' is not
                 * folded into the data and the error state is kept.
                 */
                phase = -1;
                break;
            }
            phase -= 2;
            partial >>= 2;
            endgame = 0x100;
            continue;
        }
        else {
            phase = -1;
            break;
        }
        if (endgame != 0) {
            /* Data characters are not allowed after padding. */
            phase = -1;
            break;
        }
        partial <<= 6;
        partial |= ch;
        phase += 6;
        if (phase >= 8) {
            if (oi < d->output_size)
                d->output[oi] = partial >> (phase - 8);
            oi += 1;
            phase -= 8;
        }
    }
    d->phase = phase;
    /* Keep the padding-seen flag so it also applies to the next chunk. */
    d->partial = (partial & ((1<<6)-1)) | endgame;
    d->result_size = oi;
}
00136
00137 static int
00138 dict_lookup(const char *key, const struct ccn_dict_entry *dict, int n)
00139 {
00140 int i;
00141 for (i = 0; i < n; i++)
00142 if (0 == strcmp(key, dict[i].name))
00143 return (dict[i].index);
00144 return (-1);
00145 }
00146
00147 struct ccn_encoder *
00148 ccn_encoder_create(FILE *outfile, const struct ccn_dict *dtags)
00149 {
00150 struct ccn_encoder *c;
00151 c = calloc(1, sizeof(*c));
00152 if (c) {
00153 c->openudata = ccn_charbuf_create();
00154 if (c->openudata != NULL)
00155 ccn_charbuf_reserve(c->openudata, 128);
00156 c->outfile = outfile;
00157 c->tagdict = dtags->dict;
00158 c->tagdict_count = dtags->count;
00159 }
00160 return(c);
00161 }
00162
00163 void
00164 ccn_encoder_destroy(struct ccn_encoder **cbp)
00165 {
00166 struct ccn_encoder *c = *cbp;
00167 if (c != NULL) {
00168 ccn_charbuf_destroy(&c->openudata);
00169 free(c);
00170 *cbp = NULL;
00171 }
00172 }
00173
00174 static void
00175 emit_bytes(struct ccn_encoder *u, const void *p, size_t length)
00176 {
00177
00178 (void)fwrite(p, 1, length, u->outfile);
00179 }
00180
00181 static void
00182 emit_tt(struct ccn_encoder *u, size_t numval, enum ccn_tt tt)
00183 {
00184 unsigned char buf[1+8*((sizeof(numval)+6)/7)];
00185 unsigned char *p = buf + (sizeof(buf)-1);
00186 int n = 1;
00187 p[0] = (CCN_TT_HBIT & ~CCN_CLOSE) |
00188 ((numval & CCN_MAX_TINY) << CCN_TT_BITS) |
00189 (CCN_TT_MASK & tt);
00190 numval >>= (7-CCN_TT_BITS);
00191 while (numval != 0) {
00192 (--p)[0] = (((unsigned char)numval) & ~CCN_TT_HBIT) | CCN_CLOSE;
00193 n++;
00194 numval >>= 7;
00195 }
00196 emit_bytes(u, p, n);
00197 }
00198
00199 static int
00200 all_whitespace(struct ccn_charbuf *b)
00201 {
00202 size_t i;
00203 size_t n = b->length;
00204 for (i = 0; i < n; i++) {
00205 switch (b->buf[i]) {
00206 case ' ':
00207 case '\t':
00208 case '\n':
00209 continue;
00210 }
00211 return(0);
00212 }
00213 return(1);
00214 }
00215
00216 static void
00217 finish_openudata(struct ccn_encoder *u)
00218 {
00219 if (u->is_base64binary) {
00220 unsigned char *obuf = NULL;
00221 ssize_t len = -1;
00222 size_t maxbinlen = u->openudata->length * 3 / 4 + 4;
00223 struct base64_decoder d = { 0 };
00224 u->is_base64binary = 0;
00225 obuf = ccn_charbuf_reserve(u->openudata, maxbinlen);
00226 if (obuf != NULL) {
00227 d.output = obuf;
00228 d.output_size = maxbinlen;
00229 base64_decode_bytes(&d, u->openudata->buf, u->openudata->length);
00230 if (d.phase == 0 && d.result_size <= d.output_size)
00231 len = d.result_size;
00232 }
00233 if (len == -1) {
00234 fprintf(stderr,
00235 "could not decode base64binary, leaving as character data\n");
00236 }
00237 else {
00238 emit_tt(u, len, CCN_BLOB);
00239 emit_bytes(u, obuf, len);
00240 u->openudata->length = 0;
00241 return;
00242 }
00243 }
00244 else if (u->is_hexBinary) {
00245 size_t maxbinlen = (u->openudata->length + 1)/2;
00246 unsigned char *obuf = NULL;
00247 int v = -1;
00248 size_t i;
00249 size_t j = 0;
00250 unsigned char ch;
00251 u->is_hexBinary = 0;
00252 obuf = ccn_charbuf_reserve(u->openudata, maxbinlen);
00253 if (obuf != NULL) {
00254 for (v = 1, i = 0, j = 0; v > 0 && i < u->openudata->length; i++) {
00255 ch = u->openudata->buf[i];
00256 if (ch <= ' ')
00257 continue;
00258 v = (v << 4) + (('0' <= ch && ch <= '9') ? (ch - '0') :
00259 ('A' <= ch && ch <= 'F') ? (ch - 'A' + 10) :
00260 ('a' <= ch && ch <= 'f') ? (ch - 'a' + 10) :
00261 -1024);
00262 if (v > 255) {
00263 if (j >= maxbinlen)
00264 break;
00265 obuf[j++] = v & 255;
00266 v = 1;
00267 }
00268 }
00269 }
00270 if (v != 1) {
00271 fprintf(stderr,
00272 "could not decode hexBinary, leaving as character data\n");
00273 }
00274 else {
00275 emit_tt(u, j, CCN_BLOB);
00276 emit_bytes(u, obuf, j);
00277 u->openudata->length = 0;
00278 return;
00279 }
00280 }
00281 else if (u->is_text) {
00282 u->is_text = 0;
00283 emit_tt(u, u->openudata->length, CCN_BLOB);
00284 emit_bytes(u, u->openudata->buf, u->openudata->length);
00285 u->openudata->length = 0;
00286 return;
00287 }
00288 if (u->openudata->length != 0) {
00289 if (!(u->toss_white && all_whitespace(u->openudata))) {
00290 emit_tt(u, u->openudata->length, CCN_UDATA);
00291 emit_bytes(u, u->openudata->buf, u->openudata->length);
00292 }
00293 u->openudata->length = 0;
00294 }
00295 }
00296
00297 static void
00298 emit_name(struct ccn_encoder *u, enum ccn_tt tt, const void *name)
00299 {
00300 size_t length = strlen(name);
00301 int dictindex = -1;
00302 if (length == 0) return;
00303 finish_openudata(u);
00304 if (tt == CCN_TAG) {
00305 dictindex = dict_lookup(name, u->tagdict, u->tagdict_count);
00306 if (dictindex >= 0) {
00307 emit_tt(u, dictindex, CCN_DTAG);
00308 return;
00309 }
00310 }
00311 emit_tt(u, length-1, tt);
00312 emit_bytes(u, name, length);
00313 }
00314
00315 static void
00316 emit_xchars(struct ccn_encoder *u, const XML_Char *xchars)
00317 {
00318 size_t length = strlen(xchars);
00319 finish_openudata(u);
00320 emit_tt(u, length, CCN_UDATA);
00321 emit_bytes(u, xchars, length);
00322 }
00323
00324 static void
00325 emit_closer(struct ccn_encoder *u)
00326 {
00327 static const unsigned char closer[] = { CCN_CLOSE };
00328 finish_openudata(u);
00329 emit_bytes(u, closer, sizeof(closer));
00330 }
00331
00332 static void
00333 do_start_element(void *ud, const XML_Char *name,
00334 const XML_Char **atts)
00335 {
00336 struct ccn_encoder *u = ud;
00337 const XML_Char **att;
00338 int is_base64binary = 0;
00339 int is_hexBinary = 0;
00340 int is_text = 0;
00341 emit_name(u, CCN_TAG, name);
00342 for (att = atts; att[0] != NULL; att += 2) {
00343 if (0 == strcmp(att[0], "ccnbencoding")) {
00344 if (0 == strcmp(att[1], "base64Binary")) {
00345 is_base64binary = 1;
00346 continue;
00347 }
00348 if (0 == strcmp(att[1], "hexBinary")) {
00349 is_hexBinary = 1;
00350 continue;
00351 }
00352 if (0 == strcmp(att[1], "text")) {
00353 is_text = 1;
00354 continue;
00355 }
00356 fprintf(stderr, "warning - unknown ccnbencoding found (%s)\n", att[1]);
00357 }
00358 emit_name(u, CCN_ATTR, att[0]);
00359 emit_xchars(u, att[1]);
00360 }
00361 u->is_base64binary = is_base64binary;
00362 u->is_hexBinary = is_hexBinary;
00363 u->is_text = is_text;
00364 }
00365
00366 static void
00367 do_end_element(void *ud, const XML_Char *name)
00368 {
00369 struct ccn_encoder *u = ud;
00370 emit_closer(u);
00371 }
00372
00373 static void
00374 do_character_data(void *ud, const XML_Char *s, int len)
00375 {
00376 struct ccn_encoder *u = ud;
00377 ccn_charbuf_append(u->openudata, s, len);
00378 }
00379
00380 static void
00381 do_processing_instructions(void *ud, const XML_Char *target, const XML_Char *data)
00382 {
00383 struct ccn_encoder *u = ud;
00384 finish_openudata(u);
00385 emit_tt(u, CCN_PROCESSING_INSTRUCTIONS, CCN_EXT);
00386 emit_xchars(u, target);
00387 emit_xchars(u, data);
00388 emit_closer(u);
00389 }
00390
00391 #define TOSS_WHITE 1
00392 static int
00393 process_fd(int fd, FILE *outfile, int flags, const struct ccn_dict *dtags)
00394 {
00395 char buf[17];
00396 ssize_t len;
00397 int res = 0;
00398 struct ccn_encoder *u;
00399 XML_Parser p;
00400 u = ccn_encoder_create(outfile, dtags);
00401 if (u == NULL) return(1);
00402 if (flags & TOSS_WHITE) {
00403 u->toss_white = 1;
00404 }
00405 p = XML_ParserCreate(NULL);
00406 XML_SetUserData(p, u);
00407 XML_SetElementHandler(p, &do_start_element, &do_end_element);
00408 XML_SetCharacterDataHandler(p, &do_character_data);
00409 XML_SetProcessingInstructionHandler(p, &do_processing_instructions);
00410
00411 while ((len = read(fd, buf, sizeof(buf))) > 0) {
00412 if (XML_Parse(p, buf, len, 0) != XML_STATUS_OK) {
00413 res |= 1;
00414 break;
00415 }
00416 }
00417 if (len < 0) {
00418 perror("read");
00419 res |= 1;
00420 }
00421 if (XML_Parse(p, buf, 0, 1) != XML_STATUS_OK) {
00422 fprintf(stderr, "xml parse error line %ld\n", (long)XML_GetCurrentLineNumber(p));
00423 res |= 1;
00424 }
00425 XML_ParserFree(p);
00426 ccn_encoder_destroy(&u);
00427
00428 return(res);
00429 }
00430
/**
 * Convert one input to ccnb.
 *
 * If path is "-", XML is read from stdin and written to stdout.
 * Otherwise the file is read and the result is written to a file in the
 * current directory, named after the input's last path component with a
 * trailing ".xml" (any letter case) removed and ".ccnb" appended.  On
 * failure the partially written output file is removed.
 *
 * @param path   input file name, or "-" for filter mode.
 * @param flags  option flags passed on to process_fd().
 * @param dtags  tag dictionary passed on to process_fd().
 * @returns 0 on success, non-zero if the input could not be opened or
 *          converted, or the output could not be written.
 */
static int
process_file(char *path, int flags, const struct ccn_dict *dtags)
{
    int fd = 0;
    int res = 0;
    FILE *outfile = stdout;
    const char *leaf;
    const char *ext;
    char *outname = NULL;
    const char outext[] = ".ccnb\0";
    size_t stemlen;

    if (0 != strcmp(path, "-")) {
        fd = open(path, O_RDONLY);
        if (-1 == fd) {
            perror(path);
            return(1);
        }
        leaf = strrchr(path, '/');
        leaf = (leaf == NULL) ? path : leaf + 1;
        ext = strrchr(leaf, '.');
        if (ext == NULL || 0 != strcasecmp(ext, ".xml"))
            ext = leaf + strlen(leaf);
        stemlen = (size_t)(ext - leaf);
        outname = calloc(1, stemlen + sizeof(outext));
        if (outname == NULL) { perror("calloc"); exit(1); }
        memcpy(outname, leaf, stemlen);
        memcpy(outname + stemlen, outext, sizeof(outext));
        outfile = fopen(outname, "wb");
        if (outfile == NULL) {
            perror(outname);
            free(outname);
            outname = NULL;
            res |= 1;
        }
    }
    if (res == 0) {
        res = process_fd(fd, outfile, flags, dtags);
        /* A failed flush sets the stream error flag checked below. */
        fflush(outfile);
    }
    if (outfile == stdout) {
        /* Filter mode: a failed write must be reflected in the result. */
        if (ferror(stdout)) {
            res |= 1;
            fprintf(stderr, " %s: output error\n", "(stdout)");
            /* Do not let this failure leak into a later "-" argument. */
            clearerr(stdout);
        }
    }
    else if (outfile != NULL) {
        int failed = ferror(outfile);

        /* fclose() can still fail while writing out buffered data. */
        if (fclose(outfile) != 0)
            failed = 1;
        if (failed) {
            res |= 1;
            fprintf(stderr, " %s: output error\n", outname);
        }
        if (res == 0)
            fprintf(stderr, " %s written.\n", outname);
    }
    if (fd > 0)
        close(fd);
    if (res != 0 && outname != NULL) {
        /* Do not leave a truncated or invalid output file behind. */
        unlink(outname);
    }
    free(outname);
    return(res);
}
00490
00491 int
00492 main(int argc, char **argv)
00493 {
00494 int i;
00495 int res = 0;
00496 int dictres = 0;
00497 int flags = 0;
00498 struct ccn_dict *dtags = (struct ccn_dict *)&ccn_dtag_dict;
00499
00500 if (argv[1] == NULL)
00501 usage(argv[0]);
00502 for (i = 1; argv[i] != 0; i++) {
00503 if (0 == strcmp(argv[i], "-h")) {
00504 usage(argv[0]);
00505 }
00506 if (0 == strcmp(argv[i], "-w")) {
00507 flags |= TOSS_WHITE;
00508 continue;
00509 }
00510 if (0 == strcmp(argv[i], "-d")) {
00511 if (argv[i+1] != 0) {
00512 if (0 > ccn_extend_dict(argv[i+1], dtags, &dtags)) {
00513 fprintf(stderr, "Unable to load dtag dictionary %s\n", argv[i+1]);
00514 dictres = -1;
00515 }
00516 i++;
00517 }
00518 continue;
00519 }
00520 if (dictres < 0)
00521 exit(1);
00522 fprintf(stderr, "<!-- Processing %s -->\n", argv[i]);
00523 res |= process_file(argv[i], flags, dtags);
00524 }
00525 return(res);
00526 }