ccn_xmltoccnb.c

Go to the documentation of this file.
00001 /**
00002  * @file ccn_xmltoccnb.c
00003  * Utility to convert XML into ccn binary encoded data (ccnb).
00004  *
00005  * A CCNx command-line utility.
00006  *
00007  * Copyright (C) 2008, 2009 Palo Alto Research Center, Inc.
00008  *
00009  * This work is free software; you can redistribute it and/or modify it under
00010  * the terms of the GNU General Public License version 2 as published by the
00011  * Free Software Foundation.
00012  * This work is distributed in the hope that it will be useful, but WITHOUT ANY
00013  * WARRANTY; without even the implied warranty of MERCHANTABILITY or
00014  * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
00015  * for more details. You should have received a copy of the GNU General Public
00016  * License along with this program; if not, write to the
00017  * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00018  * Boston, MA 02110-1301, USA.
00019  */
#include <fcntl.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>

#include <expat.h>

#include <ccn/coding.h>
#include <ccn/charbuf.h>
#include <ccn/extend_dict.h>
00032 
/**
 * Print the command-line synopsis on stderr and terminate the process.
 *
 * @param progname  name substituted into the synopsis line.
 *
 * This function does not return; it exits with status 1.
 */
static void
usage(const char *progname)
{
    /* Only the first line needs formatting; the rest is fixed text. */
    fprintf(stderr, "usage: %s [-h] [-w] [-d dict]* file ...\n", progname);
    fputs(" Utility to convert XML into ccn binary encoded data (ccnb)\n"
          "  -h       print usage and exit\n"
          "  -w       toss UDATA content consisting of only whitespace\n"
          "  -d dict  additional csv format dictionary file(s)\n"
          " use - for file to specify filter mode (stdin, stdout)\n"
          " otherwise output files get .ccnb extension\n",
          stderr);
    exit(1);
}
00047 
/**
 * Singly linked record holding a pair of offsets.
 *
 * NOTE(review): nothing in the code visible in this file references this
 * type; the meaning of the offsets is presumed from the field names only.
 */
struct ccn_encoder_stack_item {
    size_t start;                        /* first offset of the pair */
    size_t end;                          /* second offset of the pair */
    struct ccn_encoder_stack_item *link; /* next item, or NULL */
};
00053 
/**
 * Per-conversion state shared by the XML callbacks and the emit helpers.
 */
struct ccn_encoder {
    /* Character data collected by do_character_data() until it is flushed
     * by finish_openudata(). */
    struct ccn_charbuf *openudata;
    /* At most one of the following three is normally set; they are chosen
     * by the "ccnbencoding" attribute of the element being read and select
     * how the pending character data is written out. */
    int is_base64binary;
    int is_hexBinary;
    int is_text;
    /* Non-zero: drop pending character data made only of whitespace. */
    int toss_white;
    /* Table (and its entry count) used to map element names to DTAGs. */
    const struct ccn_dict_entry *tagdict;
    int tagdict_count;
    /* Stream that receives the encoded output. */
    FILE *outfile;
};
00064 
/**
 * State of an incremental base64 decode.
 *
 * Zero-initialize the structure, then set output and output_size before the
 * first call to base64_decode_bytes().
 */
struct base64_decoder {
    size_t input_processed; /* not updated by base64_decode_bytes() */
    size_t result_size;     /* bytes produced so far; may exceed output_size */
    unsigned char *output;  /* destination buffer */
    size_t output_size;     /* capacity of output, in bytes */
    unsigned partial;       /* leftover bits (low 6) plus the 0x100 padding flag */
    int phase;              /* number of leftover bits; negative after an error */
};

/* "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" */
/**
 * Feed count bytes of base64 text into the decoder.
 *
 * Space, tab and newline are skipped.  Decoded bytes are stored in
 * d->output while room remains; d->result_size keeps counting past
 * d->output_size so that the caller can detect truncation.
 *
 * A complete, well-formed input leaves d->phase == 0.  Any invalid
 * character, misplaced '=' padding, or data following padding sets
 * d->phase negative; once negative, further calls are ignored.
 *
 * @param d      decoder state, carried from call to call.
 * @param p      input text.
 * @param count  number of input bytes at p.
 */
static void
base64_decode_bytes(struct base64_decoder *d, const void *p, size_t count)
{
    enum { ENDGAME = 0x100 };   /* set in partial once '=' has been seen */
    size_t i;
    size_t oi = d->result_size;
    const char *s = p;
    unsigned partial = d->partial;
    unsigned endgame = partial & ENDGAME;
    int phase = d->phase;
    char ch;

    if (phase < 0)
        return;
    for (i = 0; i < count; i++) {
        ch = s[i];
        /*
         * We know we have UTF-8, hence ascii for the characters we care about.
         * Thus the range checks here are legitimate.
         */
        if ('A' <= ch && ch <= 'Z')
            ch -= 'A';
        else if ('a' <= ch && ch <= 'z')
            ch -= 'a' - 26;
        else if ('0' <= ch && ch <= '9')
            ch -= '0' - 52;
        else if (ch == '+')
            ch = 62;
        else if (ch == '/')
            ch = 63;
        else if (ch == ' ' || ch == '\t' || ch == '\n')
            continue;
        else if (ch == '=') {
            /*
             * Padding is only legal when 2 or 4 bits are left over and the
             * bits being discarded are zero.  Stop right here on a
             * violation so that the error state is not overwritten below.
             */
            if (phase > 4 || (partial & 3) != 0) {
                phase = -1;
                break;
            }
            phase -= 2;
            partial >>= 2;
            endgame = ENDGAME;
            continue;
        }
        else {
            phase = -1;
            break;
        }
        /* A data character after padding is an error. */
        if (endgame != 0) {
            phase = -1;
            break;
        }
        partial <<= 6;
        partial |= ch;
        phase += 6;
        if (phase >= 8) {
            if (oi < d->output_size)
                d->output[oi] = partial >> (phase - 8);
            oi += 1;
            phase -= 8;
        }
    }
    d->phase = phase;
    /* Keep the padding flag so that it survives until the next call. */
    d->partial = (partial & ((1<<6)-1)) | endgame;
    d->result_size = oi;
}
00136 
00137 static int
00138 dict_lookup(const char *key, const struct ccn_dict_entry *dict, int n)
00139 {
00140     int i;
00141     for (i = 0; i < n; i++)
00142         if (0 == strcmp(key, dict[i].name))
00143             return (dict[i].index);
00144     return (-1);
00145 }
00146 
00147 struct ccn_encoder *
00148 ccn_encoder_create(FILE *outfile, const struct ccn_dict *dtags)
00149 {
00150     struct ccn_encoder *c;
00151     c = calloc(1, sizeof(*c));
00152     if (c) {
00153         c->openudata = ccn_charbuf_create();
00154         if (c->openudata != NULL)
00155             ccn_charbuf_reserve(c->openudata, 128);
00156         c->outfile = outfile;
00157         c->tagdict = dtags->dict;
00158         c->tagdict_count = dtags->count;
00159     }
00160     return(c);
00161 }
00162 
00163 void
00164 ccn_encoder_destroy(struct ccn_encoder **cbp)
00165 {
00166     struct ccn_encoder *c = *cbp;
00167     if (c != NULL) {
00168         ccn_charbuf_destroy(&c->openudata);
00169         free(c);
00170         *cbp = NULL;
00171     }
00172 }
00173 
00174 static void
00175 emit_bytes(struct ccn_encoder *u, const void *p, size_t length)
00176 {
00177     /* Write errors to files are checked with ferror before close. */
00178     (void)fwrite(p, 1, length, u->outfile);
00179 }
00180 
00181 static void
00182 emit_tt(struct ccn_encoder *u, size_t numval, enum ccn_tt tt)
00183 {
00184     unsigned char buf[1+8*((sizeof(numval)+6)/7)];
00185     unsigned char *p = buf + (sizeof(buf)-1);
00186     int n = 1;
00187     p[0] = (CCN_TT_HBIT & ~CCN_CLOSE) |
00188            ((numval & CCN_MAX_TINY) << CCN_TT_BITS) |
00189            (CCN_TT_MASK & tt);
00190     numval >>= (7-CCN_TT_BITS);
00191     while (numval != 0) {
00192         (--p)[0] = (((unsigned char)numval) & ~CCN_TT_HBIT) | CCN_CLOSE;
00193         n++;
00194         numval >>= 7;
00195     }
00196     emit_bytes(u, p, n);
00197 }
00198 
00199 static int
00200 all_whitespace(struct ccn_charbuf *b)
00201 {
00202     size_t i;
00203     size_t n = b->length;
00204     for (i = 0; i < n; i++) {
00205         switch (b->buf[i]) {
00206             case ' ':
00207             case '\t':
00208             case '\n':
00209                 continue;
00210         }
00211         return(0);
00212     }
00213     return(1);
00214 }
00215 
00216 static void
00217 finish_openudata(struct ccn_encoder *u)
00218 {
00219     if (u->is_base64binary) {
00220         unsigned char *obuf = NULL;
00221         ssize_t len = -1;
00222         size_t maxbinlen = u->openudata->length * 3 / 4 + 4;
00223         struct base64_decoder d = { 0 };
00224         u->is_base64binary = 0;
00225         obuf = ccn_charbuf_reserve(u->openudata, maxbinlen);
00226         if (obuf != NULL) {
00227             d.output = obuf;
00228             d.output_size = maxbinlen;
00229             base64_decode_bytes(&d, u->openudata->buf, u->openudata->length);
00230             if (d.phase == 0 && d.result_size <= d.output_size)
00231                 len = d.result_size;
00232         }
00233         if (len == -1) {
00234             fprintf(stderr,
00235                 "could not decode base64binary, leaving as character data\n");
00236         }
00237         else {
00238             emit_tt(u, len, CCN_BLOB);
00239             emit_bytes(u, obuf, len);
00240             u->openudata->length = 0;
00241             return;
00242         }
00243     }
00244     else if (u->is_hexBinary) {
00245         size_t maxbinlen = (u->openudata->length + 1)/2;
00246         unsigned char *obuf = NULL;
00247         int v = -1;
00248         size_t i;
00249         size_t j = 0;
00250         unsigned char ch;
00251         u->is_hexBinary = 0;
00252         obuf = ccn_charbuf_reserve(u->openudata, maxbinlen);
00253         if (obuf != NULL) {
00254             for (v = 1, i = 0, j = 0; v > 0 && i < u->openudata->length; i++) {
00255                 ch = u->openudata->buf[i];
00256                 if (ch <= ' ')
00257                     continue;
00258                 v = (v << 4) + (('0' <= ch && ch <= '9') ? (ch - '0') :
00259                                 ('A' <= ch && ch <= 'F') ? (ch - 'A' + 10) :
00260                                 ('a' <= ch && ch <= 'f') ? (ch - 'a' + 10) :
00261                                 -1024);
00262                 if (v > 255) {
00263                     if (j >= maxbinlen)
00264                         break;
00265                     obuf[j++] = v & 255;
00266                     v = 1;
00267                 }
00268             }
00269         }
00270         if (v != 1) {
00271             fprintf(stderr,
00272                     "could not decode hexBinary, leaving as character data\n");
00273         }
00274         else {
00275             emit_tt(u, j, CCN_BLOB);
00276             emit_bytes(u, obuf, j);
00277             u->openudata->length = 0;
00278             return;
00279         }
00280     }
00281     else if (u->is_text) {
00282         u->is_text = 0;
00283         emit_tt(u, u->openudata->length, CCN_BLOB);
00284         emit_bytes(u, u->openudata->buf, u->openudata->length);
00285         u->openudata->length = 0;
00286         return;
00287     }
00288     if (u->openudata->length != 0) {
00289         if (!(u->toss_white && all_whitespace(u->openudata))) {
00290             emit_tt(u, u->openudata->length, CCN_UDATA);
00291             emit_bytes(u, u->openudata->buf, u->openudata->length);
00292         }
00293         u->openudata->length = 0;
00294     }
00295 }
00296 
00297 static void
00298 emit_name(struct ccn_encoder *u, enum ccn_tt tt, const void *name)
00299 {
00300     size_t length = strlen(name);
00301     int dictindex = -1;
00302     if (length == 0) return; /* should never happen */
00303     finish_openudata(u);
00304     if (tt == CCN_TAG) {
00305         dictindex = dict_lookup(name, u->tagdict, u->tagdict_count);
00306         if (dictindex >= 0) {
00307             emit_tt(u, dictindex, CCN_DTAG);
00308             return;
00309         }
00310     }
00311     emit_tt(u, length-1, tt);
00312     emit_bytes(u, name, length);
00313 }
00314 
00315 static void
00316 emit_xchars(struct ccn_encoder *u, const XML_Char *xchars)
00317 {
00318     size_t length = strlen(xchars);
00319     finish_openudata(u);
00320     emit_tt(u, length, CCN_UDATA);
00321     emit_bytes(u, xchars, length);
00322 }
00323 
00324 static void
00325 emit_closer(struct ccn_encoder *u)
00326 {
00327     static const unsigned char closer[] = { CCN_CLOSE };
00328     finish_openudata(u);
00329     emit_bytes(u, closer, sizeof(closer));
00330 }
00331 
00332 static void
00333 do_start_element(void *ud, const XML_Char *name,
00334                  const XML_Char **atts)
00335 {
00336     struct ccn_encoder *u = ud;
00337     const XML_Char **att;
00338     int is_base64binary = 0;
00339     int is_hexBinary = 0;
00340     int is_text = 0;
00341     emit_name(u, CCN_TAG, name);
00342     for (att = atts; att[0] != NULL; att += 2) {
00343         if (0 == strcmp(att[0], "ccnbencoding")) {
00344             if (0 == strcmp(att[1], "base64Binary")) {
00345                 is_base64binary = 1;
00346                 continue;
00347             }
00348             if (0 == strcmp(att[1], "hexBinary")) {
00349                 is_hexBinary = 1;
00350                 continue;
00351             }
00352             if (0 == strcmp(att[1], "text")) {
00353                 is_text = 1;
00354                 continue;
00355             }
00356             fprintf(stderr, "warning - unknown ccnbencoding found (%s)\n", att[1]);
00357         }
00358         emit_name(u, CCN_ATTR, att[0]);
00359         emit_xchars(u, att[1]);
00360     }
00361     u->is_base64binary = is_base64binary;
00362     u->is_hexBinary = is_hexBinary;
00363     u->is_text = is_text;
00364 }
00365 
00366 static void
00367 do_end_element(void *ud, const XML_Char *name)
00368 {
00369     struct ccn_encoder *u = ud;
00370     emit_closer(u);
00371 }
00372 
00373 static void
00374 do_character_data(void *ud, const XML_Char *s, int len)
00375 {
00376      struct ccn_encoder *u = ud;
00377      ccn_charbuf_append(u->openudata, s, len);
00378 }
00379 
00380 static void
00381 do_processing_instructions(void *ud, const XML_Char *target, const XML_Char *data)
00382 {
00383      struct ccn_encoder *u = ud;
00384      finish_openudata(u);
00385      emit_tt(u, CCN_PROCESSING_INSTRUCTIONS, CCN_EXT);
00386      emit_xchars(u, target);
00387      emit_xchars(u, data);
00388      emit_closer(u);
00389 }
00390 
00391 #define TOSS_WHITE 1
00392 static int
00393 process_fd(int fd, FILE *outfile, int flags, const struct ccn_dict *dtags)
00394 {
00395     char buf[17];
00396     ssize_t len;
00397     int res = 0;
00398     struct ccn_encoder *u;
00399     XML_Parser p;
00400     u = ccn_encoder_create(outfile, dtags);
00401     if (u == NULL) return(1);
00402     if (flags & TOSS_WHITE) {
00403         u->toss_white = 1;
00404     }
00405     p = XML_ParserCreate(NULL);
00406     XML_SetUserData(p, u);
00407     XML_SetElementHandler(p, &do_start_element, &do_end_element);
00408     XML_SetCharacterDataHandler(p, &do_character_data);
00409     XML_SetProcessingInstructionHandler(p, &do_processing_instructions);
00410     
00411     while ((len = read(fd, buf, sizeof(buf))) > 0) {
00412         if (XML_Parse(p, buf, len, 0) != XML_STATUS_OK) {
00413             res |= 1;
00414             break;
00415         }
00416     }
00417     if (len < 0) {
00418         perror("read");
00419         res |= 1;
00420     }
00421     if (XML_Parse(p, buf, 0, 1) != XML_STATUS_OK) {
00422       fprintf(stderr, "xml parse error line %ld\n", (long)XML_GetCurrentLineNumber(p));
00423         res |= 1;
00424     }
00425     XML_ParserFree(p);
00426     ccn_encoder_destroy(&u);
00427     
00428     return(res);
00429 }
00430 
/**
 * Convert one XML input to ccnb.
 *
 * With path "-" the input is stdin and the output is stdout.  Otherwise the
 * output goes to a file in the current directory named after the last
 * component of path, with a trailing ".xml" (any letter case) replaced by
 * ".ccnb", or ".ccnb" appended when there is no such suffix.  An output
 * file left incomplete by an error is removed.
 *
 * @param path   input file name, or "-".
 * @param flags  passed through to process_fd().
 * @param dtags  passed through to process_fd().
 * @returns 0 on success, non-zero on any failure.  Exits the process if the
 *          output name cannot be allocated.
 */
static int
process_file(char *path, int flags, const struct ccn_dict *dtags)
{
    static const char outext[] = ".ccnb";
    int fd = 0;
    int res = 0;
    FILE *outfile = stdout;
    char *outname = NULL;

    if (0 != strcmp(path, "-")) {
        const char *base;
        const char *ext;
        size_t stemlen;

        fd = open(path, O_RDONLY);
        if (-1 == fd) {
            perror(path);
            return(1);
        }
        base = strrchr(path, '/');
        base = (base == NULL) ? path : base + 1;
        ext = strrchr(base, '.');
        if (ext == NULL || 0 != strcasecmp(ext, ".xml"))
            ext = base + strlen(base);
        stemlen = (size_t)(ext - base);
        /* sizeof(outext) accounts for the terminating NUL. */
        outname = calloc(1, stemlen + sizeof(outext));
        if (outname == NULL) { perror("calloc"); exit(1); }
        memcpy(outname, base, stemlen);
        memcpy(outname + stemlen, outext, sizeof(outext));
        outfile = fopen(outname, "wb");
        if (outfile == NULL) {
            perror(outname);
            free(outname);
            outname = NULL;
            res |= 1;
        }
    }
    if (res == 0) {
        res = process_fd(fd, outfile, flags, dtags);
        fflush(outfile);
    }
    if (outfile != NULL) {
        /* Checked for stdout as well, so filter mode reports write errors. */
        if (ferror(outfile)) {
            res |= 1;
            fprintf(stderr, " %s: output error\n",
                    outname != NULL ? outname : "(stdout)");
            clearerr(outfile);
        }
        if (outfile != stdout) {
            /* The last buffered data may only fail when the file is closed. */
            if (fclose(outfile) != 0) {
                if (res == 0)
                    perror(outname);
                res |= 1;
            }
            if (res == 0)
                fprintf(stderr, " %s written.\n", outname);
        }
    }
    if (fd > 0)
        close(fd);
    if (res != 0 && outname != NULL) {
        unlink(outname);
    }
    free(outname);
    return(res);
}
00490 
00491 int
00492 main(int argc, char **argv)
00493 {
00494     int i;
00495     int res = 0;
00496     int dictres = 0;
00497     int flags = 0;
00498     struct ccn_dict *dtags = (struct ccn_dict *)&ccn_dtag_dict;
00499     
00500     if (argv[1] == NULL)
00501         usage(argv[0]);
00502     for (i = 1; argv[i] != 0; i++) {
00503         if (0 == strcmp(argv[i], "-h")) {
00504             usage(argv[0]);
00505         }
00506         if (0 == strcmp(argv[i], "-w")) {
00507             flags |= TOSS_WHITE;
00508             continue;
00509         }
00510         if (0 == strcmp(argv[i], "-d")) {
00511             if (argv[i+1] != 0) {
00512                 if (0 > ccn_extend_dict(argv[i+1], dtags, &dtags)) {
00513                     fprintf(stderr, "Unable to load dtag dictionary %s\n", argv[i+1]);
00514                     dictres = -1;
00515                 }
00516                 i++;
00517             }
00518             continue;
00519         }
00520         if (dictres < 0)
00521             exit(1);
00522         fprintf(stderr, "<!-- Processing %s -->\n", argv[i]);
00523         res |= process_file(argv[i], flags, dtags);
00524     }
00525     return(res);
00526 }
Generated on Fri May 13 16:27:01 2011 for Content-Centric Networking in C by  doxygen 1.6.3