#include #include #include #include #define WMAX 256 #define HMAX 256 #define CHARH 8 #define RL32(arr, ofs) (arr[ofs] + (arr[ofs+1] << 8) + (arr[ofs+2] << 16) + (arr[ofs+3] << 24)) typedef struct { int count; int non_tip; //zero if this is the tip of a subtree (no parent) int zero, one; //indexes of zero and one branch int nleaves; //non-leaf node if non-zero union { union { int leaves[256];//indexes to leaves belonging to this entry } n; struct { int sym; //huffman symbol if leaf int len; //length in bits } l; } u; } huffsym; huffsym syms[512]; int nglyphs = 0; char hufftab[256] = {0}; void print_bits(FILE *f, int sym, int len) { while (len) { fprintf(f, "%i", sym&1); sym >>= 1; len--; } } void set_hufftab(int x, int v) { if (hufftab[x]) { fprintf(stderr, "ERROR: hufftab[%i] == %i\n", x, hufftab[x]); exit(1); } hufftab[x] = v; } int curglyph = 0; char font[256][CHARH][8]; unsigned char glyphdata[256]; //bytes to be stuffed into the font void print_leaf(int x, int bits, int nbits, int pass) { int y; if (pass == 1) { syms[x].u.l.sym = bits; syms[x].u.l.len = nbits; } else { printf("Glyph%i\t\t\t\t;%c = ", curglyph, x); print_bits(stdout, syms[x].u.l.sym, syms[x].u.l.len); printf("\n"); if (x == '!') printf("GlyphExcl\n"); /* upside-down as is typical for sprites */ for (y = CHARH-1; y >= 0; y--) { int b = (glyphdata[curglyph*2+(7-y)/4] >> (((7-y)<<1)&7))&3; printf("\t.byte %%%i%i%i%i%i%i%i%i\n", font[x][y][0], font[x][y][1], font[x][y][2], font[x][y][3], font[x][y][4], font[x][y][5], b >> 1, b & 1); } curglyph++; } } void fill_hufftab(int sym, int ofs, int symleft, int bits, int nbits, int pass) { int zero = syms[sym].zero; int one = syms[sym].one; int zero_children = syms[zero].nleaves ? syms[zero].nleaves : 1; int one_children = syms[one].nleaves ? syms[one].nleaves : 1; if (pass == 1) set_hufftab(ofs, one_children); if (one_children > 1) fill_hufftab(one, ofs+1, one_children, bits | (1 << nbits), nbits+1, pass); else print_leaf(one, bits | (1 << nbits), nbits+1, pass); symleft -= one_children; if (symleft > 1) fill_hufftab(zero, ofs+one_children, symleft, bits, nbits+1, pass); //fill_hufftab(zero, ofs+one_children-1, symleft); //two bytes smaller //fill_hufftab(zero, ofs+(one_children*2+2)/3, symleft); //27% smaller, requires tricky division else print_leaf(zero, bits, nbits+1, pass); } /*void append_bit(int sym, int bit) { int x; if (syms[sym].nleaves) { fprintf(stderr, "%i leaves\n", syms[sym].nleaves); for (x = 0; x < syms[sym].nleaves; x++) { syms[syms[sym].u.n.leaves[x]].u.l.sym |= bit << syms[syms[sym].u.n.leaves[x]].u.l.len; syms[syms[sym].u.n.leaves[x]].u.l.len++; fprintf(stderr, "%c (%3i): %2i bits x %i\n", syms[sym].u.n.leaves[x], sym, syms[syms[sym].u.n.leaves[x]].u.l.len, syms[syms[sym].u.n.leaves[x]].count); } } else { syms[sym].u.l.sym = bit; syms[sym].u.l.len = 1; fprintf(stderr, "%c (%3i): %2i bits x %i\n", sym, sym, syms[sym].u.l.len, syms[sym].count); } syms[sym].non_tip = 1; }*/ int main(int argc, char **argv) { uint8_t header[54], image[HMAX][WMAX]; int x, y, w, h, bpp; FILE *f; char text[65536]; int c, x2, y2, textlen = 0; double entropy = 0; int bits = 0, symofs = 256, lowest, nlowest, lcount, nlcount, g; int curbyte = 0, curbits = 0; int outbytes = 0; //output size vs. uncompressed (does not include font) memset(syms, 0, sizeof(syms)); /* read 8-bit images as they are, RGB images as grayscale */ if (!(f = fopen(argv[1], "rb"))) { fprintf(stderr, "failed to open %s\n" , argv[1]); return 1; } fread(header, 54, 1, f); w = RL32(header, 18); h = RL32(header, 22); bpp = header[28] + (header[29] << 8); fseek(f, RL32(header, 10), SEEK_SET); fprintf(stderr, "%ix%i %i bpp\n", w, h, bpp); if (w > WMAX || h > HMAX) { fprintf(stderr, "image too large\n"); return -1; } for (y = h-1; y >= 0; y--) { switch(bpp) { case 8: fread(image[y], w, 1, f); break; case 24: case 32: for (x = 0; x < w; x++) { int temp = getc(f) + getc(f) + getc(f); if (bpp == 32) getc(f); image[y][x] = temp / 3; } break; default: fprintf(stderr, "can't handle %i-bit BMPs\n", bpp); } /* align */ if (w*(bpp/8) & 3) fseek(f, 4 - (w*(bpp/8) & 3), SEEK_CUR); } fclose(f); for (y2 = 0; y2 < 16; y2++) for (x2 = 0; x2 < 16; x2++) for (y = 0; y < CHARH; y++) for (x = 0; x < 8; x++) font[y2*16+x2][y][x] = image[y+y2*CHARH][x+x2*8] < 128; for (;;) { if ((c = getc(stdin)) < 0) break; if (c == '\n') c = ' '; else if (c < ' ') continue; text[textlen] = c; if (!syms[text[textlen]].count++) nglyphs++; textlen++; } //build symbols for (;;) { int lleaves, nlleaves; //find the two entries in syms with lowest count lowest = -1; for (x = 0; x < symofs; x++) if (syms[x].count && (lowest == -1 || syms[x].count < syms[lowest].count) && !syms[x].non_tip) lowest = x; nlowest = -1; for (x = 0; x < symofs; x++) if (x != lowest && syms[x].count && (nlowest == -1 || syms[x].count < syms[nlowest].count) && !syms[x].non_tip) nlowest = x; if (nlowest == -1) break; /*append_bit(lowest, 0); append_bit(nlowest, 1);*/ syms[lowest].non_tip = 1; syms[nlowest].non_tip = 1; syms[symofs].zero = lowest; syms[symofs].one = nlowest; syms[symofs].count = syms[lowest].count + syms[nlowest].count; lleaves = syms[lowest].nleaves ? syms[lowest].nleaves : 1; nlleaves = syms[nlowest].nleaves ? syms[nlowest].nleaves : 1; syms[symofs].nleaves = lleaves + nlleaves; if (syms[lowest].nleaves) memcpy(syms[symofs].u.n.leaves, syms[lowest].u.n.leaves, lleaves*sizeof(int)); else syms[symofs].u.n.leaves[0] = lowest; if (syms[nlowest].nleaves) memcpy(&syms[symofs].u.n.leaves[lleaves], syms[nlowest].u.n.leaves, nlleaves*sizeof(int)); else syms[symofs].u.n.leaves[lleaves] = nlowest; fprintf(stderr, "%i: %i,%i have %i,%i leaves\n", symofs, syms[symofs].zero, syms[symofs].one, syms[lowest].nleaves, syms[nlowest].nleaves); symofs++; } printf("CHARH equ %i\n", CHARH); printf("NSYM equ %i\n", nglyphs); fill_hufftab(lowest, 0, nglyphs, 0, 0, 1); printf("\tMAC TEXT\n"); printf("Text\n"); //pack for (x = 0; x < textlen; x++) { printf(";"); print_bits(stdout, curbyte, curbits); printf(" + "); print_bits(stdout, syms[text[x]].u.l.sym, syms[text[x]].u.l.len); curbyte |= syms[text[x]].u.l.sym << curbits; curbits += syms[text[x]].u.l.len; printf(" = "); print_bits(stdout, curbyte, curbits); printf("\n"); while (curbits > 8) { if (outbytes < nglyphs*2) glyphdata[outbytes] = curbyte & 0xFF; else printf("\tbyte $%02X\n", curbyte & 0xFF); curbyte >>= 8; curbits -= 8; outbytes++; } /*printf("\tbyte $%02X ;%c ", ((c << 3) & 0xFF) | (c >> 5), text[x]); print_bits(stdout, syms[text[x]].u.l.sym, syms[text[x]].u.l.len); printf("\n");*/ } printf("TextEnd\n"); printf("\tENDM\n"); printf("\tMAC FONT\n"); fill_hufftab(lowest, 0, nglyphs, 0, 0, 2); for (x = g = 0; x < 256; x++) { if (!syms[x].count) continue; entropy += syms[x].count*log2((double)textlen/syms[x].count); bits += syms[x].count*syms[x].u.l.len; } printf("FontEnd\n"); printf("\tENDM\n"); //find length of hufftab and output it for (x = y = 0; x < 256; x++) if (hufftab[x]) y = x; printf("\tMAC HUFFTAB\n"); printf("HuffTab\t;%i bytes\n", y+1); for (x = 0; x <= y; x++) { printf("\tbyte %i\n", hufftab[x]); outbytes++; } printf("\tENDM\n"); //add size of decoder outbytes += 66; //subtract stuffed bytes outbytes -= nglyphs*2; printf(";entropy = %.2lf bits vs %i bits vs %i huffman bits, ratio = %.2lf\n", entropy, 8*textlen, bits, 8*textlen/entropy); printf(";huff: %i B -> %i B, ratio = %.2lf\n", textlen, (bits+7)/8, (double)textlen / ((bits+7)/8)); printf(";huff total: %i B vs %i B uncompressed\n", outbytes, textlen); if (nglyphs > 64) printf(";simple 7/8 packed + stuffed size: %i B + decoder\n", textlen*7/8 - nglyphs*2); else printf(";simple 6/8 packed + stuffed size: %i B + decoder\n", textlen*6/8 - nglyphs*2); printf(";%i B stuffed into glyphs\n", nglyphs*2); if (outbytes > textlen) { fprintf(stderr, "%i > %i - Huffman encoding not worth it (TODO: make program choose the most efficient method)\n", outbytes, textlen); return 1; } return 0; }