|
34 | 34 | -- input file as a result. Most font files should work, however, due to their low
|
35 | 35 | -- entropy nature.
|
36 | 36 |
|
| 37 | +-- The exact format of the output is the following: |
| 38 | +-- - 1 byte indicating the offset to the lookup table |
| 39 | +-- - 1 byte indicating the offset to the bitstream |
| 40 | +-- - the huffman tree |
| 41 | +-- - the lookup table |
| 42 | +-- - the bitstream |
| 43 | +-- |
| 44 | +-- The tree itself is made of nodes that are two bytes exactly. The root node is |
| 45 | +-- at offset 2 of the output, just past the two offset bytes. |
| 46 | +-- |
| 47 | +-- The meaning of each byte in the nodes is the following: |
| 48 | +-- - each byte is signed |
| 49 | +-- - the first byte is the left "pointer" of the node (or bit 0 from the bitstream) |
| 50 | +-- - the second byte is the right "pointer" of the node (or bit 1 from the bitstream) |
| 51 | +-- - if a byte is strictly greater than 0, it represents the offset to the next node |
| 52 | +-- --> this means the first ever byte in the tree is most likely going to be 0x04 |
| 53 | +-- --> this means all positive bytes are even, as each node is 2 bytes large |
| 54 | +-- - if a byte is negative or 0, it represents the negated value of the lut index |
| 55 | +-- --> a leaf node isn't stored in the tree, instead we flag its "pointer" |
| 56 | +-- --> signed pointers aren't big enough to store a full byte, hence the lut |
| 57 | +-- |
| 58 | +-- This should explain the decompression algorithm: |
| 59 | +-- |
| 60 | +-- // root node is at offset 2 |
| 61 | +-- int8_t c = 2; |
| 62 | +-- // as long as we didn't hit a leaf, continue down the tree |
| 63 | +-- // this is a nice bgtz, at the end of the loop, which doesn't even |
| 64 | +-- // need to be primed, as c == 2 at first |
| 65 | +-- while (c > 0) { |
| 66 | +-- // refresh our bitbucket if needed |
| 67 | +-- // the constant 0x100 is easy to store in a register for a simple beq, |
| 68 | +-- // while the constant 0x10000 is easy to get from a lui |
| 69 | +-- if (bb == 0x100) bb = *data++ | 0x10000; |
| 70 | +-- // get the next pointer, and consume a bit off the bitbucket |
| 71 | +-- uint32_t bit = bb & 1; |
| 72 | +-- bb >>= 1; |
| 73 | +-- c = tree[c + bit]; |
| 74 | +-- } |
| 75 | +-- // we hit a leaf, get the corresponding byte off the lut |
| 76 | +-- auto b = lut[-c]; |
| 77 | +-- |
| 78 | +-- Several notes: |
| 79 | +-- - this scheme doesn't allow for a lot of nodes in the tree (62 max) |
| 80 | +-- - however, this saves on instructions in the decompression code |
| 81 | +-- --> no need to multiply the "next" pointer by 2 |
| 82 | +-- --> no need to adjust offsets from the root of the tree |
| 83 | +-- - these optimizations work well due to the nature of 8-pixel-wide fonts |
| 84 | +-- --> entropy in a 1bpp encoding will be low enough |
| 85 | +-- --> the current system font has 45 distinct bytes, well below the limit |
| 86 | + |
37 | 87 | -- Run this code with, for example, the following command:
|
38 |
| --- ./pcsx-redux -cli -exec "dofile 'font-compress.lua' compressFont 'font.raw' PCSX.quit()" |
| 88 | +-- ./pcsx-redux -cli -dofile font-compress.lua -exec "compressFont 'font.raw' PCSX.quit()" |
39 | 89 |
|
40 | 90 | -- First part of this code is a generic min-heap class. It can easily be extracted
|
41 | 91 | -- into its own independent library for other means.
|
@@ -210,11 +260,11 @@ function compressFont(fontfile)
|
210 | 260 | local bitstream = {}
|
211 | 261 | local bitbucket = 1
|
212 | 262 | local function pushBit(bit)
|
| 263 | + bitbucket = bitbucket * 2 + bit |
213 | 264 | if bitbucket >= 256 then
|
214 | 265 | bitstream[#bitstream + 1] = bitbucket - 256
|
215 | 266 | bitbucket = 1
|
216 | 267 | end
|
217 |
| - bitbucket = bitbucket * 2 + bit |
218 | 268 | end
|
219 | 269 | local function encode(byte)
|
220 | 270 | local function buildEncoding(node)
|
|
0 commit comments