CNUB logo Huntington Xavier University
Family Dollar
McDonald's
UDF Dairy Farmers Kroger groceries American Telegraph and Telephone WEBN home page

deLZW

Modified: 2009/01/29 01:23 by gerald - Categorized as: Monster Bash
The FAST System (core to Monster Bash and ScubaVenture) uses a custom method to pack data files that is based on the LZW algorithm (See LZW on Wikipedia)).

SEA, Inc. developed the original master compression algorithm and file format (.ARC). It is called LZW compression (Limpel-Ziv-Welch: the scientists who cooperated to create/invent it). CompuServe, Inc. used LZW as the basis of their .GIF design.

I call LZW and ZIP "masterful" because they foregoe the "compression lookup table" required by Huffman encoding which was boss before LZW. This made this kind of compression king of the hill for quite a while. After I had written the resource file code for Monster Bash, the LZW algorithm was bought by UniSys, Inc. and patented. At this time, I am pretty sure that THAT particular patent has expired.

I adapted the LZW algorithm for FAST somewhat, but a standard decoder for it might decompress the files to the form that they are used internally by Monster Bash. Note that, in addition, each of the file types has it own compression/interpretation scheme also (adapted to the particular type of data). Look here for information on Hacking Monster Bash. Monster Bash is actually a HUGE game (well, certainly big for its time anyway). Incredible that we managed to squeeze each of the chapters onto single floppies. (toot, toot!).

You can find a good discussion of the LZW algorithm and commented C code here: http://www.dogma.net/markn/articles/lzw/lzw.htm.


This is a conversion from 8086 machine code to C of deLZW:

/* File: deLZW.c
   I know this is REAL fugly code with all the globals and no comments,
   but, for one, it is an adaption of a highly optimized piece of hand-coded
   8086 assembly code {circa 1992, the machine code for all of this 'C' code
   is only 378 bytes (eat your heart out, GCC and Visual C++ ;) }.
   For two, you can reference a commented copy of the LZW source from
   http://www.dogma.net/markn/articles/lzw/lzw.htm. */ 

typedef          int       BOOL;
typedef unsigned char      BYTE;
typedef unsigned short int WORD;
typedef unsigned long      DWORD;

static bool   eof, rep90;
static DWORD* src;   // input  cursor
static BYTE*  dst;   // output cursor
static BYTE*  edst;  // end of dst (original dst + dstSize)
static DWORD  buf;   // input bit buffer
static int    size;  // current number of bits per input code
static DWORD  mask;  // current mask to AND input bits with (== size**2 - 1)
static int    bufSize;  // number of good bits still in buf
static WORD*  table;
static WORD*  lastTableEntry;
static WORD*  newCode;  // points into table where the next bump
                        //   in code size will occur.

BOOL
initLZW() {
  table = new WORD[0x3D02/2];
  return (int)table;
}

void
exitLZW() {
  delete table; table = 0;
}

static void
put(int code) {
  if (rep90) {
    rep90 = false;
    *dst++ = (BYTE)code;
  } else if (dst[-1] != 0x90)
    *dst++ = (BYTE)code;
  else if (code == 0)
    rep90 = true;
  else {
    BYTE* d = dst-2;
    BYTE rep = *d++;
    while (--code)
      *d++ = rep;
    dst = d;
  }
}

static void
putCode(int code) {
  if (code <= 0x100)
    put(code);
  else {
    WORD* entryp = (WORD*)((BYTE*)table+code);
    putCode(*entryp++);
    code = *entryp;
    while (code > 0x100)
      code = *(WORD*)((BYTE*)table+code);
    put(code);
  }
}

static int reset();

static int
get() {
  int code;
  if (bufSize < size) {
    DWORD d = *src++;
    code = d << bufSize | buf;
    buf = d >> size - bufSize;
    bufSize += 32 - size;
  } else {
    code = buf;
    buf >>= size;
    bufSize -= size;
  }
  if ((code &= mask) > 0x100)
    code = code * 4 - 0x300;
  else if (code == 0x100) {
    if (lastTableEntry != table + (0x3D00/2)) {
      putCode(*lastTableEntry);
      if (dst == edst) {
        eof = true;
        return 0;
      }
    }
    code = reset();
  }
  return code;
}

static int
reset() {
  mask = 0x1FF;
  newCode = table + 0x500/2;
  buf = 0;
  size = 9; bufSize = 0;
  int code = get();
  *(lastTableEntry = table + 0x104/2) = (WORD)code;
  return code;
}

void
deLZW(void* _src, void *_dst, int dstSize) {
  bool preallocated = table != 0;
  if (!preallocated && !initLZW()) return;
  edst = (dst = (BYTE*)_dst) + dstSize;
  src = (DWORD*)_src;
  rep90 = true;
  eof = false;
  reset();
  do {
    int code = get();
    if (eof) break;
    if (lastTableEntry == table + (0x3D00/2)) {
      *lastTableEntry = (WORD)code;
      putCode(code);
    } else {
      putCode(*lastTableEntry++);
      *lastTableEntry++ = (WORD)code;
      *lastTableEntry   = (WORD)code;
      if (lastTableEntry == newCode)
        if (size == 12)
          putCode(*lastTableEntry);
        else {
          size++;
          mask = (mask << 1) + 1;
          newCode = (WORD*)((BYTE*)table + (1 << size + 2) - 0x300);
        }
    }
  } while (dst != edst);
  if (!preallocated) exitLZW();
}