Contributor: STEVEN DEBRUYN

{
I want to use LZH compression for a backup module in one of my programs. I
found a great working source code. I'll post it here ... only problem I have
is that it's kinda slow ... I need to compress a file of 4 Mb ... this file
contains a lot of empty space. I know this routine could be speeded up a LOT.
Here's how ... (I didn't come up with the idea)

      bytes (i.e. a file full of blanks, or nuls).  I believe
      this would be improved by preceding the encoding with
      run length compression, using 90h as the encodeing signal,
      so that  90h nn (with 2 <= nn <= 255) represents
       followed by nn repetitions, i.e. at least a total
      of nn+1 occurences of .  <90h 0> would represent 90h
      itself, and 90h cannot be run length encoded.  <90h 1>
      would represent EOF, thus embedding a specific EOF marker
      in the file.  This allows use where the actual file length
      is unknown before it is reached, i.e. in communications.

See, this guy says it's possible, now it's up to you guys to do it, I'm not
good experienced enough to come up with it myself.
Hope you can help, in the next 3 messages you'll find the LZH code.
}


{$A+,B-,D+,E+,F-,I-,L+,N+,O-,R-,S-,V-}
{$M 16384,0,655360}
program LZH_Test;
uses
  LZH;
type
  IObuf = array[0..10*1024-1] of byte;
var
  infile,outfile: file;
  ibuf,obuf: IObuf;
  s: String;

  procedure Error (msg: String);
  begin
    writeln(msg);
    HALT(1)
  end;

{$F+}
  procedure ReadNextBlock;
{$F-}
  begin
    inptr:= 0;
    BlockRead(infile,inbuf^,sizeof(ibuf),inend);
    if IoResult>0 then Error('! Error reading input file');
  end;

{$F+}
  procedure WriteNextBlock;
{$F-}
  var
    wr: word;
  begin
    BlockWrite(outfile,outbuf^,outptr,wr);
    if (IoResult>0) or (wr0 then Error('! Can''t open input file');
    inbuf:= @ibuf;
    ReadToBuffer:= ReadNextBlock;
    ReadToBuffer;
  end;

  procedure OpenOutput (fn: String);
  begin
    assign(outfile,fn); rewrite(outfile,1);
    if IoResult>0 then Error('! Can''t open output file');
    outbuf:= @obuf;
    outend:= sizeof(obuf);
    outptr:= 0;
    WriteFromBuffer:= WriteNextBlock;
  end;

begin {main}
   if ParamCount<>3 then begin
     writeln('Usage: lzhuf e(compression)|d(uncompression) infile outfile');
     HALT(1)
   end;
   OpenInput(ParamStr(2));
   OpenOutput(ParamStr(3));
   s:= ParamStr(1);
   case s[1] of
     'e','E': Encode(filesize(infile));
     'd','D': Decode
   else
     Error('! Use [D] for Decompression or [E] for Compression')
   end;
   close(infile); if IoResult>0 then Error('! Error closing input file');
   if outptr>0 then WriteNextBlock;
   close(outfile); if IoResult>0 then Error('! Error closing output file');
end.


{ LZHUF.C English version 1.0
  Based on Japanese version 29-NOV-1988
  LZSS coded by Haruhiko OKUMURA
  Adaptive Huffman Coding coded by Haruyasu YOSHIZAKI
  Edited and translated to English by Kenji RIKITAKE
  Converted to Turbo Pascal 5.0
    by Peter Sawatzki with assistance of Wayne Sullivan
}
{$i-,r-,v-,s-}
Unit LZH;
Interface
type
  bufar = array[0..0] of byte; {will be overindexed}
var
  WriteFromBuffer,
  ReadToBuffer: procedure;
  inbuf,outbuf: ^bufar;
  inptr,inend,outptr,outend: word;

  procedure Encode (bytes: LongInt);
  procedure Decode;

Implementation
Const
{-LZSS Parameters}
  N         = 4096; {Size of string buffer}
  F         = 60;   {60 Size of look-ahead buffer}
  THRESHOLD = 2;
  NODENIL   = N;    {End of tree's node}

{-Huffman coding parameters}
  N_CHAR    = 256-THRESHOLD+F;
                            {character code (= 0..N_CHAR-1)}
  T         = N_CHAR*2 -1;  {Size of table}
  R         = T-1;          {root position}
  MAX_FREQ  = $8000; {update when cumulative frequency reaches to this value}

{-Tables for encoding/decoding upper 6 bits of sliding dictionary pointer}
{-encoder table}
p_len: array[0..63] of byte =
       ($03,$04,$04,$04,$05,$05,$05,$05,$05,$05,$05,$05,$06,$06,$06,$06,
        $06,$06,$06,$06,$06,$06,$06,$06,$07,$07,$07,$07,$07,$07,$07,$07,
        $07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,
        $08,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08);

p_code: array[0..63] of byte =
       ($00,$20,$30,$40,$50,$58,$60,$68,$70,$78,$80,$88,$90,$94,$98,$9C,
        $A0,$A4,$A8,$AC,$B0,$B4,$B8,$BC,$C0,$C2,$C4,$C6,$C8,$CA,$CC,$CE,
        $D0,$D2,$D4,$D6,$D8,$DA,$DC,$DE,$E0,$E2,$E4,$E6,$E8,$EA,$EC,$EE,
        $F0,$F1,$F2,$F3,$F4,$F5,$F6,$F7,$F8,$F9,$FA,$FB,$FC,$FD,$FE,$FF);

{-decoder table}
d_code: array[0..255] of byte =
       ($00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,
        $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,
        $01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,
        $02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,
        $03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,
        $04,$04,$04,$04,$04,$04,$04,$04,$05,$05,$05,$05,$05,$05,$05,$05,
        $06,$06,$06,$06,$06,$06,$06,$06,$07,$07,$07,$07,$07,$07,$07,$07,
        $08,$08,$08,$08,$08,$08,$08,$08,$09,$09,$09,$09,$09,$09,$09,$09,
        $0A,$0A,$0A,$0A,$0A,$0A,$0A,$0A,$0B,$0B,$0B,$0B,$0B,$0B,$0B,$0B,
        $0C,$0C,$0C,$0C,$0D,$0D,$0D,$0D,$0E,$0E,$0E,$0E,$0F,$0F,$0F,$0F,
        $10,$10,$10,$10,$11,$11,$11,$11,$12,$12,$12,$12,$13,$13,$13,$13,
        $14,$14,$14,$14,$15,$15,$15,$15,$16,$16,$16,$16,$17,$17,$17,$17,
        $18,$18,$19,$19,$1A,$1A,$1B,$1B,$1C,$1C,$1D,$1D,$1E,$1E,$1F,$1F,
        $20,$20,$21,$21,$22,$22,$23,$23,$24,$24,$25,$25,$26,$26,$27,$27,
        $28,$28,$29,$29,$2A,$2A,$2B,$2B,$2C,$2C,$2D,$2D,$2E,$2E,$2F,$2F,
        $30,$31,$32,$33,$34,$35,$36,$37,$38,$39,$3A,$3B,$3C,$3D,$3E,$3F);

d_len: array[0..255] of byte =
       ($03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,
        $03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,
        $04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,
        $04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,
        $04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,
        $05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,
        $05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,
        $05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,
        $05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,
        $06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,
        $06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,
        $06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,
        $07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,
        $07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,
        $07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,
        $08,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08);

  getbuf: word = 0;
  getlen: byte = 0;
  putbuf: word = 0;
  putlen: word = 0;

  textsize: LongInt = 0;
  codesize: LongInt = 0;
  printcount: LongInt = 0;

var
  text_buf: array[0..N + F - 2] of byte;
  match_position, match_length: word;
  lson,dad: array[0..N] of word;
  rson:     array[0..N + 256] of word;

  freq: array[0..T] of word; {cumulative freq table}

{-pointing parent nodes. area [T..(T + N_CHAR - 1)] are pointers for leaves}
  prnt: array [0..T+N_CHAR-1] of word;

{-pointing children nodes (son[], son[] + 1)}
  son: array[0..T-1] of word;

  function getc: byte;
  begin
    getc:= inbuf^[inptr];
    Inc(inptr);
    if inptr=inend then ReadToBuffer
  end;

  procedure putc (c: byte);
  begin
    outbuf^[outptr]:= c;
    Inc(outptr);
    if outptr=outend then
      WriteFromBuffer
  end;

procedure InitTree;
{-Initializing tree}
var
  i: word;
begin
  for i:= N+1 to N+256 do rson[i] := NODENIL; {root}
  for i:= 0 to N-1 do     dad[i]  := NODENIL; {node}
end;

procedure InsertNode (r: word);
{-Inserting node to the tree}
Label
  Done;
var
  i,p: word;
  geq: boolean;
  c: word;
begin
  geq:= true;
  p:= N+1+text_buf[r];
  rson[r]:= NODENIL;
  lson[r]:= NODENIL;
  match_length := 0;
  while TRUE do begin
    if geq then
      if rson[p]=NODENIL then begin
        rson[p]:= r;
        dad[r] := p;
        exit
      end else
        p:= rson[p]
    else
      if lson[p]=NODENIL then begin
        lson[p]:= r;
        dad[r] := p;
        exit
      end else
        p:= lson[p];
    i:= 1;
    while (i=text_buf[p+i]) or (i=F);

    if i>THRESHOLD then begin
      if i>match_length then begin
        match_position := (r-p) AND (N-1) -1;
        match_length:= i;
        if match_length>=F then goto done;
      end;
      if i=match_length then begin
        c:= (r-p) AND (N-1) -1;
        if cNODENIL then begin
      repeat
        q:= rson[q];
      until rson[q]=NODENIL;
      rson[dad[q]]:= lson[q];
      dad[lson[q]]:= dad[q];
      lson[q]:= lson[p];
      dad[lson[p]]:= q;
    end;
    rson[q]:= rson[p];
    dad[rson[p]]:= q;
  end;
  dad[q]:= dad[p];
  if rson[dad[p]]=p then
    rson[dad[p]]:= q
  else
    lson[dad[p]]:= q;
  dad[p]:= NODENIL;
end;

function GetBit: byte;
{-get one bit}
begin
  while getlen<=8 do begin
    getbuf:= getbuf OR (WORD(getc) SHL (8-getlen));
    Inc(getlen,8);
  end;
  GetBit:= getbuf SHR 15;
  {if (getbuf AND $8000)>0 then GetBit:= 1 else GetBit:= 0;}
  getbuf:= getbuf SHL 1;
  Dec(getlen);
end;

function GetByte: Byte;
{-get a byte}
begin
  while getlen<=8 do begin
    getbuf:= getbuf OR (WORD(getc) SHL (8 - getlen));
    Inc(getlen,8);
  end;
  GetByte:= Hi(getbuf);
  getbuf:= getbuf SHL 8;
  Dec(getlen,8);
end;

procedure Putcode (l: byte; c: word);
{-output l bits}
begin
  putbuf:= putbuf OR (c SHR putlen);
  Inc(putlen,l);
  if putlen>=8 then begin
    putc(Hi(putbuf));
    Dec(putlen,8);
    if putlen>=8 then begin
      putc(Lo(putbuf));
      Inc(codesize,2);
      Dec(putlen,8);
      putbuf:= c SHL (l-putlen);
    end else begin
      putbuf:= Swap(putbuf AND $FF); {SHL 8;}
      Inc(codesize);
    end
  end
end;

procedure StartHuff;
{-initialize freq tree}
var
  i,j: word;
begin
  for i:= 0 to N_CHAR-1 do begin
    freq[i]:= 1;
    son[i] := i+T;
    prnt[i+T]:= i
  end;
  i:= 0; j:= N_CHAR;
  while j<=R do begin
    freq[j]:= freq[i]+freq[i+1];
    son[j] := i;
    prnt[i]:= j;
    prnt[i+1]:= j;
    Inc(i,2); Inc(j)
  end;
  freq[T]:= $FFFF;
  prnt[R]:= 0;
end;



procedure reconst;
{-reconstruct freq tree }
var
  i,j,k,f,l: word;
begin
  {-halven cumulative freq for leaf nodes}
  j:= 0;
  for i:= 0 to T-1 do
    if son[i]>=T then begin
      freq[j]:= (freq[i]+1) SHR 1;
      son[j] := son[i];
      Inc(j)
    end;
  {-make a tree : first, connect children nodes}
  i:= 0; j:= N_CHAR;
  while jfreq[l] then begin
      while k>freq[l+1] do Inc(l);
      freq[c]:= freq[l];
      freq[l]:= k;

      i:= son[c];
      prnt[i]:= l;
      if i0 then Inc(code,$8000);
    Inc(len);
    k:= prnt[k];
  until k=R;
  Putcode(len,code);
  update(c)
end;

procedure EncodePosition(c: word);
var
  i: word;
begin
  {-output upper 6 bits with encoding}
  i:= c SHR 6;
  Putcode(p_len[i], WORD(p_code[i]) SHL 8);
  {-output lower 6 bits directly}
  Putcode(6, (c AND $3F) SHL 10);
end;

procedure EncodeEnd;
begin
  if putlen>0 then begin
    putc(Hi(putbuf));
    Inc(codesize)
  end
end;

function DecodeChar: word;
var
  c: word;
begin
  c:= son[R];
  {-start searching tree from the root to leaves.
    choose node #(son[]) if input bit = 0
    else choose #(son[]+1) (input bit = 1)}
  while c0 do begin
    Dec(j);
    i:= (i SHL 1) OR GetBit;
  end;
  DecodePosition:= c OR (i AND $3F);
end;

{-Compression }
procedure Encode (bytes: LongInt);
{-Encoding/Compressing}
type
  ByteRec = record
              b0,b1,b2,b3: byte
            end;
var
  i,c,len,r,s,last_match_length: word;
begin
  {-write size of original text}
  with ByteRec(Bytes) do begin
    putc(b0);
    putc(b1);
    putc(b2);
    putc(b3)
  end;
  if bytes=0 then exit;
  textsize:= 0;
  StartHuff;
  InitTree;
  s:= 0;
  r:= N-F;
  fillchar(text_buf[0],r,' ');
  len:= 0;
  while (len0) do begin
    text_buf[r+len]:= getc;
    Inc(len)
  end;
  textsize := len;
  for i:= 1 to F do InsertNode(r - i);
  InsertNode(r);
  repeat
    if match_length>len then match_length:= len;
    if match_length<=THRESHOLD then begin
      match_length := 1;
      EncodeChar(text_buf[r])
    end else begin
      EncodeChar(255 - THRESHOLD + match_length);
      EncodePosition(match_position)
    end;
    last_match_length := match_length;
    i:= 0;
    while (i0) do begin
      Inc(i);
      DeleteNode(s);
      c:= getc;
      text_buf[s]:= c;
      if sprintcount then begin
      write(textsize,#13);
      Inc(printcount,1024)
    end;
    while i0 then InsertNode(r)
    end;
  until len=0;
  EncodeEnd;
  writeln('input:  ',textsize,' bytes');
  writeln('output: ',codesize,' bytes');
  writeln('compression: ',textsize*100 DIV codesize,'%');
end;

procedure Decode;
{-Decoding/Uncompressing}
type
  ByteRec = Record
              b0,b1,b2,b3: byte
            end;
var
  i,j,k,r,c: word;
  count: LongInt;
begin
  {-read size of original text}
  with ByteRec(textsize) do begin
    b0:= getc;
    b1:= getc;
    b2:= getc;
    b3:= getc
  end;
  if textsize=0 then exit;
  StartHuff;
  fillchar(text_buf[0],N-F,' ');
  r:= N-F;
  count:= 0;
  while countprintcount then begin
      write(count,#13);
      Inc(printcount,1024)
    end
  end;
  writeln(count);
end;

end.