Path: utzoo!attcan!uunet!lll-winken!lll-tis!helios.ee.lbl.gov!pasteur!ucbvax!CORNELLC.CCS.CORNELL.EDU.BITNET!ewilts%Ins.MRC.AdhocNet.CA%Stasis.MRC.AdhocNet.CA%UNCAEDU.
From: ewilts%Ins.MRC.AdhocNet.CA%Stasis.MRC.AdhocNet.CA%UNCAEDU.@CORNELLC.CCS.CORNELL.EDU.BITNET (Ed Wilts)
Newsgroups: comp.os.vms
Subject: ARC_C.SHAR19_OF_19
Message-ID: <880624093026.02j@Ins.MRC.AdhocNet.CA>
Date: 24 Jun 88 15:30:23 GMT
Sender: daemon@ucbvax.BERKELEY.EDU
Organization: The Internet
Lines: 459
$Part19:
$ File_is="SQUASH.C"
$ Check_Sum_is=240194511
$ Copy SYS$Input VMS_SHAR_DUMMY.DUMMY
X/* ARC - Archive utility - SQUASH
X
X(C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED
X
X`009This is a quick hack to ARCLZW to make it handle squashed archives.
X`009Dan Lanciani (ddl@harvard.*) July 87
X
X*/
X#include
X#include "arc.h"
X
X/* definitions for the new dynamic Lempel-Zev crunching */
X
X#define BITS 13 /* maximum bits per code */
X#define HSIZE 10007 /* 80% occupancy */
X#define INIT_BITS 9 /* initial number of bits/code */
X
Xstatic INT n_bits; /* number of bits/code */
Xstatic INT maxcode; /* maximum code, given n_bits */
X#define MAXCODE(n) ((1<<(n)) - 1) /* maximum code calculation */
Xstatic INT maxcodemax = 1 << BITS; /* largest possible code (+1) */
X
Xstatic unsigned char buf[BITS]; /* input/output buffer */
X
Xstatic unsigned char lmask[9] = /* left side masks */
X{ 0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00 };
Xstatic unsigned char rmask[9] = /* right side masks */
X{ 0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
X
Xstatic INT offset; /* byte offset for code output */
Xstatic long in_count; /* length of input */
Xstatic long bytes_out; /* length of compressed output */
Xstatic unsigned INT ent;
X
Xstatic long htab[HSIZE]; /* hash code table (crunch) */
Xstatic unsigned INT codetab[HSIZE]; /* string code table (crunch) */
X
Xstatic unsigned INT *prefix = codetab; /* prefix code table (uncrunch) */
X
Xstatic unsigned char suffix[HSIZE]; /* suffix table (uncrunch) */
Xstatic INT free_ent; /* first unused entry */
Xstatic INT firstcmp; /* true at start of compression */
Xstatic unsigned char stack[HSIZE]; /* local push/pop stack */
X
X/*
X * block compression parameters -- after all codes are used up,
X * and compression rate changes, start over.
X */
X
Xstatic INT clear_flg;
Xstatic long ratio;
X#define CHECK_GAP 10000 /* ratio check interval */
Xstatic long checkpoint;
X
X/*
X * the next two codes should not be changed lightly, as they must not
X * lie within the contiguous general code space.
X */
X#define FIRST 257 /* first free entry */
X#define CLEAR 256 /* table clear output code */
X
Xstatic INT cl_block(t) /* table clear for block compress */
XFILE *t; /* our output file */
X{
X long rat;
X INT putcode();
X
X checkpoint = in_count + CHECK_GAP;
X
X if(in_count > 0x007fffff) /* shift will overflow */
X { rat = bytes_out >> 8;
X if(rat == 0) /* Don't divide by zero */
X rat = 0x7fffffff;
X else rat = in_count / rat;
X }
X else rat = (in_count<<8)/bytes_out;/* 8 fractional bits */
X
X if(rat > ratio)
X ratio = rat;
X else
X { ratio = 0;
X setmem`009(htab,HSIZE*sizeof(long),0xff);
X free_ent = FIRST;
X clear_flg = 1;
X putcode(CLEAR,t);
X }
X}
X
X/*****************************************************************
X *
X * Output a given code.
X * Inputs:
X * code: A n_bits-bit integer. If == -1, then EOF. This assumes
X * that n_bits =< (long)wordsize - 1.
X * Outputs:
X * Outputs code to the file.
X * Assumptions:
X * Chars are 8 bits long.
X * Algorithm:
X * Maintain a BITS character long buffer (so that 8 codes will
X * fit in it exactly). When the buffer fills up empty it and start over.
X */
X
Xstatic INT putcode(code,t) /* output a code */
XINT code; /* code to output */
XFILE *t; /* where to put it */
X{
X INT r_off = offset; /* right offset */
X INT bits = n_bits; /* bits to go */
X unsigned char *bp = buf; /* buffer pointer */
X INT n; /* index */
X
X if(code >= 0) /* if a real code */
X { /*
X * Get to the first byte.
X */
X bp += (r_off >> 3);
X r_off &= 7;
X
X /*
X * Since code is always >= 8 bits, only need to mask the first
X * hunk on the left.
X */
X *bp = (*bp&rmask[r_off]) | (code<>= (8 - r_off);
X
X /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
X if(bits >= 8)
X { *bp++ = code;
X code >>= 8;
X bits -= 8;
X }
X
X /* Last bits. */
X if(bits)
X *bp = code;
X
X offset += n_bits;
X
X if(offset == (n_bits << 3))
X { bp = buf;
X bits = n_bits;
X bytes_out += bits;
X do
X putc_pak(*bp++,t);
X while(--bits);
X offset = 0;
X }
X
X /*
X * If the next entry is going to be too big for the code size,
X * then increase it, if possible.
X */
X if(free_ent>maxcode || clear_flg>0)
X { /*
X * Write the whole buffer, because the input side won't
X * discover the size increase until after it has read it.
X */
X if(offset > 0)
X { bp = buf; /* reset pointer for writing */
X bytes_out += n = n_bits;
X while(n--)
X putc_pak(*bp++,t);
X }
X offset = 0;
X
X if(clear_flg) /* reset if clearing */
X { maxcode = MAXCODE(n_bits = INIT_BITS);
X clear_flg = 0;
X }
X else /* else use more bits */
X { n_bits++;
X if(n_bits == BITS)
X maxcode = maxcodemax;
X else
X maxcode = MAXCODE(n_bits);
X }
X }
X }
X
X else /* dump the buffer on EOF */
X { bytes_out += n = (offset+7) / 8;
X
X if(offset > 0)
X while(n--)
X putc_pak(*bp++,t);
X offset = 0;
X }
X}
X
X/*****************************************************************
X *
X * Read one code from the standard input. If EOF, return -1.
X * Inputs:
X * cmpin
X * Outputs:
X * code or -1 is returned.
X */
X
Xstatic INT getcode(f) /* get a code */
XFILE *f; /* file to get from */
X{
X INT code;
X static INT offset = 0, size = 0;
X INT r_off, bits;
X unsigned char *bp = buf;
X
X if(clear_flg > 0 || offset >= size || free_ent > maxcode)
X { /*
X * If the next entry will be too big for the current code
X * size, then we must increase the size. This implies reading
X * a new buffer full, too.
X */
X if(free_ent > maxcode)
X { n_bits++;
X if(n_bits == BITS)
X maxcode = maxcodemax; /* won't get any bigger now */
X else maxcode = MAXCODE(n_bits);
X }
X if(clear_flg > 0)
X { maxcode = MAXCODE(n_bits = INIT_BITS);
X clear_flg = 0;
X }
X
X for(size=0; size> 3);
X r_off &= 7;
X
X /* Get first part (low order bits) */
X code = (*bp++ >> r_off);
X bits -= 8 - r_off;
X r_off = 8 - r_off; /* now, offset into code word */
X
X /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
X if(bits >= 8)
X { code |= *bp++ << r_off;
X r_off += 8;
X bits -= 8;
X }
X /* high order bits. */
X code |= (*bp & rmask[bits]) << r_off;
X offset += n_bits;
X
X return code;
X}
X
X/*
X * compress a file
X *
X * Algorithm: use open addressing double hashing (no chaining) on the
X * prefix code / next character combination. We do a variant of Knuth's
X * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime
X * secondary probe. Here, the modular division first probe is gives way
X * to a faster exclusive-or manipulation. Also do block compression with
X * an adaptive reset, where the code table is cleared when the compression
X * ratio decreases, but after the table fills. The variable-length output
X * codes are re-sized at this point, and a special CLEAR code is generated
X * for the decompressor.
X */
X
XINT sqinit_cm(f,t) /* initialize for compression */
XFILE *f; /* file we will be compressing */
XFILE *t; /* where we will put it */
X{
X offset = 0;
X bytes_out = 0;
X clear_flg = 0;
X ratio = 0;
X in_count = 1;
X checkpoint = CHECK_GAP;
X maxcode = MAXCODE(n_bits = INIT_BITS);
X free_ent = FIRST;
X setmem(htab,HSIZE*sizeof(long),0xff);
X n_bits = INIT_BITS; /* set starting code size */
X
X firstcmp = 1; /* next byte will be first */
X}
X
XINT sqputc_cm(c,t) /* compress a character */
Xunsigned char c; /* character to compress */
XFILE *t; /* where to put it */
X{
X static long fcode;
X static INT hshift;
X#ifdef VAXC
X register INT i;
X#else
X INT i;
X#endif VAXC
X INT disp;
X
X if(firstcmp) /* special case for first byte */
X { ent = c; /* remember first byte */
X
X hshift = 0;
X for(fcode=(long)HSIZE; fcode<65536L; fcode*=2L)
X hshift++;
X hshift = 8 - hshift; /* set hash code range bund */
X
X firstcmp = 0; /* no longer first */
X return;
X }
X
X in_count++;
X fcode =(long)(((long)c << BITS)+ent);
X i = (c< HSIZE) printf("%SQUASH-W-BOUND, i > HSIZE\n");
X#endif VAXC
X
X if(htab[i] == fcode)
X { ent = codetab[i];
X return;
X }
X if(htab[i] > 0)
X goto probe;
X
Xnomatch:
X putcode(ent,t);
X ent = c;
X if(free_ent < maxcodemax)
X { codetab[i] = free_ent++; /* code -> hashtable */
X htab[i] = fcode;
X }
X else if((long)in_count >= checkpoint)
X cl_block(t);
X}
X
Xlong sqpred_cm(t) /* finish compressing a file */
XFILE *t; /* where to put it */
X{
X putcode(ent,t); /* put out the final code */
X putcode(-1,t); /* tell output we are done */
X
X return bytes_out; /* say how big it got */
X}
X
X/*
X * Decompress a file. This routine adapts to the codes in the file
X * building the string table on-the-fly; requiring no table to be stored
X * in the compressed file. The tables used herein are shared with those of
X * the compress() routine. See the definitions above.
X */
X
XINT sqdecomp(f,t) /* decompress a file */
XFILE *f; /* file to read codes from */
XFILE *t; /* file to write text to */
X{
X unsigned char *stackp;
X INT finchar;
X INT code, oldcode, incode;
X
X n_bits = INIT_BITS; /* set starting code size */
X clear_flg = 0;
X
X /*
X * As above, initialize the first 256 entries in the table.
X */
X maxcode = MAXCODE(n_bits=INIT_BITS);
X for(code = 255; code >= 0; code--)
X { prefix[code] = 0;
X suffix[code] = (unsigned char)code;
X }
X free_ent = FIRST;
X
X finchar = oldcode = getcode(f);
X if(oldcode == -1) /* EOF already? */
X return; /* Get out of here */
X putc_unp((char)finchar,t); /* first code must be 8 bits=char */
X stackp = stack;
X
X while((code = getcode(f))> -1)
X { if(code==CLEAR)
X { for(code = 255; code >= 0; code--)
X prefix[code] = 0;
X clear_flg = 1;
X free_ent = FIRST - 1;
X if((code=getcode(f))==-1)/* O, untimely death! */
X break;
X }
X incode = code;
X /*
X * Special case for KwKwK string.
X */
X if(code >= free_ent)
X { *stackp++ = finchar;
X code = oldcode;
X }
X
X /*
X * Generate output characters in reverse order
X */
X while(code >= 256)
X { *stackp++ = suffix[code];
X code = prefix[code];
X }
X *stackp++ = finchar = suffix[code];
X
X /*
X * And put them out in forward order
X */
X do
X putc_unp(*--stackp,t);
X while(stackp > stack);
X
X /*
X * Generate the new entry.
X */
X if((code=free_ent) < maxcodemax)
X { prefix[code] = (unsigned short)oldcode;
X suffix[code] = finchar;
X free_ent = code+1;
X }
X /*
X * Remember previous code.
X */
X oldcode = incode;
X }
X}
X
$ GoSub Convert_File
$ Exit