Path: utzoo!attcan!uunet!lll-winken!lll-lcc!mordor!joyce!ames!pasteur!ucbvax!LBL.GOV!nagy%warner.hepnet
From: nagy%warner.hepnet@LBL.GOV (Frank J. Nagy, VAX Wizard & Guru)
Newsgroups: comp.os.vms
Subject: Lempel-Ziv file (de)compress from VAX SIG tapes (Part 1 of 3)
Message-ID: <880712052121.2760604b@LBL.Gov>
Date: 12 Jul 88 12:21:21 GMT
Sender: daemon@ucbvax.BERKELEY.EDU
Organization: The Internet
Lines: 1845

...................... Cut between dotted lines and save. .....................
$!.............................................................................
$! VAX/VMS archive file created by VMS_SHARE V06.00 26-May-1988.
$!
$! VMS_SHARE was written by James Gray (Gray:OSBUSouth@Xerox.COM) from
$! VMS_SHAR by Michael Bednarek (U3369429@ucsvc.dn.mu.oz.au).
$!
$! To unpack, simply save, concatenate all parts into one file and
$! execute (@) that file.
$!
$! This archive was created by user NAGY
$! on  6-APR-1866 20:07:23.73.
$!
$! ATTENTION: To keep each article below 127 blocks (65024 bytes), this
$!            program has been transmitted in 3 parts.  You should
$!            concatenate ALL parts to ONE file and execute (@)that file.
$!
$! It contains the following 16 files:
$!        AAAREADME.TXT
$!        DESCRIP.MMS
$!        LZ.H
$!        LZ.HLP
$!        LZCMP1.C
$!        LZCMP2.C
$!        LZCMP3.C
$!        LZDCM1.C
$!        LZDCM2.C
$!        LZDCM3.C
$!        LZIO.C
$!        LZVIO.C
$!        LZVIOISAM.C
$!        MAKEFILE.TXT
$!        README.TXT
$!        RULES.MMS
$!
$!==============================================================================
$! Start-up guard for the unpacking procedure.
$!
$! SET SYMBOL/SCOPE=( NOLOCAL, NOGLOBAL ) keeps symbols defined outside this
$! procedure (outer-level local symbols and global symbols) from being seen
$! here, so a user's own definitions cannot redefine the commands used below.
$!
$! The VMS version string is then compared, as a string (.GES), with "V4.4".
$! Older systems get an explanatory message and the procedure exits with
$! status 44 (presumably SS$_ABORT -- confirm); otherwise control passes to
$! START, skipping over the UNPACK_FILE subroutine.
$ SET SYMBOL/SCOPE=( NOLOCAL, NOGLOBAL )
$ VERSION = F$GETSYI( "VERSION" )
$ IF VERSION .GES "V4.4" THEN GOTO VERSION_OK
$ WRITE SYS$OUTPUT "You are running VMS ''VERSION'; ", -
    "VMS_SHARE V06.00 26-May-1988 requires VMS V4.4 or higher."
$ EXIT 44 
$VERSION_OK:
$ GOTO START
$
$! UNPACK_FILE -- subroutine; enter with GOSUB, leaves with RETURN.
$!
$! Inputs:  FILE_IS                name of the file to create
$!          CHECKSUM_IS            checksum recorded when the archive was made
$!          VMS_SHARE_DUMMY.DUMMY  the encoded archive lines for this file
$!
$! The TPU program below is passed to EDIT/TPU through SYS$INPUT.  Do not put
$! any "$" line (comments included) among the TPU lines: the first "$" line
$! ends the input stream.  What the TPU code does:
$!   * Removes the first character of every line and acts on it:
$!       "X"       the line is complete
$!       "V"       the line is continued; the next line is appended to it
$!       "+", "-"  part-boundary markers ("+-+-+-+ Beginning ..." and
$!                 "-+-+-+-+-+ End ...") are deleted, together with whatever
$!                 lies between an End marker and the next Beginning marker
$!       other     the line is counted and copied to the {Errors} buffer
$!   * Expands back-quote escapes: a doubled back-quote becomes a single one,
$!     and a back-quote followed by three digits becomes the character with
$!     that decimal code (009 is a tab).
$!   * If any line was rejected, the error report is written to SYS$COMMAND.
$!
$! SYS$OUTPUT is pointed at NL: in user mode so the editor's own messages are
$! discarded for that one image only.
$UNPACK_FILE:
$ WRITE SYS$OUTPUT "Creating ''FILE_IS'"
$ DEFINE/USER_MODE SYS$OUTPUT NL:
$ EDIT/TPU/COMMAND=SYS$INPUT/NODISPLAY/OUTPUT='FILE_IS'/NOSECTION -
    VMS_SHARE_DUMMY.DUMMY
b_part := CREATE_BUFFER( "{Part}", GET_INFO( COMMAND_LINE, "file_name" ) )
; s_file_spec := GET_INFO( COMMAND_LINE, "output_file" ); SET( OUTPUT_FILE
, b_part, s_file_spec ); b_errors := CREATE_BUFFER( "{Errors}" ); i_errors 
:= 0; pat_beg_1 := ANCHOR & "-+-+-+ Beginning"; pat_beg_2 := LINE_BEGIN 
& "+-+-+-+ Beginning"; pat_end := ANCHOR & "+-+-+-+-+ End"; POSITION
( BEGINNING_OF( b_part ) ); i_append_line := 0; LOOP EXITIF MARK( NONE 
) = END_OF( b_part ); s_x := ERASE_CHARACTER( 1 ); IF s_x = "+" THEN r_skip 
:= SEARCH( pat_beg_1, FORWARD, EXACT ); IF r_skip <> 0 THEN s_x := ""
; MOVE_HORIZONTAL( -CURRENT_OFFSET ); ERASE_LINE; ENDIF; ENDIF
; IF s_x = "-" THEN r_skip := SEARCH( pat_end, FORWARD, EXACT ); IF r_skip <
> 0 THEN s_x := ""; MOVE_HORIZONTAL( -CURRENT_OFFSET ); m_skip := MARK( NONE )
; r_skip := SEARCH( pat_beg_2, FORWARD, EXACT ); IF r_skip <> 0 THEN POSITION
( END_OF( r_skip ) ); MOVE_HORIZONTAL( -CURRENT_OFFSET ); MOVE_VERTICAL( 1 )
; MOVE_HORIZONTAL( -1 ); ELSE POSITION( END_OF( b_part ) ); ENDIF; ERASE
( CREATE_RANGE( m_skip, MARK( NONE ), NONE ) ); ENDIF; ENDIF
; IF s_x = "V" THEN s_x := ""; IF i_append_line <> 0 THEN APPEND_LINE
; MOVE_HORIZONTAL( -CURRENT_OFFSET ); ENDIF; i_append_line := 1; MOVE_VERTICAL
( 1 ); ENDIF; IF s_x = "X" THEN s_x := ""; IF i_append_line <
> 0 THEN APPEND_LINE; MOVE_HORIZONTAL( -CURRENT_OFFSET ); ENDIF
; i_append_line := 0; MOVE_VERTICAL( 1 ); ENDIF; IF s_x <> "" THEN i_errors 
:= i_errors + 1; s_text := CURRENT_LINE; POSITION( b_errors ); COPY_TEXT
( "The following line could not be unpacked properly:" ); SPLIT_LINE
; COPY_TEXT( s_x ); COPY_TEXT( s_text ); POSITION( b_part ); MOVE_VERTICAL( 1 
); ENDIF; ENDLOOP; POSITION( BEGINNING_OF( b_part ) ); LOOP r_x := SEARCH( "`"
, FORWARD, EXACT ); EXITIF r_x = 0; POSITION( r_x ); ERASE_CHARACTER( 1 )
; IF CURRENT_CHARACTER = "`" THEN MOVE_HORIZONTAL( 1 ); ELSE COPY_TEXT( ASCII
( INT( ERASE_CHARACTER( 3 ) ) ) ); ENDIF; ENDLOOP; IF i_errors = 0 THEN SET
( NO_WRITE, b_errors, ON ); ELSE POSITION( BEGINNING_OF( b_errors ) )
; COPY_TEXT( FAO( "The following !UL errors were detected while unpacking !AS"
, i_errors, s_file_spec ) ); SPLIT_LINE; SET( OUTPUT_FILE, b_errors
, "SYS$COMMAND" ); ENDIF; EXIT; 
$! The scratch copy is no longer needed once the editor has written FILE_IS.
$ DELETE VMS_SHARE_DUMMY.DUMMY;*
$! CHECKSUM leaves its result in CHECKSUM$CHECKSUM.  The .EQ. comparison
$! yields 0 or 1, which F$ELEMENT uses to pick "failed!" or "passed.".
$ CHECKSUM 'FILE_IS
$ WRITE SYS$OUTPUT " CHECKSUM ", -
  F$ELEMENT( CHECKSUM_IS .EQ. CHECKSUM$CHECKSUM, ",", "failed!,passed." )
$ RETURN
$
$! START -- main line of the archive.  Every file is handled the same way:
$! set FILE_IS and CHECKSUM_IS, copy the encoded lines that follow into
$! VMS_SHARE_DUMMY.DUMMY, then GOSUB UNPACK_FILE to decode and verify them.
$! The data lines must come directly after the COPY command; any "$" line
$! placed between them would end the copied input early.
$START:
$ FILE_IS = "AAAREADME.TXT"
$ CHECKSUM_IS = 874896225
$ COPY SYS$INPUT VMS_SHARE_DUMMY.DUMMY
XThis area contains source and executable for the LZCMP and LZDCM
Xutilities used in several places on the tape to compress large
Xfiles to gain room. The sources contain documentation in comments
Xat the start of the code for those curious about the programs'
Xoperation.
X`009To use LZCMP and LZDCM, define them as DCL foreign
Xsymbols. For instance, you might use commands like
X
X$LZCMP:==$DECUS$DISK:[VAX86D.LZW]LZCMP
X$LZDCM:==$DECUS$DISK:[VAX86D.LZW]LZDCM
X
XThen to compress a file use a command like
X
X$ LZCMP -v inputfile.typ squeezedfile.typ
X
Xor to decompress the file use a command like
X
X$ LZDCM squeezedfile.typ unsqueezedfile.typ
X
XIf you use those commands, "inputfile.typ" and "unsqueezedfile.typ"
Xwill be copies of each other.
X
X`009Note that you'd have to define "DECUS$DISK" before giving the
X$ lzcmp:==...
Xetc. definitions above... the idea is to run the programs as foreign
Xcommands. The -v switch for LZCMP is the "verbose" switch so that
Xwhen LZCMP is done it'll report to you what it did. The default
Xoperation on VMS preserves file attributes in the squeezed file.
XSeveral "compatibility" options (not used on the tapes) treat the
Xfiles in one of two stream modes (text and binary) and can be used
Xfor transporting files to/from non-VMS systems.
X`009It is presumed that if you're able to read the tape in VMS
XBACKUP, you must have a VMS system handy to do the decompress on, and
Xthat in that case, preserving all the file attributes is desirable.
X
X`009USAGE NOTE FOR THE VAX SIG TAPES
X
X`009Wherever any file has a type of form  .*_LZW,  where * is any
Xextension, it is compressed by LZCMP here and should be decompressed
Xwith LZDCM before use. In all cases, where this has been done, there 
Xwill be a directory (often named THIS_DIR.LIS) of the file directory
Xtree prior to backing up and compressing. Areas have been compressed
Xby creating a VMS Backup saveset and compressing the saveset, so
Xthat only one squeezed file per directory tree need be dealt with.
X
$! Decode and verify the file staged above, then stage DESCRIP.MMS: record
$! its name and expected checksum and copy its encoded lines (which must
$! follow the COPY command directly) into the scratch file.
$ GOSUB UNPACK_FILE
$ FILE_IS = "DESCRIP.MMS"
$ CHECKSUM_IS = 946674144
$ COPY SYS$INPUT VMS_SHARE_DUMMY.DUMMY
X!
X! This MMS file is used to build the LZ* programs.
X!
X! Rules and definitions...
X!
X.INCLUDE  RULES.MMS
X!
X! Dependencies:
X!`009$ mms`009`009`009`009!Build LZCMP.EXE and LZDCM.EXE
X!
Xboth`009: lzcmp.exe, lzdcm.exe
X`009@ CONTINUE
X
Xlzcmp.exe`009: lzcmp1.obj, lzcmp2.obj, lzcmp3.obj, lzio.obj, lzvio.obj
X`009$(LINK) $(LINKFLAGS) LZCMP1.OBJ, LZCMP2.OBJ, LZCMP3.OBJ, -
XLZIO.OBJ, LZVIO.OBJ, SYS$LIBRARY:VAXCRTL/OPTIONS
X
Xlzcmp1.obj`009: lzcmp1.c, lz.h
X
Xlzcmp2.obj`009: lzcmp2.c, lz.h
X
Xlzcmp3.obj`009: lzcmp3.c, lz.h
X
Xlzdcm.exe`009: lzdcm1.obj, lzdcm2.obj, lzdcm3.obj, lzio.obj, lzvio.obj
X`009$(LINK) $(LINKFLAGS) LZDCM1.OBJ, LZDCM2.OBJ, LZDCM3.OBJ, -
XLZIO.OBJ, LZVIO.OBJ, SYS$LIBRARY:VAXCRTL/OPTIONS
X
Xlzdcm1.obj`009: lzdcm1.c, lz.h
X
Xlzdcm2.obj`009: lzdcm2.c, lz.h
X
Xlzdcm3.obj`009: lzdcm3.c, lz.h
X
Xlzio.obj`009: lzio.c, lz.h
X
Xlzvio.obj`009: lzvio.c, lz.h
X
X!
X! Cleanup
X!
X.LAST`009:
X`009@  PURGE
$! Decode and verify the file staged above, then stage LZ.H: record its name
$! and expected checksum and copy its encoded lines (which must follow the
$! COPY command directly) into the scratch file.
$ GOSUB UNPACK_FILE
$ FILE_IS = "LZ.H"
$ CHECKSUM_IS = 1433596817
$ COPY SYS$INPUT VMS_SHARE_DUMMY.DUMMY
X/*
X * Header file for all lz compression/decompression routines.
X *
X * Machine/Operating system/compiler selection: (#ifdef'ed)
X * vax`009`009`009`009Vax/Unix or Vax/VMS
X * pdp11`009`009`009makes a small compressor
X * M_XENIX`009`009`009"large-model" Z8000
X * interdata`009`009`009Signed long compare is slow
X * unix`009`009`009`009Defined on true Unix systems
X * decus`009`009`009Decus C (no signal)
X * vms`009`009`009`009Vax/VMS (VMS_V4 may be set automatically)
X * #define readonly`009`009If the compiler doesn't support it correctly.
X * 
X * Compiler configuration (#if'ed):
X * #define vax_asm   TRUE/FALSE`009TRUE on Vax (4bsd) if the compiler supports
X *`009`009`009`009the asm() operator.  Check the generated code!
X * #define UCHAR     TRUE/FALSE`009TRUE if compiler supports unsigned char
X * #define DEBUG     TRUE/FALSE`009TRUE to compile in debug printouts
X *
X * Algorithm Tuning parameters:
X * #define USERMEM   `009Memory available to compress.
X *`009`009`009`009If large enough, a faster algorithm is used.
X * #define SACREDMEM `009Don't use this part of USERMEM.
X * #define BITS      `009Maximum number of code bits.
X * #define MAXIO     `009Output buffer size (squeeze memory if needed)
X */
X
X#include <stdio.h>
X#include <ctype.h>
X#include <setjmp.h>
X#ifndef decus
X# include <signal.h>
X/*
X * Arguments to signal():
X */
Xextern int`009abort();`009`009/* Debugging interrupt trap`009*/
Xextern int`009interrupt();`009`009/* Non-debugging interrupt trap`009*/
Xextern int`009address_error();`009/* "Segment" violation`009`009*/
X#endif
X
X#ifndef`009TRUE
X# define FALSE`009`0090
X# define TRUE`009`0091
X#endif
X#ifndef`009EOS
X# define EOS`009`009'\0'
X#endif
X#define`009streq(a, b)`009(strcmp((a), (b)) == 0)
X#define min(a,b)`009(((a) > (b)) ? (b) : (a))
X
X/*
X * Set USERMEM to the maximum amount of physical user memory available
X * in bytes.  USERMEM is used to determine the maximum BITS that can be used
X * for compression.
X *
X * SACREDMEM is the amount of physical memory saved for others; compress
X * will hog the rest.
X */
X
X#ifndef SACREDMEM
X# define SACREDMEM`0090
X#endif
X
X/*
X * Set machine-specific parameters
X */
X
X#ifdef vax
X# ifdef unix
X#  define vax_asm`009TRUE`009`009/* If asm() supported on vax`009*/
X# endif
X#endif
X#ifndef`009vax_asm
X# define vax_asm`009FALSE
X#endif
X
X#ifdef pdp11
X# define BITS`00912`009/* max bits/code for 16-bit machine`009`009*/
X# define USERMEM 0`009/* Force no user memory`009`009`009`009*/
X# define UCHAR`009FALSE`009/* TRUE if compiler supports unsigned char`009*/
X# define MAXIO 512`009/* Buffer size for PDP-11 I/O buffers`009`009*/
X#endif
X
X/*
X * Set default values for some parameters.
X */
X
X#ifndef DEBUG
X# define DEBUG`009FALSE
X#endif
X
X#ifdef interdata
X# define SIGNED_COMPARE_SLOW TRUE
X#endif
X#ifndef SIGNED_COMPARE_SLOW
X# define SIGNED_COMPARE_SLOW FALSE
X#endif
X
X#ifndef USERMEM
X# define USERMEM 750000`009/* default user memory`009`009`009`009*/
X#endif
X
X#ifndef`009UCHAR
X# define UCHAR`009TRUE`009/* Compiler supports unsigned char`009`009*/
X#endif
X
X#ifndef MAXIO
X# define MAXIO`0092048`009/* I/O buffer size`009`009`009`009*/
X#endif
X
X/*
X * Set derived tuning parameters.
X */
X
X#ifndef USERMEM
X# define USERMEM`009 0
X#endif
X#if USERMEM >=`009`009`009(433484 + SACREDMEM)
X# define PBITS`009`00916
X#else
X# if USERMEM >=`009`009`009(229600 + SACREDMEM)
X#  define PBITS`009`00915
X# else
X#  if USERMEM >=`009`009(127536 + SACREDMEM)
X#   define PBITS`00914
X#   else
X#    if USERMEM >=`009`009( 73464 + SACREDMEM)
X#     define PBITS`00913
X#    else`009`009`009/* Smaller systems`009`009`009*/
X#     define PBITS`00912
X#    endif
X#   endif
X# endif
X#endif
X
X#ifndef BITS
X# define BITS PBITS
X#endif
X
X#ifdef M_XENIX
X# if BITS >= 16
X#  define XENIX_16`009`009/* Enable special vector access macros`009*/
X# else
X#  if BITS > 13
X#   undef BITS
X#   define BITS 13`009`009/* Code only handles BITS = 12, 13, 16`009*/
X#  endif
X# endif
X#endif
X
X/*
X * HSIZE is the size of the hash lookup table.  It is set to
X * 1 << BITS + fudge factor, rounded up to a prime number.
X * If it is too big, the "clear the hash" routine will take
X * too long.  The same numbers are replicated in the getsize()
X * routine's data table.
X */
X
X#if BITS == 16
X# define HSIZE`00969001`009`009/* 95% occupancy`009`009`009*/
X#endif
X#if BITS == 15
X# define HSIZE`00935023`009`009/* 94% occupancy`009`009`009*/
X#endif
X#if BITS == 14
X# define HSIZE`00918013`009`009/* 91% occupancy`009`009`009*/
X#endif
X#if BITS == 13
X# define HSIZE`009 9001`009`009/* 91% occupancy`009`009`009*/
X#endif
X#if BITS <= 12
X# define HSIZE`009 5003`009`009/* 80% occupancy`009`009`009*/
X#endif
X`012
X/*
X * typedef's -- somewhat machine specific.
X */
X
X/*
X * a code_int must be able to hold 2**BITS values of type int, and also -1
X */
X#if BITS > 15
Xtypedef long int`009code_int;
X#else
Xtypedef int`009`009code_int;
X#endif
X
X/*
X * A count_int must hold ((2**BITS)-1) + (255<>
X#endif
X#define`009LZ_CLEAR`009(NBR_CHAR)`009/* Clear code`009`009`009*/
X#define`009LZ_SOH`009`009(LZ_CLEAR + 1)`009/* Start of header block`009*/
X#define`009LZ_STX`009`009(LZ_SOH   + 1)`009/* Start of text block`009`009*/
X#define`009LZ_EOR`009`009(LZ_STX   + 1)`009/* End of text record`009`009*/
X#define`009LZ_ETX`009`009(LZ_EOR   + 1)`009/* End of header/text block`009*/
X#define`009LZ_FIRST`009(LZ_ETX   + 1)`009/* First user (data) code`009*/
X
X#ifdef`009vms
X#include`009`009errno
X#include`009`009ssdef
X#include`009`009stsdef
X#define`009IO_SUCCESS`009(SS$_NORMAL | STS$M_INHIB_MSG)
X#define`009IO_ERROR`009(SS$_ABORT)
X#define VMS_V4`009`009L_cuserid >= 16`009`009/* Enable new stuff`009*/
X#else
X#define VMS_V4`009`0090`009`009`009/* Disable new stuff`009*/
Xextern int`009`009errno;
X#ifdef decus
X#define`009errno`009`009$$ferr
X#endif
X#endif
X
X/*
X * Define exit() codes.
X */
X
X#ifndef`009IO_SUCCESS
X#define`009IO_SUCCESS`0090`009`009`009/* Normal exit`009`009*/
X#define`009IO_ERROR`0091`009`009`009/* Error exit`009`009*/
X#endif
X`012
X/*
X * All I/O is done by way of "streams".  To establish a stream,
X * set the parameters appropriately and off you go.  The following
X * functions are provided:
X *`009lz_fill(stream)`009`009fills the buffer from stdin
X *`009lz_flush(stream)`009writes the buffer to stdout
X *`009lz_eof(stream)`009`009returns EOF (for fill from memory)
X *`009lz_fail(stream)`009`009abort (for writing to memory).
X *`009lz_dummy(stream)`009throw an output stream away.
X * Note: if VMS_V4 is enabled and the private (non-export) format
X * chosen, lz_fill and lz_flush access the files appropriately.
X * Stream elements are initialized as follows:
X *`009Input:`009bp = NULL;`009bend = NULL;
X *`009Output:`009bp = bstart;`009bend = bstart + bsize;
X */
X
Xtypedef struct STREAM {
X    char_type`009*bp;`009`009/* Next character to get/put`009`009*/
X    char_type`009*bend;`009`009/* -> end of stream buffer`009`009*/
X    char_type`009*bstart;`009/* Start of stream buffer`009`009*/
X    short`009bsize;`009`009/* Stream buffer size`009`009`009*/
X    int`009`009(*func)();`009/* Read/write a buffer function`009`009*/
X} STREAM;
X
X/*
X * Note also that the compress routine uses putbuf(buf, count, outstream)
X * and the decompress routine uses getbuf(buf, count, instream) to (quickly)
X * transfer multiple bytes.
X */
X#if UCHAR
X#define`009GET(s)`009`009\
X`009(((s)->bp < (s)->bend) ? *(s)->bp++        : (*(s)->func)(s))
X#else
X#define`009GET(s)`009`009\
X`009(((s)->bp < (s)->bend) ? *(s)->bp++ & 0xFF : (*(s)->func)(s))
X#endif
X#define`009PUT(c, s)`009\
X`009((((s)->bp >= (s)->bend) ? (*(s)->func)(s) : 0), *(s)->bp++ = (c))
X
Xextern int lz_fill();
Xextern int lz_flush();
Xextern int lz_eof();
Xextern int lz_fail();
Xextern int lz_dummy();
X
X#if DEBUG
Xextern readonly char *lz_names[];`009`009/* "LZ_CLEAR" etc.`009*/
X#endif
X
X/*
X * Options and globals.
X */
X#if VMS_V4
X#define`009ATT_NAME`009"vms$attributes "
X#define`009ATT_SIZE`00915`009`009`009/* strlen(ATT_NAME)`009*/
Xextern int`009fdl_status;`009/* Error code from fdl library`009`009*/
X#endif
X
Xextern flag`009binary;`009`009/* -b Readable text file if FALSE`009*/
Xextern flag`009noheader;`009/* -x3 No magic header if TRUE`009`009*/
Xextern flag`009export;`009`009/* -x  (non-zero) Supress vms private`009*/
Xextern flag`009block_compress;`009/* -x2`009`009`009`009`009*/
Xextern flag`009verbose;`009/* -v  (non-zero) Verbose logging`009*/
Xextern readonly flag is_compress; /* TRUE if compress, FALSE if decomp.`009*/
Xextern char`009*infilename;`009/* For error printouts`009`009`009*/
Xextern char`009*outfilename;`009/* For more error printouts`009`009*/
Xextern short`009n_bits;`009`009/* Current # of bits in compressed file`009*/
Xextern int`009firstcode;`009/* First value past signals`009`009*/
Xextern jmp_buf`009failure;`009/* For longjmp() return`009`009`009*/
X
$! Decode and verify the file staged above, then stage LZ.HLP: record its
$! name and expected checksum and copy its encoded lines (which must follow
$! the COPY command directly) into the scratch file.
$ GOSUB UNPACK_FILE
$ FILE_IS = "LZ.HLP"
$ CHECKSUM_IS = 978341288
$ COPY SYS$INPUT VMS_SHARE_DUMMY.DUMMY
X1 LZCMP
X File Compression Utility
X Usage:
X
X`009$ LZCMP  [-options]  infile  outfile
X
X LZCMP implements the Lempel-Ziv file compression algorithm.
X (Files compressed by LZCMP are uncompressed by LZDCM.)
X It operates by finding common substrings and replaces them
X with a variable-size code.  This is deterministic, and
X can be done with a single pass over the file.  Thus,
X the decompression procedure needs no input table, but
X can track the way the table was built.
X
X2 Options
X Options may be given in either case.
X2 -BInput
X Input file is "binary", not "human readable text".
X This is necessary on Dec operating systems, such as VMS and
X RSX-11M, that treat these files differently.  (Note that binary
X support is rudamentary and probably insufficient as yet.)
X (On VMS version 4, this is ignored unless the -x option is
X specified or the input file is record-oriented.)
X2 -M bits
X Write using the specified number of bits in the code -- necessary
X for big machines making files for little machines.  For example,
X if compressing a file on VMS which is to be read on a PDP-11,
X you should select -M 12.
X2 -V [n]
X Verbose if specified.  If a value is specified,
X it will enable debugging code (if compiled in).
X2 -X [n]
X "Export" -- write a file format that can be read by
X other operating systems.  Only the bytes in the file are copied;
X file attributes are not preserved.  If specified, the value
X determines the level of compatiblity.  If not specified,
X or specified with an explicit value of zero, and LZCMP is
X running on Vax/VMS version 4 under VaxC and the input file
X is a disk or magtape file (block-oriented), a VMS-private output
X format is used which is incompatible with the Unix compress
X utility, but which preserves VMS file attributes.  -X may
X take on the following values:
X
X    0  Choose VMS private format.  See restrictions below.
X    1  Compatible with Unix compress version 3.0:
X       this is the default if -x is given without a value.
X    2  As above, but supress "block compression"
X    3  Supress block compression and do not output
X       a compress header block.  This is for compatiblity
X       with a quite early version of Unix compress (and requires
X       conditional-compilation to use).
X
X Note that the -B (binary) option is ignored unless
X the input file is "record-oriented", such as a terminal
X or mailbox.
X2 Arguments
X The other two arguments are the input and output
X filenames respectively.  Redirection is supported,
X however, the output must be a disk/tape file.
X
X The file format is almost identical to the current
X Unix implementation of compress (V4.0).  Files written
X by Unix compress should be readable by LZDCM.  Files
X written by LZCMP in export (-x) format will be
X readable by Unix compress (except that LZCMP outputs
X two "clear" codes to mark EOF.  A patch to Unix
X compress is available.)
X
X2 VMS_Restrictions
X
X VMS Private mode stores the true name and attributes
X of the input file into the compressed file and LZDCM
X restores the attributes (and filename if requested).
X The following restrictions apply -- they may be lifted
X in the future as they are primarily due to the author's
X lack of understanding of the intricacies of of VMS I/O:
X
X    All files must be stored on disk.
X    The LZCMP output file must be specified directly.
X
X Also, for all usage on VMS, the compressed file must
X be written to, and read from disk.
X
X2 Compression_Algorithm
X
X This section is abstracted from Terry Welch's article
X referenced below.  The algorithm builds a string
X translation table that maps substrings in the input
X into fixed-length codes.  The compress algorithm may
X be described as follows:
X
X  1. Initialize table to contain single-character
X     strings.
X  2. Read the first character.  Set <w> (the prefix
X     string) to that character.
X  3. (step): Read next input character, K.
X  4. If at end of file, output code(<w>); exit.
X  5. If <w>K is in the string table:
X     Set <w> to <w>K; goto step 3.
X  6. Else <w>K is not in the string table.
X     Output code(<w>);
X     Put <w>K into the string table;
X     Set <w> to K; Goto step 3.
X
X "At each execution of the basic step an acceptable input
X string <w> has been parsed off.  The next character K is
X read and the extended string <w>K is tested to see if it
X exists in the string table.  If it is there, then the
X extended string becomes the parsed string <w> and the
X step is repeated.  If <w>K is not in the string table,
X then it is entered, the code for the successfully
X parsed string <w> is put out as compressed data, the
X character K becomes the beginning of the next string,
X and the step is repeated."
X
X The decompression algorithm translates each received
X code into a prefix string and extension [suffix] character.
X The extension character is stored (in a push-down stack),
X and the prefix translated again, until the prefix is a
X single character, which completes decompression of this
X code.  The entire code is then output by popping the
X stack.
X
X "An update to the string table is made for each code received
X (except the first one).  When a code has been translated,
X its final character is used as the extension character,
X combined with the prior string, to add a new string to
X the string table.  This new string is assigned a unique
X code value, which is the same code that the compressor
X assigned to that string.  In this way, the decompressor
X incrementally reconstructs the same string table that
X the compressor used.... Unfortunately ... [the algorithm]
X does not work for an abnormal case.
X 
X The abnormal case occurs whenever an input character string
X contains the sequence K<w>K<w>K, where K<w> already
X appears in the compressor string table."
X
X The decompression algorithm, augmented to handle
X the abnormal case, is as follows:
X
X  1. Read first input code;
X     Store in CODE and OLDcode;
X     With CODE = code(K), output(K);  FINchar = K;
X  2. Read next code to CODE; INcode = CODE;
X     If at end of file, exit;
X  3. If CODE not in string table (special case) then
X     Output(FINchar);
X     CODE = OLDcode;
X     INcode = code(OLDcode, FINchar);
X
X  4. If CODE == code(<w>K) then
X     Push K onto the stack;
X     CODE = code(<w>);
X     Goto 4.
X
X  5. If CODE == code(K) then
X     Output K;
X     FINchar = K;
X
X  6. While stack not empty
X     Output top of stack;
X     Pop stack;
X
X  7. Put OLDcode,K into the string table.
X     OLDcode = INcode;
X     Goto 2.
X
X The algorithm as implemented here introduces two additional
X complications.
X
X The actual codes are transmitted using a variable-length
X encoding.  The lowest-level routines increase the number
X of bits in the code when the largest possible code is
X transmitted.
X
X Periodically, the algorithm checks that compression is
X still increasing.  If the ratio of input bytes to output
X bytes decreases, the entire process is reset.  This can
X happen if the characteristics of the input file change.
X
X2 VMS_Private_File_Structure
X
X In VMS Private mode, the compressed data file contains
X a variable-length (but compressed) file header with the
X file "attributes" needed by the operating system to
X  construct the file.  This allows the decompression
X program to recreate the file in its original format,
X which is essential if ISAM databases are compressed.
X 
X The overall file format is as follows:
X
X    LZ_SOH  "start of header" signal (this value cannot appear
X            in user data).
X
X            A variable-length data record (maximum 256 bytes)
X            containing the header name, followed by whitespace, followed
X            by header-specific information.  In this case, the name
X            record will contain the string "vms$attributes" followed
X            by the number of bytes in the attribute data block.
X            (I assume that the name record will consist of a facility
X            name, such as "vms", followed by a dollar sign, followed
X            by a facility-unique word.)
X
X    LZ_EOR  Signals "end of record".
X
X            This is followed by a VMS file attributes record (generated
X            by a VMS system library routine).
X
X    LZ_ETX  Signals "end of segment".
X
X    LZ_STX  Signals "start of text" (i.e., start of data file).
X
X            This is followed by the user data file.
X
X    LZ_ETX  Signals "end of segment"
X
X    LZ_ETX  Two in a row signals "end of file".
X
X Note that this format can easily be extended to include
X trailer records (with file counts and checksums) and/or
X multiple data files in one compressed file.
X
X Note also that the LZ_CLEAR code may appear in headers
X or data files to cause the decompression program to
X "readapt" to the characteristics of the input data.
X LZ_STX and LZ_SOH reset the compression algorithm.
X LZ_EOR does not.
X
X2 Authors
X The algorithm is from "A Technique for High Performance
X Data Compression."  Terry A. Welch. IEEE Computer Vol 17,
X No. 6 (June 1984), pp 8-19.
X
X This revision is by Martin Minow.
X
X Unix Compress authors are as follows:
X
X Spencer W. Thomas  (decvax!harpo!utah-cs!utah-gr!thomas)
X Jim McKie          (decvax!mcvax!jim)
X Steve Davies       (decvax!vax135!petsd!peora!srd)
X Ken Turkowski      (decvax!decwrl!turtlevax!ken)
X James A. Woods     (decvax!ihnp4!ames!jaw)
X Joe Orost          (decvax!vax135!petsd!joe)
X
X Compatible with compress.c, v3.0 84/11/27
X
X1 LZDCM
X File Decompression Utility
X Usage:
X
X`009$ LZDCM  [-options]  infile  outfile
X
X LZDCM decompresses files compressed by LZCMP.  The
X help information for LZCMP describes the process in
X greater detail.
X
X2 Options
X Options may be given in either case.
X2 -BOutput
X Output file is "binary", not text.  (Ignored
X in VMS private mode.)
X2 -X 3
X To read files compressed by an old Unix version
X that doesn't generate header records.
X2 -V val
X Verbose (print status messages and debugging
X information).  The value selects the amount of verbosity.
$! Decode and verify the file staged above, then stage LZCMP1.C: record its
$! name and expected checksum and copy its encoded lines (which must follow
$! the COPY command directly) into the scratch file.
$ GOSUB UNPACK_FILE
$ FILE_IS = "LZCMP1.C"
$ CHECKSUM_IS = 371966649
$ COPY SYS$INPUT VMS_SHARE_DUMMY.DUMMY
X/*
X *`009`009lzcomp [-options] infile outfile
X */
X#ifdef`009DOCUMENTATION
X
Xtitle`009lzcomp`009File Compression
Xindex`009`009File compression
X
Xsynopsis
X`009.s.nf
X`009lzcomp [-options] [infile [outfile]]
X`009.s.f
Xdescription
X
X`009lzcomp implements the Lempel-Ziv file compression algorithm.
X`009(Files compressed by lzcomp are uncompressed by lzdcmp.)
X`009It operates by finding common substrings and replaces them
X`009with a variable-size code.  This is deterministic, and
X`009can be done with a single pass over the file.  Thus,
X`009the decompression procedure needs no input table, but
X`009can track the way the table was built.
X
X`009Options may be given in either case.
X`009.lm +8
X`009.p -8
X`009-B`009Input file is "binary", not "human readable text".
X`009This is necessary on Dec operating systems, such as VMS and
X`009RSX-11M, that treat these files differently.  (Note that binary
X`009support is rudamentary and probably insufficient as yet.)
X`009(On VMS version 4, this is ignored unless the -x option is
X`009specified or the input file is record-oriented.)
X`009.p -8
X`009-M bits`009Write using the specified number of bits in the
X`009code -- necessary for big machines making files for little
X`009machines.  For example, if compressing a file on VMS
X`009which is to be read on a PDP-11, you should select -M 12.
X`009.p -8
X`009-V [n]`009Verbose if specified.  If a value is specified,
X`009it will enable debugging code (if compiled in).
X`009.p -8
X`009-X [n]`009"Export" -- write a file format that can be read by
X`009other operating systems.  Only the bytes in the file are copied;
X`009file attributes are not preserved.  If specified, the value
X`009determines the level of compatiblity.  If not specified,
X`009or specified with an explicit value of zero, and lzcomp is
X`009running on Vax/VMS version 4 under VaxC and the input file
X`009is a disk or magtape file (block-oriented), a VMS-private output
X`009format is used which is incompatible with the Unix compress
X`009utility, but which preserves VMS file attributes.  -X may
X`009take on the following values:
X`009.lm +4.s
X`009.i -4;#0##Choose VMS private format.  See restrictions below.
X`009.i -4;#1##Compatible with Unix compress version 3.0:
X`009this is the default if -x is given without a value.
X`009.i -4;#2##As above, but supress "block compression"
X`009.i -4;#3##Supress block compression and do not output
X`009a compress header block.  This is for compatiblity
X`009with a quite early version of Unix compress (and requires
X`009conditional-compilation to use).
X`009.lm -4.s
X`009Note that the -B (binary) option is ignored unless
X`009the input file is "record-oriented", such as a terminal
X`009or mailbox.
X`009.lm -8.s
X`009The other two arguments are the input and output
X`009filenames respectively.  Redirection is supported,
X`009however, the output must be a disk/tape file.
X
X`009The file format is almost identical to the current
X`009Unix implementation of compress (V4.0).  Files written
X`009by Unix compress should be readable by lzdcmp.  Files
X`009written by lzcomp in export (-x) format will be
X`009readable by Unix compress (except that lzcomp outputs
X`009two "clear" codes to mark EOF.  A patch to Unix
X`009compress is available.)
X
XVMS Restrictions
X
X`009VMS Private mode stores the true name and attributes
X`009of the input file into the compressed file and lzdcmp
X`009restores the attributes (and filename if requested).
X`009The following restrictions apply -- they may be lifted
X`009in the future as they are primarily due to the author's
X`009lack of understanding of the intricacies of of VMS I/O:
X
X`009    All files must be stored on disk.
X`009    The lzcomp output file must be specified directly.
X
X`009Also, for all usage on VMS, the compressed file must
X`009be written to, and read from disk.
X
XLZW compression algorithm
X
X`009This section is abstracted from Terry Welch's article
X`009referenced below.  The algorithm builds a string
X`009translation table that maps substrings in the input
X`009into fixed-length codes.  The compress algorithm may
X`009be described as follows:
X
X`009  1. Initialize table to contain single-character
X`009     strings.
X`009  2. Read the first character.  Set <w> (the prefix
X`009     string) to that character.
X`009  3. (step): Read next input character, K.
X `009  4. If at end of file, output code(<w>); exit.
X`009  5. If <w>K is in the string table:
X`009`009Set <w> to <w>K; goto step 3.
X`009  6. Else <w>K is not in the string table.
X`009`009Output code(<w>);
X`009`009Put <w>K into the string table;
X`009`009Set <w> to K; Goto step 3.
X
X`009"At each execution of the basic step an acceptable input
X`009string <w> has been parsed off.  The next character K is
X`009read and the extended string <w>K is tested to see if it
X`009exists in the string table.  If it is there, then the
X`009extended string becomes the parsed string <w> and the
X`009step is repeated.  If <w>K is not in the string table,
X`009then it is entered, the code for the successfully
X`009parsed string <w> is put out as compressed data, the
X`009character K becomes the beginning of the next string,
X`009and the step is repeated."
X
X`009The decompression algorithm translates each received
X`009code into a prefix string and extension [suffix] character.
X`009The extension character is stored (in a push-down stack),
X`009and the prefix translated again, until the prefix is a
X`009single character, which completes decompression of this
X`009code.  The entire code is then output by popping the
X`009stack.
X
X`009"An update to the string table is made for each code received
X`009(except the first one).  When a code has been translated,
X`009its final character is used as the extension character,
X`009combined with the prior string, to add a new string to
X`009the string table.  This new string is assigned a unique
X`009code value, which is the same code that the compressor
X`009assigned to that string.  In this way, the decompressor
X`009incrementally reconstructs the same string table that
X`009the compressor used.... Unfortunately ... [the algorithm]
X`009does not work for an abnormal case.
X
X`009The abnormal case occurs whenever an input character string
X`009contains the sequence K<w>K<w>K, where K<w> already
X`009appears in the compressor string table."
X
X`009The decompression algorithm, augmented to handle
X`009the abnormal case, is as follows:
X
X`009  1. Read first input code;
X`009     Store in CODE and OLDcode;
X`009     With CODE = code(K), output(K);  FINchar = K;
X`009  2. Read next code to CODE; INcode = CODE;
X`009     If at end of file, exit;
X`009  3. If CODE not in string table (special case) then
X`009`009Output(FINchar);
X`009`009CODE = OLDcode;
X`009`009INcode = code(OLDcode, FINchar);
X`009
X`009  4. If CODE == code(<w>K) then
X`009`009Push K onto the stack;
X`009`009CODE = code(<w>);
X`009`009Goto 4.
X
X`009  5. If CODE == code(K) then
X`009`009Output K;
X`009`009FINchar = K;
X
X`009  6. While stack not empty
X`009`009Output top of stack;
X`009`009Pop stack;
X
X`009  7. Put OLDcode,K into the string table.
X`009     OLDcode = INcode;
X`009     Goto 2.
X
X`009The algorithm as implemented here introduces two additional
X`009complications.
X
X`009The actual codes are transmitted using a variable-length
X`009encoding.  The lowest-level routines increase the number
X`009of bits in the code when the largest possible code is
X`009transmitted.
X
X`009Periodically, the algorithm checks that compression is
X`009still increasing.  If the ratio of input bytes to output
X`009bytes decreases, the entire process is reset.  This can
X`009happen if the characteristics of the input file change.
X
XVMS Private File Structure
X
X`009In VMS Private mode, the compressed data file contains
X`009a variable-length (but compressed) file header with the
X`009file "attributes" needed by the operating system to
X`009construct the file.  This allows the decompression
X`009program to recreate the file in its original format,
X`009which is essential if ISAM databases are compressed.
X
X`009The overall file format is as follows:
X`009.lm +8
X`009.p -8
X`009LZ_SOH`009"start of header" signal (this value cannot appear
X`009in user data).
X
X`009A variable-length data record (maximum 256 bytes)
X`009containing the header name, followed by whitespace, followed
X`009by header-specific information.  In this case, the name
X`009record will contain the string "vms$attributes" followed
X`009by the number of bytes in the attribute data block.
X`009(I assume that the name record will consist of a facility
X`009name, such as "vms", followed by a dollar sign, followed
X`009by a facility-unique word.)
X`009.p -8
X`009LZ_EOR`009Signals "end of record".
X
X`009This is followed by a VMS file attributes record (generated
X`009by a VMS system library`009routine).
X`009.p -8
X`009LZ_ETX`009Signals "end of segment".
X`009.p -8
X`009LZ_STX`009Signals "start of text" (i.e., start of data file).
X
X`009This is followed by the user data file.
X`009.p -8
X`009LZ_ETX`009Signals "end of segment"
X`009.p -8
X`009LZ_ETX`009Two in a row signals "end of file".
X`009.s.lm -8
X`009Note that this format can easily be extended to include
X`009trailer records (with file counts and checksums) and/or
X`009multiple data files in one compressed file.
X
X`009Note also that the LZ_CLEAR code may appear in headers
X`009or data files to cause the decompression program to
X`009"readapt" to the characteristics of the input data.
X`009LZ_STX and LZ_SOH reset the compression algorithm.
X`009LZ_EOR does not.
X
XAuthors
X
X`009The algorithm is from "A Technique for High Performance
X`009Data Compression."  Terry A. Welch. IEEE Computer Vol 17,
X`009No. 6 (June 1984), pp 8-19.
X
X`009This revision is by Martin Minow.
X
X`009Unix Compress authors are as follows:
X`009.s.nf
X`009Spencer W. Thomas`009(decvax!harpo!utah-cs!utah-gr!thomas)
X`009Jim McKie`009`009(decvax!mcvax!jim)
X`009Steve Davies`009`009(decvax!vax135!petsd!peora!srd)
X`009Ken Turkowski`009`009(decvax!decwrl!turtlevax!ken)
X`009James A. Woods`009`009(decvax!ihnp4!ames!jaw)
X`009Joe Orost`009`009(decvax!vax135!petsd!joe)
X`009.s.f
X
X#endif
X
X/*
X * Compatible with compress.c, v3.0 84/11/27
X */
X
X/*)BUILD
X`009`009$(PROGRAM) = lzcomp
X`009`009$(INCLUDE) = lz.h
X`009`009$(CPP) = 1
X`009`009$(FILES) = { lzcmp1.c lzcmp2.c lzcmp3.c lzio.c lzvio.c }
X*/
X`012
X#include`009"lz.h"
X
X
X#ifdef unix
X/*
X * struct stat and fstat() are used by getfilesize() to find the
X * size of the input file.
X */
X#include <sys/types.h>
X#include <sys/stat.h>
X#endif
X
X/*
X * These global parameters are written to the compressed file.
X * The decompressor needs them.  The initialized values are defaults
X * and are modified by command line arguments.
X */
Xshort`009`009maxbits = BITS;`009`009/* settable max # bits/code`009*/
Xcode_int maxmaxcode = 1 << BITS; `009/* Totally largest code`009*/
Xcode_int`009hsize = HSIZE;`009`009/* Actual hash table size`009*/
X
X/*
X * Flags (command line arguments) to control compression.
X */
X#if VMS_V4
Xflag`009`009export = 0;`009`009/* Assume vms "private" mode`009*/
X#else
Xflag`009`009export = 1;`009`009/* Assume Unix compatible mode`009*/
X#endif
Xflag`009`009block_compress = TRUE;`009/* Assume block compression`009*/
Xflag`009`009binary = FALSE;`009`009/* Reading text file if FALSE`009*/
Xflag`009`009noheader = FALSE;`009/* No magic header if TRUE`009*/
Xflag`009`009verbose = VERBOSE_DEFAULT; /* Non-zero for status/debug`009*/
Xflag`009`009background = FALSE;`009/* TRUE (Unix) if detached`009*/
Xreadonly flag`009is_compress = TRUE;`009/* for lzio.c (needed?)`009`009*/
Xlong`009`009fsize;`009`009`009/* Input file size in bytes`009*/
Xchar`009`009*infilename = NULL;`009/* For error printouts`009`009*/
Xchar`009`009*outfilename = NULL;`009/* For openoutput and errors`009*/
Xint`009`009firstcode;`009`009/* First code after internals`009*/
Xcount_int`009tot_incount = 0;`009/* Total number of input bytes`009*/
Xcount_int`009tot_outcount = 0;`009/* Total number of output bytes`009*/
X/*
X * in_count and out_count are defined in another module; main() adds
X * them into tot_incount and tot_outcount before printing statistics.
X */
Xextern count_int in_count;
Xextern count_int out_count;
Xstatic long`009start_time;`009`009/* Time we started (in msec)`009*/
Xextern long`009cputime();`009`009/* Returns process time in msec`009*/
X/*
X * I/O streams.  openinput() points instream at inbuffer with lz_fill
X * as its function; openoutput() points outstream at outbuffer with
X * lz_flush.  mem_stream is set up afresh by each mem_compress() call.
X */
XSTREAM`009`009instream;
XSTREAM`009`009outstream;
Xchar_type`009inbuffer[MAXIO];
Xchar_type`009outbuffer[MAXIO];
Xstatic STREAM`009mem_stream;
X/*
X * setjmp() context established in main(); a non-zero return there
X * takes the error exit.
X */
Xjmp_buf`009`009failure;
X#if VMS_V4
X#include types
X#include stat
X#include descrip
X#ifndef FDLSTUFF
X#define FDLSTUFF char
X#endif
X/*
X * VMS private mode only.  fdl_input is returned by fdl_open() in
X * openinput(), which is also handed fdl_descriptor; fdl_output is
X * returned by fdl_create() in openoutput().  put_magic_header()
X * writes the descriptor's contents into the compressed file.
X */
XFDLSTUFF`009*fdl_input;
XFDLSTUFF`009*fdl_output;
Xstatic struct dsc$descriptor fdl_descriptor;
X#endif
X`012
Xmain(argc, argv)
Xint`009`009argc;
Xchar`009`009*argv[];
X/*
X * Compress mainline.
X *
X * Parses the command line, opens the input and output, writes the
X * magic header, compresses the input stream and writes the end
X * markers.  If verbose is positive, byte counts, the compression
X * ratio and the process time are reported on stderr.  Exits with
X * IO_SUCCESS when done.
X *
X * A non-zero return from setjmp(failure) takes the error branch,
X * which names the files involved, calls perror() if errno is set
X * and exits with IO_ERROR.
X */
X{
X`009long`009`009tenths;`009`009/* Elapsed time, 0.1 sec units`009*/
X
X#ifndef`009decus
X`009/*
X`009 * background is TRUE if running detached from the command terminal.
X`009 */
X`009background = (signal(SIGINT, SIG_IGN) == SIG_IGN) ? TRUE : FALSE;
X`009if (!background)
X`009    background = !isatty(fileno(stderr));
X`009if (!background) {
X`009    if (verbose > 0)
X`009`009signal(SIGINT, abort);
X`009    else {
X`009`009signal(SIGINT, interrupt);
X`009`009signal(SIGSEGV, address_error);
X`009    }
X`009}
X#endif
X`009if (setjmp(failure) == 0) {
X`009    setup(argc, argv);`009`009/* Command line parameters`009*/
X`009    openinput();`009`009/* Open input, set instream`009*/
X`009    getfilesize();`009`009/* Get input file size`009`009*/
X`009    gethashsize();`009`009/* Get actual hash table size`009*/
X`009    initialize();`009`009/* Set maxbits and the like`009*/
X`009    openoutput();`009`009/* Open output file`009`009*/
X`009    if (verbose > 0)
X`009`009start_time = cputime();
X`009    put_magic_header();
X`009    init_compress(TRUE);
X`009    compress(&instream);
X#if VMS_V4
X`009    if (export == 0) {
X`009`009/* Two LZ_ETX codes in a row mark end of file`009*/
X`009`009outputcode((code_int) LZ_ETX);
X`009`009outputcode((code_int) LZ_ETX);
X`009`009fdl_close(fdl_input);
X`009    }
X`009    else
X#endif
X`009    if (block_compress) {
X`009`009/* Export mode marks EOF with two clear codes`009*/
X`009`009outputcode((code_int) LZ_CLEAR);
X`009`009outputcode((code_int) LZ_CLEAR);
X`009    }
X`009    outputcode((code_int) -1);`009`009/* Flush output buffers`009*/
X#if VMS_V4
X`009    if (export == 0)
X`009`009fdl_close(fdl_output);
X`009    else {
X`009`009fclose(stdout);
X`009    }
X#else
X`009    fclose(stdout);
X#endif
X`009    if (verbose > 0) {
X`009`009start_time = cputime() - start_time;
X`009`009tot_incount += in_count;
X`009`009tot_outcount += out_count;
X`009`009fprintf(stderr, "%ld chars in, %ld bytes out, ",
X`009`009    tot_incount, tot_outcount);
X`009`009if (tot_outcount > 0) {
X`009`009    divout("compression ratio: ",
X`009`009`009(long) tot_incount, (long) tot_outcount, "");
X`009`009    /*
X`009`009     * The percentage divides by the input count, which
X`009`009     * is zero for an empty input: skip it in that case
X`009`009     * but still finish the line.
X`009`009     */
X`009`009    if (tot_incount > 0) {
X`009`009`009divout(" (",
X`009`009`009    ((long) tot_incount - (long) tot_outcount) * 100,
X`009`009`009    (long) tot_incount, "%)\n");
X`009`009    }
X`009`009    else
X`009`009`009fprintf(stderr, "\n");
X`009`009}
X`009`009fprintf(stderr,
X`009`009    "%ld.%02ld seconds (process time) for compression.\n",
X`009`009    start_time / 1000L, (start_time % 1000L) / 10L);
X`009`009/*
X`009`009 * The rate divides by the elapsed time rounded to
X`009`009 * tenths of a second.  That rounds to zero for runs
X`009`009 * shorter than 50 msec, so test the divisor itself.
X`009`009 */
X`009`009tenths = (start_time + 50L) / 100L;
X`009`009if (tenths > 0) {
X`009`009    divout("  ", (long) tot_incount * 10L, tenths,
X`009`009`009" input bytes per second.\n");
X`009`009}
X`009    }
X`009    exit(IO_SUCCESS);
X`009}
X`009else {
X`009    fprintf(stderr, "Error when compressing \"%s\" to \"%s\"\n",
X`009`009(infilename  == NULL) ?
X`009`009    "(unknown)" : infilename,
X`009`009(outfilename == NULL) ?
X`009`009    "(unknown)" : outfilename);
X`009    if (errno != 0)
X`009`009perror("lzcomp fatal error");
X`009    exit(IO_ERROR);
X`009}
X}
X`012
Xdivout(leader, numer, denom, trailer)
Xchar`009`009*leader;
Xlong`009`009numer;
Xlong`009`009denom;
Xchar`009`009*trailer;
X/*
X * Print numer/denom without floating point on small machines.
X * The output on stderr is leader, the quotient with two decimal
X * places, then trailer.
X *
X * A zero denominator prints "n/a" in place of the number instead of
X * dividing.  The sign is printed separately so that a negative
X * result (for example when the output is larger than the input)
X * appears as "-0.50" rather than "0.-50".
X */
X{
X`009char`009`009*sign;
X
X`009if (denom == 0) {
X`009    fprintf(stderr, "%sn/a%s", leader, trailer);
X`009    return;
X`009}
X`009if (denom < 0) {`009`009/* Normalize: denom positive`009*/
X`009    numer = -numer;
X`009    denom = -denom;
X`009}
X`009sign = "";
X`009if (numer < 0) {`009`009/* Work on the magnitude`009*/
X`009    sign = "-";
X`009    numer = -numer;
X`009}
X`009fprintf(stderr, "%s%s%ld.%02ld%s",
X`009    leader, sign,
X`009    numer / denom, ((numer % denom) * 100L) / denom, trailer);
X}
X
Xstatic
Xinitialize()
X/*
X * Derive the working parameters from the command line settings:
X * force maxbits (the -M option) into the range INIT_BITS..BITS,
X * compute maxmaxcode from it, and choose the first free code for
X * the selected output format.
X */
X{
X`009/* Clamp from below, then from above`009`009`009*/
X`009maxbits = (maxbits < INIT_BITS) ? INIT_BITS : maxbits;
X`009maxbits = (maxbits > BITS) ? BITS : maxbits;
X`009maxmaxcode = 1 << maxbits;`009/* Truly biggest code`009`009*/
X`009if (export != 0) {
X`009    /*
X`009     * Unix-compatible output: block compression reserves
X`009     * LZ_CLEAR; otherwise codes start right after the bytes.
X`009     */
X`009    firstcode = (block_compress) ? LZ_CLEAR + 1 : 256;
X`009}
X`009else {
X`009    firstcode = LZ_FIRST;`009/* VMS private`009`009`009*/
X`009}
X}
X`012
Xput_magic_header()
X/*
X * Write the magic header bits.
X *
X * Export mode (unless noheader is set): two magic bytes followed by
X * a byte holding maxbits, with BLOCK_MASK or'ed in when block
X * compression is enabled.
X *
X * VMS private mode (VMS_V4 only): two magic bytes, maxbits with
X * BLOCK_MASK, and firstcode - 0x100, followed by a compressed header
X * consisting of LZ_SOH, a name record (ATT_NAME, the attribute
X * block length and ';'), LZ_EOR, the attribute block itself,
X * LZ_ETX and finally LZ_STX to introduce the file data.
X *
X * Nothing is written if COMPATIBLE is defined.
X */
X{
X#ifndef COMPATIBLE
X`009if (export && !noheader) {
X`009    PUT(HEAD1_MAGIC, &outstream);
X`009    PUT(HEAD2_MAGIC, &outstream);
X`009    PUT(maxbits | ((block_compress) ? BLOCK_MASK : 0),
X`009`009&outstream);
X`009}
X#if VMS_V4
X`009else if (export == 0) {
X`009    char`009`009text[256];
X`009    /*
X`009     * VMS private mode (with attribute block)
X`009     */
X`009    PUT(HEAD1_MAGIC, &outstream);
X`009    PUT(VMS_HEAD2_MAGIC, &outstream);
X`009    PUT((char) (maxbits | BLOCK_MASK), &outstream);
X`009    PUT(firstcode - 0x100, &outstream);
X`009    /*
X`009     * Pass the argument explicitly, as main() does; calling
X`009     * init_compress() with no argument leaves it undefined.
X`009     */
X`009    init_compress(TRUE);
X`009    outputcode(LZ_SOH);
X#if DEBUG
X`009    if (strlen(ATT_NAME) != ATT_SIZE) {
X`009`009fprintf(stderr, "\"%s\", expected %d, got %d\n",
X`009`009    ATT_NAME, ATT_SIZE, strlen(ATT_NAME));
X`009    }
X#endif
X`009    sprintf(text, "%s%d;", ATT_NAME, fdl_descriptor.dsc$w_length);
X`009    mem_compress(text, strlen(text));
X`009    outputcode(LZ_EOR);
X`009    mem_compress(fdl_descriptor.dsc$a_pointer,
X`009`009`009 fdl_descriptor.dsc$w_length);
X`009    fdl_free(&fdl_descriptor);
X`009    outputcode(LZ_ETX);
X`009    outputcode(LZ_STX);
X`009}
X#endif
X#endif
X}
X
Xmem_compress(datum, length)
Xchar_type`009*datum;
Xint`009`009length;
X/*
X * Run the compressor over "length" bytes held in memory at "datum".
X * mem_stream is made to describe exactly that buffer, with lz_eof
X * as its stream function, and is then handed to compress().
X */
X{
X`009mem_stream.func = lz_eof;
X`009mem_stream.bsize = length;
X`009mem_stream.bstart = datum;
X`009mem_stream.bp = datum;`009`009/* Next byte to read`009`009*/
X`009mem_stream.bend = &datum[length];`009/* One past the last byte`009*/
X`009compress(&mem_stream);
X}
X`012
X/*
X * This routine is used to tune the hash table size according to
X * the file size.  If the filesize is unknown, fsize should be
X * set to zero.
X */
X
X/*
X * One tuning rule: gethashsize() selects "hsize" for the first
X * entry whose "fsize" is greater than the input file size.
X */
Xtypedef struct TUNETAB {
X    long`009fsize;
X    code_int`009hsize;
X} TUNETAB;
X
X/*
X * Tuning rules in increasing file size order.  An entry is compiled
X * in only if the compile-time HSIZE is larger than the table size
X * it would select.  The entry with fsize == 0 ends the table.
X */
Xstatic readonly TUNETAB tunetab[] = {
X#if HSIZE > 5003
X    {`0091 << 12,`009 5003`009},
X#endif
X#if HSIZE > 9001
X    {`0091 << 13,`009 9001`009},
X#endif
X#if HSIZE > 18013
X    {`0091 << 14,`00918013`009},
X#endif
X#if HSIZE > 35023
X    {`0091 << 15,`00935023`009},
X    {`009  47000,`00950021`009},
X#endif
X    {`009      0,`009    0`009},
X};
X
Xstatic
Xgethashsize()
X/*
X * Tune the hash table parameters for small files.
X * We don't have a good way to find the file size on vms V3.
X * fsize is set to zero if we can't find it.
X */
X{
X`009register TUNETAB`009*tunep;
X
X`009hsize = HSIZE;
X`009if (fsize > 0) {
X`009    for (tunep = tunetab; tunep->fsize != 0; tunep++) {
X`009`009if (fsize < tunep->fsize) {
X`009`009    hsize = tunep->hsize;
X`009`009    break;
X`009`009}
X`009    }
X`009}
X}
X`012
Xstatic
Xgetfilesize()
X/*
X * Set fsize to the input filesize (in bytes) if possible.
X * Magic for all operating systems.
X */
X{
X#ifdef`009rsx
X`009extern char`009f_efbk;`009/* F.EFBK -- highest block in file`009*/
X#define`009fdb(p,offset)`009(stdin->io_fdb[((int) &p + offset)] & 0xFF)
X#define efbk(offset)`009fdb(f_efbk, offset)
X`009extern char`009f_rtyp;`009/* F.RTYP -- Record type`009`009*/
X`009extern char`009f_ratt;`009/* F.RATT -- Record attributes`009`009*/
X`009/*
X`009 * Note: Block number is stored high-order word first.
X`009 */
X`009fsize = efbk(2)
X`009    + (efbk(3) << 8)
X`009    + (efbk(0) << 16)
X`009    + (efbk(1) << 24);
X`009fsize *= 512;
X#endif
X#ifdef`009rt11
X`009fsize = stdin->io_size;`009`009/* Set by Decus C`009`009*/
X`009fsize *= 512;
X#endif
X#ifdef`009vms
X#if VMS_V4
X`009struct stat`009statbuf;
X
X`009fsize = 0;
X`009if (export != 0) {
X`009    if (fstat(fileno(stdin), &statbuf) == 0)
X`009`009fsize = (long) statbuf.st_size;
X`009}
X`009else {
X`009    fsize = (long) fdl_fsize(fdl_input);
X`009}
X#else
X`009fsize = 0;`009`009`009`009/* Can't find filesize`009*/
X#endif
X#endif
X#ifdef`009unix
X`009struct stat`009statbuf;
X
X`009fsize = 0;
X`009if (fstat(fileno(stdin), &statbuf) == 0)
X`009    fsize = (long) statbuf.st_size;
X#endif
X}
X`012
Xstatic readonly char *helptext[] = {
X`009"The following options are valid:",
X`009"-B\tBinary file (important on VMS/RSX, ignored on Unix)",
X`009"-M val\tExplicitly set the maximum number of code bits",
X`009"-V val\tPrint status information (or debugging) to stderr",
X`009"-X val\tSet export (compatiblity) mode:",
X#if VMS_V4
X`009"  -X 0\tExplicitly choose VMS Private mode",
X#endif
X`009"  -X 1\t(default if -X specified, output format is compatible",
X`009      "\twith Unix compress V3.0",
X`009"  -X 2\tCompatible with Unix compress 3.0, block compression",
X`009      "\tsupressed.",
X#ifdef COMPATIBLE
X`009"  -X 3No header (file is readable by old compress)",
X#endif
X`009NULL,
X};
X
Xstatic
Xsetup(argc, argv)
Xint`009`009argc;
Xchar`009`009*argv[];
X/*
X * Get parameters.  Exit fatally on errors.
X *
X * Arguments starting with '-' (other than a lone "-") are option
X * strings; each letter is handled in turn, case-insensitively.
X * All other arguments are file names and are compacted to the
X * front of argv[]: the first is the input file and the second the
X * output file, where "-" leaves the name NULL (stdin or stdout).
X * An unknown option, an illegal option value or more than two
X * file names ends in FAIL().
X */
X{
X`009register char`009*ap;
X`009register int`009c;
X`009char`009`009**hp;
X`009auto int`009i;
X`009int`009`009j;
X
X#ifdef`009vms
X`009argc = getredirection(argc, argv);
X#endif
X`009for (i = j = 1; i < argc; i++) {
X`009    ap = argv[i];
X`009    if (*ap++ != '-' || *ap == EOS)`009/* Filename?`009`009*/
X`009`009argv[j++] = argv[i];`009`009/* Just copy it`009`009*/
X`009    else {
X`009`009while ((c = *ap++) != EOS) {
X`009`009    if (islower(c))
X`009`009`009c = toupper(c);
X`009`009    switch (c) {
X`009`009    case 'B':
X`009`009`009binary = TRUE;
X`009`009`009break;
X
X`009`009    case 'M':
X`009`009`009maxbits = getvalue(ap, &i, argv);
X`009`009`009if (maxbits < MIN_BITS) {
X`009`009`009    fprintf(stderr, "Illegal -M value\n");
X`009`009`009    goto usage;
X`009`009`009}
X`009`009`009break;
X
X`009`009    case 'V':
X`009`009`009verbose = getvalue(ap, &i, argv);
X`009`009`009break;
X
X`009`009    case 'X':
X`009`009`009export = getvalue(ap, &i, argv);
X`009`009`009if (export < 0 || export > 3) {
X`009`009`009    fprintf(stderr, "Illegal -X value: %d\n", export);
X`009`009`009    goto usage;
X`009`009`009}
X`009`009`009/*
X`009`009`009 * Map the -X value (0..3) onto the three
X`009`009`009 * flags by indexing constant tables.
X`009`009`009 */
X`009`009`009block_compress = "\1\1\0\0"[export];
X`009`009`009noheader       = "\0\0\0\1"[export];
X`009`009`009export         = "\0\1\1\1"[export];
X`009`009`009break;
X
X`009`009    default:
X`009`009`009/*
X`009`009`009 * ap was advanced when the option was
X`009`009`009 * fetched, so the offending character
X`009`009`009 * (as typed) is ap[-1], not *ap.
X`009`009`009 */
X`009`009`009fprintf(stderr, "Unknown option '%c' in \"%s\"\n",
X`009`009`009`009ap[-1], argv[i]);
Xusage:`009`009`009for (hp = helptext; *hp != NULL; hp++)
X`009`009`009    fprintf(stderr, "%s\n", *hp);
X`009`009`009FAIL("usage");
X`009`009    }`009`009`009`009/* Switch on options`009*/
X`009`009}`009`009`009`009/* Everything for -xxx`009*/
X`009    }`009`009`009`009`009/* If -option`009`009*/
X`009}`009`009`009`009`009/* For all argc's`009*/
X`009/*  infilename = NULL; */`009`009/* Set "stdin"  signal`009*/
X`009/* outfilename = NULL; */`009`009/* Set "stdout" signal`009*/
X`009switch (j) {`009`009`009`009/* Any file arguments?`009*/
X`009case 3:`009`009`009`009`009/* both files given`009*/
X`009    if (!streq(argv[2], "-"))`009`009/* But - means stdout`009*/
X`009`009outfilename = argv[2];
X`009    /* Fall through to pick up the input file as well`009*/
X`009case 2:`009`009`009`009`009/* Input file given`009*/
X`009    if (!streq(argv[1], "-")) {
X`009`009infilename = argv[1];
X`009    }
X`009    break;
X
X`009case 0:`009`009`009`009`009/* None!`009`009*/
X`009case 1:`009`009`009`009`009/* No file arguments`009*/
X`009    break;
X
X`009default:
X`009    fprintf(stderr, "Too many file arguments\n");
X`009    FAIL("too many files");
X`009}
X}
X`012
Xstatic int
Xgetvalue(ap, ip, argv)
Xregister char`009`009*ap;
Xint`009`009`009*ip;
Xchar`009`009`009*argv[];
X/*
X * Fetch the numeric value of an option.  ap points just past the
X * option letter inside argv[*ip].  Three cases:
X *
X *`009-x123`009`009The digits follow directly.  Return 123 and
X *`009`009`009store EOS at *ap so that the caller's scan of
X *`009`009`009this argument stops.
X *
X *`009-x 123`009`009The option letter ends the argument and the
X *`009`009`009next argument starts with a digit.  Return 123
X *`009`009`009and advance *ip so the caller skips it.
X *
X *`009-xy or -x y`009No value.  Return 1 and leave *ap and *ip
X *`009`009`009untouched.
X *
X * The default of 1 suits flag-like options.  For -M, where a value
X * is mandatory, setup() rejects results below MIN_BITS (presumably
X * that covers the default of 1).
X */
X{
X`009register int`009value;
X`009register int`009next;
X
X`009if (isdigit(*ap)) {`009`009/* Value attached to option`009*/
X`009    value = atoi(ap);
X`009    *ap = EOS;
X`009    return (value);
X`009}
X`009next = *ip + 1;
X`009if (*ap != EOS
X`009 || argv[next] == NULL
X`009 || !isdigit(argv[next][0]))
X`009    return (1);`009`009`009/* No value present`009`009*/
X`009value = atoi(argv[next]);`009/* Value is the next argument`009*/
X`009*ip = next;
X`009return (value);
X}
X`012
Xopeninput()
X{
X#ifdef decus
X`009if (infilename == NULL) {
X`009    infilename = malloc(256 + 1);
X`009    fgetname(stdin, infilename);
X`009    infilename = realloc(infilename, strlen(infilename) + 1);
X`009}
X`009else {
X`009    if (freopen(infilename, (binary) ? "rn" : "r", stdin) == NULL) {
X`009`009perror(infilename);
X`009`009FAIL("can't reopen input");
X`009    }
X`009}
X#else
X#ifdef vms
X#if VMS_V4
X`009if (export == 0) {
X`009    char`009`009*fname;
X`009    char`009`009filename[256];
X
X`009    if ((fname = infilename) == NULL) {
X`009`009fgetname(stdin, filename);
X`009`009fname = filename;
X`009    }
X`009    if ((fdl_input = fdl_open(fname, &fdl_descriptor)) == NULL) {
X`009`009if ((fdl_status & 01) == 0) {
X`009`009    fdl_message(NULL, fname);
X`009`009    FAIL("can't fdl_open");
X`009`009}
X`009`009fprintf(stderr,
X`009`009    "Cannot open \"%s\" in vms private format,", fname);
X`009`009fprintf(stderr, " trying export format.\n");
X`009`009export = TRUE;
X`009`009goto try_export;
X`009    }
X`009    fclose(stdin);
X`009    stdin = NULL;
X`009    infilename = malloc(256 + 1);
X`009    infilename = realloc(fname, strlen(fname) + 1);
X`009    if (verbose > 1) {
X`009`009fprintf(stderr, "FDL information for \"%s\"\n", filename);
X`009`009fdl_dump(&fdl_descriptor, stderr);
X`009    }
X`009    goto opened;
X`009}
Xtry_export:
X#endif
X`009if (infilename == NULL) {
X`009    infilename = malloc(256 + 1);
X`009    fgetname(stdin, infilename);
X`009    infilename = realloc(infilename, strlen(infilename) + 1);
X`009}
X`009else {
X#if VMS_V4
X`009    if ((stdin = freopen(infilename, "r", stdin)) == NULL) {
X#else
X`009    if (freopen(infilename, "r", stdin) == NULL) {
X#endif
X`009`009perror(infilename);
X`009`009exit(IO_ERROR);
X`009    }
X`009}
X#else
X`009if (infilename == NULL)
X`009    infilename = "stdin";
X`009else {
X`009    if (freopen(infilename, "r", stdin) == NULL) {
X`009`009perror(infilename);
X`009`009exit(IO_ERROR);
X`009    }`009`009    
X`009}
X#endif
X#endif
Xopened:`009instream.bp = instream.bend = NULL;
X`009instream.bstart = inbuffer;
X`009instream.bsize = sizeof inbuffer;
X`009instream.func = lz_fill;
X}
X`012
Xopenoutput()
X/*
X * Open the output file (after the input file has been opened).
X * if outfilename == NULL, it's already open on stdout.
X *
X * With a file name, stdout is reopened on that file, except in VMS
X * private mode where stdout is closed and the file is created with
X * fdl_create().  Without a file name, VMS V4 refuses to continue;
X * other VMS and Decus C builds fetch the name of stdout with
X * fgetname(), and elsewhere the name is simply "stdout" so that
X * messages have something to print.
X *
X * Output to a terminal is refused.  Failures end in FAIL().
X * On return outstream describes outbuffer, with lz_flush as its
X * stream function.
X */
X{
X`009if (outfilename == NULL) {
X#if VMS_V4
X#if 0`009`009`009`009`009/* The following doesn't work`009*/
X`009    outfilename = malloc(256 + 1);
X`009    fgetname(stdout, outfilename);
X`009    outfilename = realloc(outfilename, strlen(outfilename) + 1);
X`009    if (export == 0) {
X`009`009fclose(stdout);
X`009`009stdout = NULL;`009`009/* Can't do terminal test below`009*/
X`009`009if ((fdl_output = fdl_create(NULL, outfilename)) == NULL) {
X`009`009    if ((fdl_status & 01) == 0)
X`009`009`009fdl_message(NULL, outfilename);
X`009`009    fprintf(stderr, "Can't create \"%s\"\n", outfilename);
X`009`009    FAIL("can't fdl_create");
X`009`009}
X`009    }
X#else
X`009    fprintf(stderr,
X`009`009"Restriction: The output file must be specified.\n");
X`009    FAIL("can't redirect on VMS V4");
X#endif
X#else
X#ifdef`009vms
X`009    outfilename = malloc(256 + 1);
X`009    fgetname(stdout, outfilename);
X`009    outfilename = realloc(outfilename, strlen(outfilename) + 1);
X#else
X#ifdef decus
X`009    outfilename = malloc(256 + 1);
X`009    fgetname(stdout, outfilename);
X`009    outfilename = realloc(outfilename, strlen(outfilename) + 1);
X#else
X`009    /*
X`009     * Name the stream, as openinput() does with "stdin", so
X`009     * the messages that print outfilename are not blank.
X`009     */
X`009    outfilename = "stdout";
X#endif
X#endif
X#endif
X`009}
X`009else {
X#if VMS_V4
X`009    if (export == 0) {
X`009`009fclose(stdout);
X`009`009stdout = NULL;`009`009/* Can't do terminal test below`009*/
X`009`009if ((fdl_output = fdl_create(NULL, outfilename)) == NULL) {
X`009`009    if ((fdl_status & 01) == 0)
X`009`009`009fdl_message(NULL, outfilename);
X`009`009    fprintf(stderr,
X`009`009`009"Can't create \"%s\" (VMS private)\n", outfilename);
X`009`009    FAIL("can't fdl_create");
X`009`009}
X`009    }
X`009    else {
X`009`009if (freopen(outfilename, "w", stdout) == NULL) {
X`009`009    perror(outfilename);
X`009`009    FAIL("can't create");
X`009`009}
X`009    }
X#else
X#ifdef decus
X`009    if (freopen(outfilename, "wn", stdout) == NULL) {
X`009`009perror(outfilename);
X`009`009FAIL("can't create");
X`009    }
X#else
X`009    if (freopen(outfilename, "w", stdout) == NULL) {
X`009`009perror(outfilename);
X`009`009FAIL("can't create");
X`009    }
X#endif
X#endif
X`009}
X`009/* stdout is NULL here when the file was created by fdl_create`009*/
X`009if (stdout != NULL && isatty(fileno(stdout))) {
X`009    fprintf(stderr, "%s: is a terminal.  We object.\n",
X`009`009outfilename);
X`009    FAIL("can't create");
X`009}
X`009outstream.bp = outstream.bstart = outbuffer;
X`009outstream.bend = outbuffer + sizeof outbuffer;
X`009outstream.bsize = sizeof outbuffer;
X`009outstream.func = lz_flush;
X}
X
$ GOSUB UNPACK_FILE
$ FILE_IS = "LZCMP2.C"
$ CHECKSUM_IS = 1634966692
$ COPY SYS$INPUT VMS_SHARE_DUMMY.DUMMY
X/*
X *`009`009l z c m p 2 . c
X *
X * Actually do compression.  Terminology (and algorithm):
X *
X * Assume the input string is "abcd", we have just processed "ab" and
X * read 'c'.  At this point, a "prefix code" will be assigned to "ab".
X * Search in the prefix:character memory (either the "fast memory" or
X * the hash-code table) for the code followed by this character.  If
X * found, assign the code found to the "prefix code" and read the
X * next character.  If not found, output the current prefix code,
X * generate a new prefix code and store "old_prefix:char" in the
X * table with "new_prefix" as its definition.
X *
X * Naming conventions:
X *   code`009a variable containing a prefix code
X *   c or char`009a variable containing a character
X *
X * There are three tables that are searched (dependent on compile-time
X * and execution time considerations):
X *   fast`009Direct table-lookup -- requires a huge amount of physical
X *`009`009(non-paged) memory, but is very fast.
X *   hash`009Hash-coded table-lookup.
X *   cache`009A "look-ahead" cache for the hash table that optimizes
X *`009`009searching for the most frequent character.  This considerably
X *`009`009speeds up processing for raster-images (for example) at
X *`009`009a modest amount of memory.
X * Structures are used to hold the actual tables to simplify organization
X * of the program.
X *
X * Subroutines:
X *    compress()`009performs data compression on an input datastream.
X *    init_compress()`009called by the output routine to clear tables.
X */
X
X#include`009"lz.h"
X
X/*
X * General variables
X * Cleared by init_compress on a "hard initialization"
X * outputcode() in lzcmp3.c refers to next_code.
X */
X
Xlong int`009in_count;`009`009/* Length of input`009`009*/
Xlong int`009out_count;`009`009/* Bytes written to output file`009*/
Xstatic flag`009first_clear = TRUE;`009/* Don't zero first time`009*/
Xcode_int`009next_code;`009`009/* Next output code`009`009*/
Xstatic count_int checkpoint = CHECK_GAP; /* When to test ratio again`009*/
Xstatic long`009ratio = 0;`009`009/* Ratio for last segment`009*/
X
X/*
X * These global parameters are set by mainline code.  Unchanged here.
X */
Xextern short`009maxbits;`009`009/* Settable max # bits/code`009*/
Xextern short`009block_compress;`009`009/* For old-style compatibility`009*/
Xextern code_int`009maxmaxcode;`009`009/* Actual maximum output code`009*/
Xextern long`009tot_incount;`009`009/* Total input count`009`009*/
-+-+-+-+-+ End of part 1 +-+-+-+-+-