ARC 5.20 w/Squashing, 7/9
Howard Chu
hyc at umix.cc.umich.edu
Thu Apr 14 16:03:46 AEST 1988
XX } catreturn;
XX
XX char *i;
XX int j, RETCODE;
XX
XX static int catptr = 0;
XX static int catflag = 0x200;
XX static int cattype = 1;
XX static int patflag = 0;
XX
XX catreturn.maxlen = 256;
XX
XX if (patflag) {
XX patflag = 0;
XX catptr = 0;
XX return (NULL);
XX }
XX if (filepattern) {
XX strcpy(pattern.name, filepattern);
XX pattern.len = strlen(filepattern);
XX if (!index(filepattern, '?'))
XX patflag = 1;
XX }
XX if (patflag) {
XX fileinfo(&pattern, &cattype, "CINAME ", &catreturn, _retcode RETCODE);
XX catptr = RETCODE ? 0 : 1;
XX } else
XX catscan(&pattern, &catflag, &cattype, &catreturn, &catptr);
XX
XX if (!catptr)
XX return (NULL);
XX else {
XX char *k;
XX
XX k = index(catreturn.name, ' ');
XX if (k)
XX *k = 0;
XX else {
XX j = catreturn.actlen;
XX catreturn.name[j] = 0;
XX }
XX k = catreturn.name;
XX if (catreturn.name[0] == tmpchr[0])
XX k++;
XX else if ((k = index(catreturn.name, sepchr[0])))
XX k++;
XX else
XX k = catreturn.name;
XX j = strlen(k);
XX i = malloc(++j);
XX strcpy(i, k);
XX return (i);
XX }
XX#else
XX fortran gfinfo();
XX static char gfname[24];
XX static char pattern[20];
XX static int gfdummy[2] = {
XX 0, 0
XX }, gfflags;
XX int i, RETCODE;
XX char *j, *k;
XX
XX if (filepattern) {
XX strcpy(pattern, filepattern);
XX strcat(pattern, " ");
XX for (i = 20; i < 24; i++)
XX gfname[i] = '\0';
XX if (index(pattern, '?'))
XX gfflags = 0x0C;
XX else
XX gfflags = 0x09;
XX } else if (gfflags == 0x09)
XX return (NULL);
XX
XX gfinfo(pattern, gfname, &gfflags, gfdummy, gfdummy, gfdummy, _retcode RETCODE);
XX if (RETCODE)
XX return (NULL);
XX else {
XX k = index(gfname, ' ');
XX *k = '\0';
XX k = gfname;
XX if (gfname[0] == tmpchr[0])
XX k++;
XX else if ((k = index(gfname, sepchr[0])))
XX k++;
XX else
XX k = gfname;
XX i = strlen(k);
XX j = malloc(++i);
XX strcpy(j, k);
XX return (j);
XX }
XX#endif
XX}
XX
XXunlink(path)
XX char *path; /* name of file to delete */
XX{
XX fortran destroy();
XX int RETCODE;
XX
XX char name[258];
XX
XX strcpy(name, path);
XX strcat(name, " ");
XX destroy(name, _retcode RETCODE);
XX if (RETCODE)
XX return (-1);
XX else
XX return (0);
XX}
XX#endif
SHAR_EOF
if test 11060 -ne "`wc -c arcmisc.c`"
then
echo shar: error transmitting arcmisc.c '(should have been 11060 characters)'
fi
echo shar: extracting arcpack.c '(7219 characters)'
sed 's/^XX//' << \SHAR_EOF > arcpack.c
XX/*
XX * $Log: arcpack.c,v $
XX * Revision 1.3 88/04/11 18:57:00 hyc
XX * fixed a typo...
XX *
XX * Revision 1.2 88/04/11 18:38:15 hyc
XX * added support for squashing, re-synch with MTS
XX *
XX * Revision 1.1 88/04/11 18:27:10 hyc
XX * Initial revision
XX *
XX * Revision 1.1 87/12/19 04:05:16 hyc
XX * Initial revision
XX *
XX * Revision 1.3 87/08/13 17:05:11 hyc
XX * Run thru indent, fixed some signed vs. unsigned problems
XX * with bp <-> buf, and inbuf and localbuf...
XX * Revision 1.2 87/07/21 08:57:19 hyc *** empty
XX * log message ***
XX *
XX */
XX
XX/*
XX * ARC - Archive utility - ARCPACK
XX *
XX * Version 3.46, created on 10/23/86 at 17:47:06
XX *
XX * (C) COPYRIGHT 1985,86 by System Enhancement Associates; ALL RIGHTS RESERVED
XX *
XX * By: Thom Henderson
XX *
XX * Description: This file contains the routines used to compress a file when
XX * placing it in an archive.
XX *
XX * Language: Computer Innovations Optimizing C86
XX */
XX#include <stdio.h>
XX#include "arc.h"
XX#ifdef MTS
XX#include <ctype.h>
XX#endif
XX
XX/* stuff for non-repeat packing */
XX
XX#define DLE 0x90 /* repeat sequence marker */
XX
XXstatic unsigned char state; /* current packing state */
XX
XX/* non-repeat packing states */
XX
XX#define NOHIST 0 /* don't consider previous input */
XX#define SENTCHAR 1 /* lastchar set, no lookahead yet */
XX#define SENDNEWC 2 /* run over, send new char next */
XX#define SENDCNT 3 /* newchar set, send count next */
XX
XX/* packing results */
XX
XXstatic LONG stdlen; /* length for standard packing */
XXstatic INT crcval; /* CRC check value */
XX
XXINT
XXpack(f, t, hdr) /* pack file into an archive */
XX FILE *f, *t; /* source, destination */
XX struct heads *hdr; /* pointer to header data */
XX{
XX INT c; /* one character of stream */
XX LONG ncrlen; /* length after packing */
XX LONG lzwlen; /* length after crunching */
XX LONG pred_cm(), sqpred_cm(); /* dynamic crunching cleanup */
XX LONG tloc, ftell(); /* start of output */
XX INT getch();
XX INT getc_ncr();
XX INT putc_pak();
XX
XX /* first pass - see which method is best */
XX
XX tloc = ftell(t); /* note start of output */
XX
XX if (!nocomp) { /* if storage kludge not active */
XX if (note) {
XX printf(" analyzing, ");
XX fflush(stdout);
XX }
XX state = NOHIST; /* initialize ncr packing */
XX stdlen = ncrlen = 0; /* reset size counters */
XX crcval = 0; /* initialize CRC check value */
XX setcode(); /* initialize encryption */
XX
XX if (dosquash) {
XX sqinit_cm(f, t);
XX while ((c = getch(f)) != EOF) { /* for each byte of file */
XX ncrlen++; /* one more packed byte */
XX sqputc_cm(c, t); /* see what squashing
XX * can do */
XX }
XX lzwlen = sqpred_cm(t);
XX } else {
XX init_cm(f, t); /* initialize for crunching */
XX
XX while ((c = getc_ncr(f)) != EOF) { /* for each byte of file */
XX ncrlen++; /* one more packed byte */
XX putc_cm(c, t); /* see what crunching can do */
XX }
XX lzwlen = pred_cm(t); /* finish up after crunching */
XX }
XX } else { /* else kludge the method */
XX stdlen = 0; /* make standard look best */
XX ncrlen = lzwlen = 1;
XX }
XX
XX /* standard set-ups common to all methods */
XX
XX fseek(f, 0L, 0); /* rewind input */
XX hdr->crc = crcval; /* note CRC check value */
XX hdr->length = stdlen; /* set actual file length */
XX state = NOHIST; /* reinitialize ncr packing */
XX setcode(); /* reinitialize encryption */
XX
XX /* choose and use the shortest method */
XX
XX if (kludge && note)
XX printf("\n\tS:%ld P:%ld C:%ld,\t ", stdlen, ncrlen, lzwlen);
XX
XX if (stdlen <= ncrlen && stdlen <= lzwlen) {
XX if (note) {
XX printf("storing, "); /* store without compression */
XX fflush(stdout);
XX }
XX hdrver = 2; /* note packing method */
XX fseek(t, tloc, 0); /* reset output for new method */
XX if (nocomp) { /* store only kludge skips things */
XX stdlen = crcval = 0; /* recalc these for kludge */
XX while ((c = getch(f)) != EOF) /* store it straight */
XX putc_pak(c, t);
XX } else
XX filecopy(f, t, stdlen); /* else use fast file copier */
XX hdr->crc = crcval;
XX hdr->length = hdr->size = stdlen;
XX } else if (ncrlen < lzwlen) {
XX if (note)
XX printf("packing, "); /* pack with repeat
XX * suppression */
XX hdrver = 3; /* note packing method */
XX hdr->size = ncrlen; /* set data length */
XX fseek(t, tloc, 0); /* reset output for new method */
XX while ((c = getc_ncr(f)) != EOF)
XX putc_pak(c, t);
XX } else {
XX if (note)
XX printf(dosquash ? "squashed, " : "crunched, ");
XX hdrver = dosquash ? 9 : 8;
XX hdr->size = lzwlen; /* size should not change */
XX }
XX
XX /* standard cleanups common to all methods */
XX
XX if (note)
XX printf("done.\n");
XX}
XX
XX/*
XX * Non-repeat compression - text is passed through normally, except that a
XX * run of more than two is encoded as:
XX *
XX * <char> <DLE> <count>
XX *
XX * Special case: a count of zero indicates that the DLE is really a DLE, not a
XX * repeat marker.
XX */
XX
XXINT
XXgetc_ncr(f) /* get bytes with collapsed runs */
XX FILE *f; /* file to get from */
XX{
XX static INT lastc; /* value returned on last call */
XX static INT repcnt; /* repetition counter */
XX static INT c; /* latest value seen */
XX
XX switch (state) { /* depends on our state */
XX case NOHIST: /* no relevant history */
XX state = SENTCHAR;
XX return lastc = getch(f); /* remember the value next
XX * time */
XX
XX case SENTCHAR: /* char was sent. look ahead */
XX switch (lastc) {/* action depends on char */
XX case DLE: /* if we sent a real DLE */
XX state = NOHIST; /* then start over again */
XX return 0; /* but note that the DLE was real */
XX
XX case EOF: /* EOF is always a special case */
XX return EOF;
XX
XX default: /* else test for a repeat */
XX for (repcnt = 1; (c = getch(f)) == lastc && repcnt < 255; repcnt++);
XX /* find end of run */
XX
XX switch (repcnt) { /* action depends on run size */
XX case 1:/* not a repeat */
XX return lastc = c; /* but remember value
XX * next time */
XX
XX case 2:/* a repeat, but too short */
XX state = SENDNEWC; /* send the second one
XX * next time */
XX return lastc;
XX
XX default: /* a run - compress it */
XX state = SENDCNT; /* send repeat count
XX * next time */
XX return DLE; /* send repeat marker this
XX * time */
XX }
XX }
XX
XX case SENDNEWC: /* send second char of short run */
XX state = SENTCHAR;
XX return lastc = c;
XX
XX case SENDCNT: /* sent DLE, now send count */
XX state = SENDNEWC;
XX return repcnt;
XX
XX default:
XX abort("Bug - bad ncr state\n");
XX }
XX}
XX
XXINT
XXgetch(f) /* special get char for packing */
XX FILE *f; /* file to get from */
XX{
XX INT c; /* a char from the file */
XX#ifndef MSDOS
XX static INT cr = 0; /* add \r before \n ? */
XX
XX if (cr) {
XX c = '\n';
XX#ifdef MTS
XX c = toascii(c);
XX#endif
XX crcval = addcrc(crcval, c);
XX stdlen++;
XX cr = 0;
XX return (c);
XX }
XX#endif
XX
XX if ((c = fgetc(f)) != EOF) { /* if not the end of file */
XX#ifndef MSDOS
XX if (!image && (c == '\n')) {
XX c = '\r';
XX cr = 1;
XX }
XX#endif
XX#ifdef MTS
XX if (!image)
XX c = toascii(c);
XX#endif
XX crcval = addcrc(crcval, c); /* then update CRC check
XX * value */
XX stdlen++; /* and bump length counter */
XX }
XX return c;
XX}
XX
XXINT
XXputc_pak(c, f) /* put a packed byte into archive */
XX char c; /* byte to put */
XX FILE *f; /* archive to put it in */
XX{
XX putc_tst(code(c), f); /* put encoded byte, with checks */
XX}
SHAR_EOF
if test 7219 -ne "`wc -c arcpack.c`"
then
echo shar: error transmitting arcpack.c '(should have been 7219 characters)'
fi
echo shar: extracting arcrun.c '(3287 characters)'
sed 's/^XX//' << \SHAR_EOF > arcrun.c
XX/*
XX * $Log: arcrun.c,v $
XX * Revision 1.3 87/08/13 17:05:51 hyc
XX * Run thru indent, fixed some signed vs. unsigned problems
XX * with bp <-> buf, and inbuf and localbuf...
XX * Revision 1.2 87/07/21 09:08:37 hyc *** empty
XX * log message ***
XX *
XX */
XX
XX/*
XX * ARC - Archive utility - ARCRUN
XX *
XX * Version 1.20, created on 03/24/86 at 19:34:31
XX *
XX * (C) COPYRIGHT 1985,85 by System Enhancement Associates; ALL RIGHTS RESERVED
XX *
XX * By: Thom Henderson
XX *
XX * Description: This file contains the routines used to "run" a file which is
XX * stored in an archive. At present, all we really do is (a) extract a
XX * temporary file, (b) give its name as a system command, and then (c) delete
XX * the file.
XX *
XX * Language: Computer Innovations Optimizing C86
XX */
XX#include <stdio.h>
XX#include "arc.h"
XX
XXINT
XXrunarc(num, arg) /* run file from archive */
XX INT num; /* number of arguments */
XX char *arg[]; /* pointers to arguments */
XX{
XX struct heads hdr; /* file header */
XX char *makefnam(); /* filename fixer */
XX char buf[100]; /* filename buffer */
XX FILE *fopen();/* file opener */
XX INT runfile();
XX
XX rempath(num, arg); /* strip off paths */
XX
XX openarc(0); /* open archive for reading */
XX
XX if (num) { /* if files were named */
XX while (readhdr(&hdr, arc)) { /* while more files to check */
XX if (match(hdr.name, makefnam(arg[0], ".*", buf)))
XX runfile(&hdr, num, &arg[1]);
XX else
XX fseek(arc, hdr.size, 1);
XX }
XX } else
XX while (readhdr(&hdr, arc)) /* else run all files */
XX runfile(&hdr, 0, NULL);
XX
XX closearc(0); /* close archive after changes */
XX}
XX
XXstatic INT
XXrunfile(hdr, num, arg) /* run a file */
XX struct heads *hdr; /* pointer to header data */
XX INT num; /* number of arguments */
XX char *arg[]; /* pointers to arguments */
XX{
XX FILE *tmp, *fopen(); /* temporary file */
XX char buf[100], *makefnam(); /* temp file name, fixer */
XX char sys[100]; /* invocation command buffer */
XX char *dir, *gcdir(); /* directory stuff */
XX INT n; /* index */
XX
XX /* makefnam("$ARCTEMP",hdr->name,buf); */
XX sprintf(buf, "%s.RUN", arctemp);
XX
XX#ifdef MSDOS
XX if (!strcmp(buf, "$ARCTEMP.BAS"))
XX strcpy(sys, "BASICA $ARCTEMP");
XX
XX else if (!strcmp(buf, "$ARCTEMP.BAT")
XX || !strcmp(buf, "$ARCTEMP.COM")
XX || !strcmp(buf, "$ARCTEMP.EXE"))
XX strcpy(sys, "$ARCTEMP");
XX
XX else {
XX if (warn) {
XX printf("File %s is not a .BAS, .BAT, .COM, or .EXE\n",
XX hdr->name);
XX nerrs++;
XX }
XX fseek(arc, hdr->size, 1); /* skip this file */
XX return;
XX }
XX#endif
XX
XX if (warn)
XX if (tmp = fopen(buf, "r"))
XX abort("Temporary file %s already exists", buf);
XX if (!(tmp = fopen(buf, "w")))
XX abort("Unable to create temporary file %s", buf);
XX
XX if (note)
XX printf("Invoking file: %s\n", hdr->name);
XX
XX for (n = 0; n < num; n++) { /* add command line arguments */
XX strcat(buf, " ");
XX strcat(buf, arg[n]);
XX }
XX
XX dir = gcdir(""); /* see where we are */
XX unpack(arc, tmp, hdr); /* unpack the entry */
XX fclose(tmp); /* release the file */
XX chmod(buf, "700"); /* make it executable */
XX system(buf); /* try to invoke it */
XX chdir(dir);
XX free(dir); /* return to whence we started */
XX if (unlink(buf) && warn) {
XX printf("Cannot unsave temporary file %s\n", buf);
XX nerrs++;
XX }
XX}
SHAR_EOF
if test 3287 -ne "`wc -c arcrun.c`"
then
echo shar: error transmitting arcrun.c '(should have been 3287 characters)'
fi
echo shar: extracting arcs.h '(1832 characters)'
sed 's/^XX//' << \SHAR_EOF > arcs.h
XX/*
XX * $Log: arcs.h,v $
XX * Revision 1.3 87/08/13 17:05:55 hyc
XX * Run thru indent, fixed some signed vs. unsigned problems
XX * with bp <-> buf, and inbuf and localbuf...
XX * Revision 1.2 87/07/21 09:09:47 hyc *** empty
XX * log message ***
XX *
XX */
XX
XX/*
XX * ARC - Archive utility - Archive file header format
XX *
XX * Version 2.12, created on 12/17/85 at 14:40:26
XX *
XX * (C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED
XX *
XX * By: Thom Henderson
XX *
XX * Description: This file defines the format of an archive file header,
XX * excluding the archive marker and the header version number.
XX *
XX * Each entry in an archive begins with a one byte archive marker, which is set
XX * to 26. The marker is followed by a one byte header type code, from zero
XX * to 7.
XX *
XX * If the header type code is zero, then it is an end marker, and no more data
XX * should be read from the archive.
XX *
XX * If the header type code is in the range 2 to 7, then it is followed by a
XX * standard archive header, which is defined below.
XX *
XX * If the header type code is one, then it is followed by an older format
XX * archive header. The older format header does not contain the true length.
XX * A header should be read for a length of sizeof(struct heads)-sizeof(long).
XX * Then set length equal to size and change the header version to 2.
XX *
XX * Programming note: The crc value given in the header is based on the unpacked
XX * data.
XX *
XX * Language: Computer Innovations Optimizing C86
XX */
XX
XXstruct heads { /* archive entry header format */
XX char name[13]; /* file name */
XX LONG size; /* size of file, in bytes */
XX unsigned INT date; /* creation date */
XX unsigned INT time; /* creation time */
XX INT crc; /* cyclic redundancy check */
XX LONG length; /* true file length */
XX};
SHAR_EOF
if test 1832 -ne "`wc -c arcs.h`"
then
echo shar: error transmitting arcs.h '(should have been 1832 characters)'
fi
echo shar: extracting arcsqs.c '(11224 characters)'
sed 's/^XX//' << \SHAR_EOF > arcsqs.c
XX/* ARC - Archive utility - SQUASH
XX
XX(C) COPYRIGHT 1985 by System Enhancement Associates; ALL RIGHTS RESERVED
XX
XX This is a quick hack to ARCLZW to make it handle squashed archives.
XX Dan Lanciani (ddl at harvard.*) July 87
XX
XX*/
XX#include <stdio.h>
XX#include "arc.h"
XX
XX/* definitions for the new dynamic Lempel-Zev crunching */
XX
XX#define BITS 13 /* maximum bits per code */
XX#define HSIZE 10007 /* 80% occupancy */
XX#define INIT_BITS 9 /* initial number of bits/code */
XXstatic INT n_bits; /* number of bits/code */
XXstatic INT maxcode; /* maximum code, given n_bits */
XX#define MAXCODE(n) ((1<<(n)) - 1) /* maximum code calculation */
XXstatic INT maxcodemax = 1 << BITS; /* largest possible code (+1) */
XX
XXstatic unsigned char buf[BITS]; /* input/output buffer */
XX
XXstatic unsigned char lmask[9] = /* left side masks */
XX{0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00};
XXstatic unsigned char rmask[9] = /* right side masks */
XX{0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
XX
XXstatic INT offset; /* byte offset for code output */
XXstatic long in_count; /* length of input */
XXstatic long bytes_out; /* length of compressed output */
XXstatic unsigned INT ent;
XX
XXstatic long htab[HSIZE]; /* hash code table (crunch) */
XXstatic unsigned INT codetab[HSIZE]; /* string code table (crunch) */
XX
XXstatic unsigned INT *prefix = codetab; /* prefix code table (uncrunch) */
XX
XXstatic unsigned char suffix[HSIZE]; /* suffix table (uncrunch) */
XXstatic INT free_ent; /* first unused entry */
XXstatic INT firstcmp; /* true at start of compression */
XXstatic unsigned char stack[HSIZE]; /* local push/pop stack */
XX
XX/*
XX * block compression parameters -- after all codes are used up,
XX * and compression rate changes, start over.
XX */
XX
XXstatic INT clear_flg;
XXstatic long ratio;
XX#define CHECK_GAP 10000 /* ratio check interval */
XXstatic long checkpoint;
XXstatic INT putcode();
XX
XX/*
XX * the next two codes should not be changed lightly, as they must not
XX * lie within the contiguous general code space.
XX */
XX#define FIRST 257 /* first free entry */
XX#define CLEAR 256 /* table clear output code */
XX
XXstatic INT
XXcl_block(t) /* table clear for block compress */
XX FILE *t; /* our output file */
XX{
XX long rat;
XX
XX checkpoint = in_count + CHECK_GAP;
XX
XX if (in_count > 0x007fffff) { /* shift will overflow */
XX rat = bytes_out >> 8;
XX if (rat == 0) /* Don't divide by zero */
XX rat = 0x7fffffff;
XX else
XX rat = in_count / rat;
XX } else
XX rat = (in_count << 8) / bytes_out; /* 8 fractional bits */
XX
XX if (rat > ratio)
XX ratio = rat;
XX else {
XX ratio = 0;
XX setmem(htab, HSIZE * sizeof(long), 0xff);
XX free_ent = FIRST;
XX clear_flg = 1;
XX putcode(CLEAR, t);
XX }
XX}
XX
XX/*****************************************************************
XX *
XX * Output a given code.
XX * Inputs:
XX * code: A n_bits-bit integer. If == -1, then EOF. This assumes
XX * that n_bits =< (long)wordsize - 1.
XX * Outputs:
XX * Outputs code to the file.
XX * Assumptions:
XX * Chars are 8 bits long.
XX * Algorithm:
XX * Maintain a BITS character long buffer (so that 8 codes will
XX * fit in it exactly). When the buffer fills up empty it and start over.
XX */
XX
XXstatic INT
XXputcode(code, t) /* output a code */
XX INT code; /* code to output */
XX FILE *t; /* where to put it */
XX{
XX INT r_off = offset; /* right offset */
XX INT bits = n_bits; /* bits to go */
XX unsigned char *bp = buf; /* buffer pointer */
XX INT n; /* index */
XX
XX if (code >= 0) { /* if a real code *//* Get to the first byte. */
XX bp += (r_off >> 3);
XX r_off &= 7;
XX
XX /*
XX * Since code is always >= 8 bits, only need to mask the
XX * first hunk on the left.
XX */
XX *bp = (*bp & rmask[r_off]) | (code << r_off) & lmask[r_off];
XX bp++;
XX bits -= (8 - r_off);
XX code >>= (8 - r_off);
XX
XX /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
XX if (bits >= 8) {
XX *bp++ = code;
XX code >>= 8;
XX bits -= 8;
XX }
XX /* Last bits. */
XX if (bits)
XX *bp = code;
XX
XX offset += n_bits;
XX
XX if (offset == (n_bits << 3)) {
XX bp = buf;
XX bits = n_bits;
XX bytes_out += bits;
XX do
XX putc_pak(*bp++, t);
XX while (--bits);
XX offset = 0;
XX }
XX /*
XX * If the next entry is going to be too big for the code
XX * size, then increase it, if possible.
XX */
XX if (free_ent > maxcode || clear_flg > 0) { /* Write the whole
XX * buffer, because the
XX * input side won't
XX * discover the size
XX * increase until after
XX * it has read it. */
XX if (offset > 0) {
XX bp = buf; /* reset pointer for writing */
XX bytes_out += n = n_bits;
XX while (n--)
XX putc_pak(*bp++, t);
XX }
XX offset = 0;
XX
XX if (clear_flg) { /* reset if clearing */
XX maxcode = MAXCODE(n_bits = INIT_BITS);
XX clear_flg = 0;
XX } else {/* else use more bits */
XX n_bits++;
XX if (n_bits == BITS)
XX maxcode = maxcodemax;
XX else
XX maxcode = MAXCODE(n_bits);
XX }
XX }
XX } else { /* dump the buffer on EOF */
XX bytes_out += n = (offset + 7) / 8;
XX
XX if (offset > 0)
XX while (n--)
XX putc_pak(*bp++, t);
XX offset = 0;
XX }
XX}
XX
XX/*****************************************************************
XX *
XX * Read one code from the standard input. If EOF, return -1.
XX * Inputs:
XX * cmpin
XX * Outputs:
XX * code or -1 is returned.
XX */
XX
XXstatic INT
XXgetcode(f) /* get a code */
XX FILE *f; /* file to get from */
XX{
XX INT code;
XX static INT offset = 0, size = 0;
XX INT r_off, bits;
XX unsigned char *bp = buf;
XX
XX if (clear_flg > 0 || offset >= size || free_ent > maxcode) { /* If the next entry
XX * will be too big for
XX * the current code
XX * size, then we must
XX * increase the size.
XX * This implies reading
XX * a new buffer full,
XX * too. */
XX if (free_ent > maxcode) {
XX n_bits++;
XX if (n_bits == BITS)
XX maxcode = maxcodemax; /* won't get any bigger
XX * now */
XX else
XX maxcode = MAXCODE(n_bits);
XX }
XX if (clear_flg > 0) {
XX maxcode = MAXCODE(n_bits = INIT_BITS);
XX clear_flg = 0;
XX }
XX for (size = 0; size < n_bits; size++) {
XX if ((code = getc_unp(f)) == EOF)
XX break;
XX else
XX buf[size] = code;
XX }
XX if (size <= 0)
XX return -1; /* end of file */
XX
XX offset = 0;
XX /* Round size down to integral number of codes */
XX size = (size << 3) - (n_bits - 1);
XX }
XX r_off = offset;
XX bits = n_bits;
XX
XX /*
XX * Get to the first byte.
XX */
XX bp += (r_off >> 3);
XX r_off &= 7;
XX
XX /* Get first part (low order bits) */
XX code = (*bp++ >> r_off);
XX bits -= 8 - r_off;
XX r_off = 8 - r_off; /* now, offset into code word */
XX
XX /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
XX if (bits >= 8) {
XX code |= *bp++ << r_off;
XX r_off += 8;
XX bits -= 8;
XX }
XX /* high order bits. */
XX code |= (*bp & rmask[bits]) << r_off;
XX offset += n_bits;
XX
XX return code;
XX}
XX
XX/*
XX * compress a file
XX *
XX * Algorithm: use open addressing double hashing (no chaining) on the
XX * prefix code / next character combination. We do a variant of Knuth's
XX * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime
XX * secondary probe. Here, the modular division first probe is gives way
XX * to a faster exclusive-or manipulation. Also do block compression with
XX * an adaptive reset, where the code table is cleared when the compression
XX * ratio decreases, but after the table fills. The variable-length output
XX * codes are re-sized at this point, and a special CLEAR code is generated
XX * for the decompressor.
XX */
XX
XXINT
XXsqinit_cm(f, t) /* initialize for compression */
XX FILE *f; /* file we will be compressing */
XX FILE *t; /* where we will put it */
XX{
XX offset = 0;
XX bytes_out = 0;
XX clear_flg = 0;
XX ratio = 0;
XX in_count = 1;
XX checkpoint = CHECK_GAP;
XX maxcode = MAXCODE(n_bits = INIT_BITS);
XX free_ent = FIRST;
XX setmem(htab, HSIZE * sizeof(long), 0xff);
XX n_bits = INIT_BITS; /* set starting code size */
XX
XX firstcmp = 1; /* next byte will be first */
XX}
XX
XXINT
XXsqputc_cm(c, t) /* compress a character */
XX unsigned char c; /* character to compress */
XX FILE *t; /* where to put it */
XX{
XX static long fcode;
XX static INT hshift;
XX INT i;
XX INT disp;
XX
XX if (firstcmp) { /* special case for first byte */
XX ent = c; /* remember first byte */
XX
XX hshift = 0;
XX for (fcode = (long) HSIZE; fcode < 65536L; fcode *= 2L)
XX hshift++;
XX hshift = 8 - hshift; /* set hash code range bund */
XX
XX firstcmp = 0; /* no longer first */
XX return;
XX }
XX in_count++;
XX fcode = (long) (((long) c << BITS) + ent);
XX i = (c << hshift) ^ ent;/* xor hashing */
XX
XX if (htab[i] == fcode) {
XX ent = codetab[i];
XX return;
XX } else if (htab[i] < 0) /* empty slot */
XX goto nomatch;
XX disp = HSIZE - i; /* secondary hash (after G.Knott) */
XX if (i == 0)
XX disp = 1;
XX
XXprobe:
XX if ((i -= disp) < 0)
XX i += HSIZE;
XX
XX if (htab[i] == fcode) {
XX ent = codetab[i];
XX return;
XX }
XX if (htab[i] > 0)
XX goto probe;
XX
XXnomatch:
XX putcode(ent, t);
XX ent = c;
XX if (free_ent < maxcodemax) {
XX codetab[i] = free_ent++; /* code -> hashtable */
XX htab[i] = fcode;
XX } else if ((long) in_count >= checkpoint)
XX cl_block(t);
XX}
XX
XXlong
XXsqpred_cm(t) /* finish compressing a file */
XX FILE *t; /* where to put it */
XX{
XX putcode(ent, t); /* put out the final code */
XX putcode(-1, t); /* tell output we are done */
XX
XX return bytes_out; /* say how big it got */
XX}
XX
XX/*
XX * Decompress a file. This routine adapts to the codes in the file
XX * building the string table on-the-fly; requiring no table to be stored
XX * in the compressed file. The tables used herein are shared with those of
XX * the compress() routine. See the definitions above.
XX */
XX
XXINT
XXsqdecomp(f, t) /* decompress a file */
XX FILE *f; /* file to read codes from */
XX FILE *t; /* file to write text to */
XX{
XX unsigned char *stackp;
XX INT finchar;
XX INT code, oldcode, incode;
XX
XX n_bits = INIT_BITS; /* set starting code size */
XX clear_flg = 0;
XX
XX /*
XX * As above, initialize the first 256 entries in the table.
XX */
XX maxcode = MAXCODE(n_bits = INIT_BITS);
XX for (code = 255; code >= 0; code--) {
XX prefix[code] = 0;
XX suffix[code] = (unsigned char) code;
XX }
XX free_ent = FIRST;
XX
XX finchar = oldcode = getcode(f);
XX if (oldcode == -1) /* EOF already? */
XX return; /* Get out of here */
XX putc_unp((char) finchar, t); /* first code must be 8 bits=char */
XX stackp = stack;
XX
XX while ((code = getcode(f)) > -1) {
XX if (code == CLEAR) {
XX for (code = 255; code >= 0; code--)
XX prefix[code] = 0;
XX clear_flg = 1;
XX free_ent = FIRST - 1;
XX if ((code = getcode(f)) == -1) /* O, untimely death! */
XX break;
XX }
XX incode = code;
XX /*
XX * Special case for KwKwK string.
XX */
XX if (code >= free_ent) {
XX *stackp++ = finchar;
XX code = oldcode;
XX }
XX /*
More information about the Alt.sources
mailing list