/* (c) Copyright 1987 Michael Goetz								*/
/*																			*/
/*				File:		  	 compress								*/
/*				Returns:	  												*/
/*				Parameters:												*/
/*																			*/
/*																			*/

#include <string.h>
#include <stdio.h>
#include <ctype.h>
#include <math.h>

#include "advm.h"

void cmpres();
void init();
void getkey(int *, int *, int *, int *);
void putkey(int, int, int, unsigned char);
void putky2(int, int, int, unsigned char);
void close_files();
char getchr();
void putcr2(unsigned char);
void putchr(unsigned char);
void putcr3(unsigned char);
void putcr4(unsigned char);
void puthuf(unsigned char);
void flhuf();
void squeze();
void flush(unsigned char *, int);
unsigned char encryp(unsigned char, int);
int white(unsigned char);
void inithf();
void getwrd(unsigned char);
void prtwrd();
void huff(int *, int *);
int findsp(int, unsigned char *, long *);
float hfsqz(int, int, unsigned char *, int *);
void huff2(int, unsigned char *, unsigned char *, long *);
void tree(int, unsigned char *);
void prtree(int, unsigned char *);


FILE *file10, *file6, *file7, *file8, *file9;

int pass;		/* passx */
int inrec, outrec, inptr, outptr;	/* ptrs */
int cr2cnt;		/* cr2com */
int linect, charct;	/* chrcom */
int sflag, sflag2;	/* sflag */
int inkey;		/* inptr */
int outkey;		/* outptr */
long incnt = 0l, outcnt = 0l;	/* counts */
struct {
	int k1[18], k2[18], krec[18];
	unsigned char kptr[18];
	} in;			/* inkey */
struct {
	int k1[18], k2[18], krec[18];
	unsigned char kptr[18];
	} out;			/* outkey */
struct {
	int k1,k2,rc;
	unsigned char pr;
	} sav;			/* savkey */
int hufptr, ndx[256];	/* hufdat */
unsigned char hufc, size[256], htree[750]; /* hufdat */
unsigned long code[256];	/* hufdat */
int start[10] = {1,2,9,14,23,28,30,32,0,0};		/* words */
unsigned char chars[129] = 
		"ainisoftoatitonandaretheyououtfromwithyourhereintohaveroomthatdowntherelargesmallwhichnorthyou'relittlepassagethroughpassages   ", 
	index[32] = {1,2,4,6,8,10,12,14,16,19,22,25,28,31,35,39,43,47,51,
                55,59,63,67,72,77,82,87,92,98,104,111,118}, 
	len[32] = {1,2,2,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,4,4,4,4,5,5,5,5,
              5,6,6,7,7,8};	/* words */
unsigned char out_buffer[128];		/* outbuf */
unsigned char in_buffer[128];			/* inbuf */
int count [256];							/* hwords */


main()
{
	pass = 1;
	cmpres();
	prtwrd();
	pass = 2;
	cmpres();
}

void cmpres()

{
	int key1,key2,rec,ptr;
	
	init();
	while (TRUE)
		{
		getkey(&key1, &key2, &rec, &ptr);
		if (key1 == -1 || key2 == -1)
			break;
		if (rec != inrec || ptr != inptr)
			{
			printf("Error: files out of sync.\n");
			exit(1);
			}
		putkey(key1, key2, outrec, outptr);
		squeze();
		}
	close_files();
	if (pass == 1)
			return;
	execl("keysort.exe",NULL);
	printf("Pass 3 (KEYSORT) not found\n");
	exit(1);
}

void init()

{
	cr2cnt = 0;
	linect = charct = 1;
	if (pass == 1)
		inithf();
	sflag = sflag2 = FALSE;		/* fake this */
	file6 = fopen("advt.tmp","rb");
	file7 = fopen("advtkey.tmp","rb");
	if (pass == 2)
		{
		file8 = fopen("advt.dat","wb");
		file9 = fopen("advt.key","wb");
		}
	inrec = outrec = inptr = outptr = outkey = 1;
	inkey = 0;
	incnt = outcnt = 0l;
	printf ("Begin Pass 2 (Text Compression) - Phase %d\n\n",pass);
}

void getkey(key1, key2, rec, ptr)
int *key1, *key2, *rec, *ptr;

{
	if (!inkey)
		{
		int temp;

		if (fread((char *)in.k1, sizeof(in.k1[0]), 18, file7) != 18 ||
			 fread((char *)in.k2, sizeof(in.k2[0]), 18, file7) != 18 ||
			 fread((char *)in.krec, sizeof(in.krec[0]), 18, file7) != 18 ||
			 fread((char *)in.kptr, sizeof(in.kptr[0]), 18, file7) != 18 ||
			 fread((char *)&temp, 2, 1, file7) != 1)
			{
			printf ("Error reading text keys\n");
			exit(1);
			}
		}
	*key1 = in.k1[inkey];
	*key2 = in.k2[inkey];
	*rec = in.krec[inkey];
	*ptr = in.kptr[inkey];
	if (++inkey == 18) inkey = 0;
}

void putkey(key1, key2, rec, ptr)
int key1, key2, rec;
unsigned char ptr;

{
	sav.k1 = key1;
	sav.k2 = key2;
	sav.rc = rec;
	sav.pr = ptr;
}

void putky2(key1, key2, rec, ptr)
int key1, key2, rec;
unsigned char ptr;

{
	out.k1[outkey-1] = key1;
	out.k2[outkey-1] = key2;
	out.krec[outkey-1] = rec;
	out.kptr[outkey-1] = ptr;
	if (++outkey > 18)
		{
		if (pass == 2)
			{
			int pad = 0;

			if (fwrite((char *)out.k1,1,sizeof(out.k1),file9) != sizeof(out.k1) ||
				 fwrite((char *)out.k2,1,sizeof(out.k2),file9) != sizeof(out.k2) ||
				 fwrite((char *)out.krec,1,sizeof(out.krec),file9) != sizeof(out.krec) ||
				 fwrite(out.kptr,1,sizeof(out.kptr),file9) != sizeof(out.kptr) ||
					/* write out 0x0000 to be compatible with CPM 128 byte sectors */
				 fwrite((char *)&pad,2,1,file9) != 1)
				{
				printf("Error writing text keys\n");
				exit(1);
				}
			}
		outkey = 1;
		}
}

void close_files()

{
	if (pass == 2)
		{
		int pad = 0;

		file10 = fopen("compress.dat","wb");
		fwrite(chars, 1, sizeof(chars)-1, file10);
		fwrite(index, 1, sizeof(index), file10);
		fwrite(len, 1, sizeof(index), file10);
		fwrite((char *)&sflag, 1, 1, file10);
		fwrite((char *)&sflag2, 1, 1, file10);
		fwrite(htree, 1, sizeof(htree), file10);
		fclose(file10);
		out.k1[outkey-1] = out.k2[outkey-1] = -1;
		if (fwrite((char *)out.k1,1,sizeof(out.k1),file9) != sizeof(out.k1) ||
			 fwrite((char *)out.k2,1,sizeof(out.k2),file9) != sizeof(out.k2) ||
			 fwrite((char *)out.krec,1,sizeof(out.krec),file9) != sizeof(out.krec) ||
			 fwrite(out.kptr,1,sizeof(out.kptr),file9) != sizeof(out.kptr) ||
				/* write out 0x0000 to be compatible with CPM 128 byte sectors */
			 fwrite((char *)&pad,2,1,file9) != 1)
			{
			printf("Error writing text keys\n");
			exit(1);
			}
		fclose(file9);
		if (outptr > 1)
			if (fwrite(out_buffer, 1, sizeof(out_buffer), file8) != sizeof(out_buffer))
				{
				printf("Error writing text data\n");
				exit(1);
				}
		fclose(file8);
		}
	printf("Bytes in:    %8ld (%ldK)\n",incnt, (incnt+1023l)/1024l);
	printf("Bytes out:   %8ld (%ldK)\n",outcnt, (outcnt+1023l)/1024l);
	printf("Bytes saved: %8ld (%ldK)\n\n",incnt-outcnt,
		(incnt+1023l)/1024l - (outcnt+1023l)/1024l);
	fclose(file6);
	fclose(file7);
}

char getchr()

{
	char a;

	incnt++;
	if (inptr == 1)
		if (fread(in_buffer, 1, sizeof(in_buffer), file6) != sizeof(in_buffer))
			{
			printf("Error reading text data\n");
			exit(1);
			}
	a = in_buffer[inptr-1];
	if (++inptr > 128)
		{
		inrec++;
		inptr = 1;
		}
}

void putcr2(chr)
unsigned char chr;

{
	if (chr == ' ')
		{
		cr2cnt++;
		if (cr2cnt >= 31)
			{
			putcr3(cr2cnt);
			cr2cnt = 0;
			}
		}
	else
		{
 		if (cr2cnt)
			{
			if (cr2cnt == 1)
				putcr3(' ');
			else
				putcr3(cr2cnt);
			cr2cnt = 0;
			}
		putcr3(chr);
		}
}

void putchr(chr)
unsigned char chr;

{
	static unsigned char lines [10][81];
	static int rec[10];
	static unsigned char ptr[10];
	int i,j;
	unsigned char *c;

	lines[0][charct-1] = chr;
	if (chr)
		{
		if (++charct > 80)
			{
			printf("Line too long.\n");
			exit(1);
			}
		}
	else
		{
		charct = 1;
		for (i=2; i<=linect; i++)
			if (!strcmp(lines[0], lines[i-1]))
				{
				putky2(sav.k1, sav.k2, rec[i-1], ptr[i-1]);
				return;
				}
		ptr[0] = sav.pr;
		rec[0] = sav.rc;
		if (linect < 10) linect++;
		for (i=linect; i>1; i--)
			{
			ptr[i-1] = ptr[i-2];
			rec[i-1] = rec[i-2];
			strcpy(lines[i-1], lines[i-2]);
			}
		putky2(sav.k1, sav.k2, sav.rc, sav.pr);
		c = lines[0] - 1;
		do {
			putcr2(*(++c));
			} while (*c);
		}
}

void putcr3(chr)
unsigned char chr;

{
	if (pass == 1)
		{
		getwrd(chr);
		putcr4(chr);
		}
	else
		puthuf(chr);
}

void putcr4(chr)
unsigned char chr;

{
	outcnt++;
	out_buffer[outptr-1] = encryp(chr, outptr);
	if (++outptr > 128)
		{
 		if (pass == 2)
			if (fwrite(out_buffer, 1, sizeof(out_buffer), file8) != sizeof(out_buffer))
				{
				printf("Error writing text data\n");
				exit(1);
				}
		outptr = 1;
		outrec++;
		}
}

void puthuf(chr)
unsigned char chr;

{
	int i,index = ndx[chr];

	if (!index)
		{
		printf("FATAL ERROR IN HUFFMAN ENCODING: CHARACTER %d UNDEFINED.\n",chr);
		exit(1);
		}
	for (i=size[index-1]; i; i--)
		{
		if (code[index-1] & (1l << (i-1)))
			hufc |= (1 << hufptr);
		if (++hufptr == 8)
			flhuf();
		}
	if (!chr && hufptr)
		flhuf();
}

void flhuf()

{
	putcr4(hufc);
	hufptr = 0;
	hufc = 0;
}

void squeze()

{
	unsigned char a, flags, word[11];
	int cflag,i,k,j;

	while (TRUE)
		{
label_10:
		a=getchr();
		if (!a) 
			{
			putchr(a);
			return;
			}
		if (white(a))
			{
label_80:
			putchr(a);
			if (!a)
				return;
			else
				continue;
			}
		for (i=1, cflag = isupper(a); i<=8; i++)
			{
			word[i-1] = a;
			a = getchr();
			if (!a || white(a))
				{
				for (k=start[i-1]; len[k-1] == i && k<=32; k++)
					{
					for (j=1; j<=i; j++)
						{
						if (cflag && j==1 && chars[index[k-1]+j-2] == word[0]-'A'+'a')
							continue;
						if (chars[index[k-1]+j-2] != word[j-1])
							break;
						}
					if (j <= i) continue;
					flags = 0x80 | (k-1);
					if (cflag) flags |= 0x40;
					if (a == ' ') flags |= 0x20;
					putchr(flags);
					if (a == ' ')
						goto label_10;
					else
						goto label_80;
					}
				/* label 100 */
				flush(word,i);
				goto label_80;
				}
			}
		flush(word,8);
		do {
			putchr(a);
			a=getchr();
			if (!a) 
				{
				putchr(a);
				return;
				}
			} while (!white(a));
		goto label_80;
		}
}

void flush(word, n)
unsigned char *word;
int n;

{
	int i;

	for (i=0; i<n; i++)
		putchr(word[i]);
}

unsigned char encryp(word, ptr)
unsigned char word;
int ptr;

{
	return ((word+ptr) ^ 0x75);
}


int white(c)
unsigned char c;

{
	return ((int)strchr(" .?!,\":;()[]<>-",c));
}


void inithf()

{
	int i;

	for (i=0; i<256; i++)
		count[i] = 0;
	hufptr = 0;
	hufc = 0;
}

void getwrd(chr)
unsigned char chr;

{
	unsigned char nxtchr;

	count[chr]++;
}

void prtwrd()

{
	long size = 0l, tsize = 0l;
	int i, isize, len [256];
	float temp;

	file10 = fopen("ana.dat","wt");
	for (i=0; i<256; i++)
		size += count[i];
	fprintf(file10, "TOTAL SIZE = %8ld\n CHAR  COUNT    CODE SIZE\n\n",size);
	for (i=0; i<256; i++)
		{
		if (!count[i]) continue;
		len[i] = isize = ceil(0 - log(((float)count[i])/size) / log(2.0));
		fprintf(file10,"%4d %c %6d          %2d\n",
			i,(i < 0x20 || i > 0x7e) ? ' ' : i, count[i], isize);
		tsize += (count[i] * isize);
		}
	fprintf(file10, "\nCOMPRESSED FILE IS FROM %ld TO %ld BYTES.\n",
		tsize/8,tsize/8+(count[0]*7)/8);
	huff(count,len);
	fclose(file10);
}

void huff(count, osize)
int count[256], osize[256];

{
	int ncount[256], i, j, k, kk, tsize;
	unsigned char xchar[256];
	float c;

	for (i=0; i<256; i++)
		ndx[i] = 0;
	for (i=tsize=0; i<256; i++)
		{
 		if (!count[i]) continue;
		j = 0;
label_20:
		j++;
		if (tsize < j)
			{
			xchar[j-1] = i;
			size[j-1] = osize[i];
			ncount[j-1] = count[i];
			tsize = j;
			ndx[i] = j;
			continue;
			}
		if (ncount[j-1] >= count[i]) goto label_20;
		for (k=j; k<=tsize; k++)
			{
			kk = tsize-k+j;
			xchar[kk] = xchar[kk-1];
			ndx[xchar[kk-1]] = kk+1;
			ncount[kk] = ncount[kk-1];
			size[kk] = size[kk-1];
			}
		xchar[j-1] = i;
		size[j-1] = osize[i];
		ncount[j-1] = count[i];
		tsize++;
		ndx[i] = j;
		}
	c = 0.0;
	while (TRUE)
		{
		huff2(tsize, xchar, size, code);
		i = findsp(tsize, size, code);
		if (!i || i >= size[tsize-1]) break;
		c += hfsqz(i, tsize, size, ncount);
		}
	fprintf(file10, "\f BYTES SAVED BY HUFFMAN CODE OPTIMIZATION: %8.0f\n\nHUFFMAN CODES:\n",c);
	for (i=0; i<tsize; i++)
		{
		fprintf(file10, "%3d %c   %5d    ",xchar[i],(xchar[i]<0x20 || xchar[i]>0x7e)? ' ' : xchar[i], ncount[i]);
		for (j=16; j>=0; j--)
			fprintf(file10, "%c", (j+1 > size[i]) ? ' ' : ((code[i] & (1l << j))?'1':'0') );
		fprintf(file10, " %2d %8.8x\n",size[i],code[i]);
		}
	tree(tsize, xchar);
}

int findsp(tsize, size, code)
long code[256];
int tsize;
unsigned char size[256];

{
	int i,j,jj,k;

	i = size[tsize-2];
	for (j=2; j<=i; j++)
		{
		jj = i-j+2;
		if (!(code[tsize-2] & (1l << (jj-1)))) return (j-1);
		}
	return (0);
}

float hfsqz(i, tsize, size, ncount)
int i, tsize, ncount[256];
unsigned char size[256];

{
 	int last=0, j;
	float val=0l;

	for (j=0; j<tsize; j++)
		{
		if (size[j] == last) continue;
		last = size[j];
		if (size[j] <= i) continue;
		size[j]--;
		val += ncount[j] / 8.0;
		}
	return (val);
}

void huff2(tsize, xchar, size, code)
int tsize;
unsigned char xchar[256], size[256];
long code[256];

{
	int i,j;

	for (i=0; i<256; i++)
		code[i] = 0l;
	for (i=2; i<=tsize; i++)
		{
		code[i-1] = code[i-2]+1;
		if (size[i-1] == size[i-2]) continue;
		code[i-1] <<= (size[i-1] - size[i-2]);
		}
}

void tree(tsize, xchar)
int tsize;
unsigned char xchar[256];

{
	int i,j,next,k;

	for (i=0; i<750; i++)
		htree[i] = 0;
	for (i=0, next = 3; i<tsize; i++)
		{
		j = 1;
		for (k=size[i]-1; k>=0; k--)
			{
			if (next > 749)
				{
				fprintf(file10, "TREE ERROR: ARRAY OVERFLOW\n");
				fprintf(file10, "I=%d char=%d bit=%d ptr=%d next=%d\n",
						i,xchar[i],k,j,next);
				prtree(next, htree);
				exit(1);
				}
			if (code[i] & (1l << k)) j++;
			if (!htree[j-1])
				{
				if (next-j > 127)
					{
					fprintf(file10, "TREE ERROR: OFFSET TOO LARGE\n");
					fprintf(file10, "I=%d char=%d bit=%d ptr=%d next=%d\n",
							i,xchar[i],k,j,next);
					prtree(next, htree);
					exit(1);
					}
				if (k)
					{
					htree[j-1] = next-j;
					j = next;
					next += 2;
					continue;
					}
				htree[j-1] = (next-j) | 0x80;
				htree[next-1] = xchar[i];
				next++;
				continue;
				}
			if (htree[j-1] & 0x80)
				{
				fprintf(file10, "TREE ERROR: TERMINAL FLAG ON WHILE SEARCHING\n");
				fprintf(file10, "I=%d char=%d bit=%d ptr=%d next=%d\n",
						i,xchar[i],k,j,next);
				prtree(next, htree);
				exit(1);
				}
			if (!k)
				{
				fprintf(file10, "TREE ERROR: NON-NULL ENTRY AT LAST BIT POSITION\n");
				fprintf(file10, "I=%d char=%d bit=%d ptr=%d next=%d\n",
						i,xchar[i],k,j,next);
				prtree(next, htree);
				exit(1);
				}
			j += htree[j-1];
			}
		}
	prtree(next, htree);
}

void prtree(next, htree)
int next;
unsigned char htree [750];

{
	int i;

	for (i=0; i<next; i++)
		fprintf(file10, "%4d  %c %3d  %3d\n",
			i, (htree[i] & 0x80) ? '*' : ' ', htree[i] & 0x7f, htree[i]);
}

