v23i067: TRN, version of RN that follows conversation threads, Part08/14

Rich Salz rsalz at bbn.com
Tue Dec 4 07:27:55 AEST 1990


Submitted-by: Wayne Davison <davison at dri.com>
Posting-number: Volume 23, Issue 67
Archive-name: trn/part08

---- Cut Here and unpack ----
#!/bin/sh
# this is part 8 of a multipart archive
# do not concatenate these parts, unpack them in order with /bin/sh
# file mt-lint.h continued
#
# Number of this part within the multipart archive.
CurArch=8
# The sequence file must already exist and be readable; if it is not,
# tell the user to start with part 1 and stop.
if test ! -r s2_seq_.tmp
then echo "Please unpack part 1 first!"
     exit 1; fi
# Read the expected part number from the sequence file in a subshell.
# A mismatch prints the part that should be unpacked next and makes the
# subshell fail, which in turn ends this script with status 1.
( read Scheck
  if test "$Scheck" != $CurArch
  then echo "Please unpack part $Scheck next!"
       exit 1;
  else exit 0; fi
) < s2_seq_.tmp || exit 1
# The here-document that follows is appended to mt-lint.h.
echo "x - Continuing file mt-lint.h"
sed 's/^X//' << 'SHAR_EOF' >> mt-lint.h
X#define write_thread		wthred
X#define write_ids		wids
X#define write_item		witem
X#define processed_groups	pgroup
X#define timer_off		timoff
X#define timer_first		tim1st
X#define timer_next		timnxt
X#define truncate_len		trulen
X#define article_array		artarr
X
X#define safemalloc(x)	(NULL)
X#define free(x)		(x = NULL)
X#define Free(x)		(*x = NULL)
X#define signal(x,y)
SHAR_EOF
# mt-lint.h is finished: report it and set its mode, warning if chmod fails.
echo "File mt-lint.h is complete"
chmod 0660 mt-lint.h || echo "restore of mt-lint.h fails"
# Announce the next file; its text is written (not appended) below.
echo "x - extracting mt-process.c (Text)"
sed 's/^X//' << 'SHAR_EOF' > mt-process.c &&
X/* $Header: mt-process.c,v 4.3.3.1 90/07/28 18:04:45 davison Trn $
X**
X** $Log:	mt-process.c,v $
X** Revision 4.3.3.1  90/07/28  18:04:45  davison
X** Initial Trn Release
X** 
X*/
X
X#include "EXTERN.h"
X#include "common.h"
X#include "mthreads.h"
X#ifdef SERVER
X#include "server.h"
X#endif
X
X#include <time.h>
X#ifndef TZSET
X# include <sys/timeb.h>
X#endif
X
Xchar buff[1024];		/* scratch buffer: header lines, article file names, server commands */
X
Xchar references[1024];		/* References: text of the article being read, leading space included */
X
Xchar subject_str[80];		/* subject of the current article, as trimmed by get_subject_str() */
Xbool found_Re;			/* set by get_subject_str() when a leading "Re:" was removed */
X
Xchar author_str[20];		/* author of the current article, as packed by get_author_str() */
X
Xextern int log_verbosity;	/* consulted before most log_error() calls in this file */
X
Xextern time_t getdate();	/* called on the Date: header text with the current time and timezone */
X
XDOMAIN *next_domain;		/* domain expire() will visit next; Null once expire() has finished */
X
Xvoid insert_article(), expire(), trim_roots(), order_roots(), trim_authors();
Xvoid make_root(), use_root(), merge_roots(), set_root(), unlink_root();
Xvoid link_child(), unlink_child();
Xvoid free_article(), free_domain(), free_subject(), free_root(), free_author();
Xvoid get_subject_str(), get_author_str();
XARTICLE *get_article();
XSUBJECT *new_subject();
XAUTHOR *new_author();
X
X#ifdef TZSET
Xextern time_t tnow;
X#else
Xextern struct timeb ftnow;
X#endif
X
X#ifndef SERVER
Xstatic FILE *fp_article;
X#endif
X
X/* Given the upper/lower bounds of the articles in the current group, add all
X** the ones that we don't know about and remove all the ones that have expired.
X** The current directory must be the newgroup's spool directory.
X**
X** Scanning starts at the larger of first_article and total.last+1 and runs
X** through last_article.  For every article that can be opened (or fetched
X** with HEAD when SERVER is defined) the header is read line by line and the
X** Date, From, Subject, Message-ID and References fields are collected into
X** date, author_str, subject_str, an ARTICLE and references respectively;
X** the result is handed to insert_article().  added_count starts as the
X** number of articles in the scanned range and is decremented for each one
X** that is missing.
X**
X** Afterwards expire() is called when extra_expire is set or the first
X** article has moved up, the roots and authors are trimmed and ordered, and
X** total.first/total.last are updated.  With SERVER defined, a failed or
X** fatal server response cuts the scan short, lowers last_article and turns
X** extra_expire off for the remainder of this call only.
X**
X** NOTE(review): with SERVER defined, leaving the header loop before the "."
X** line appears to leave unread response lines pending -- confirm against
X** get_server().
X*/
Xvoid
Xprocess_articles( first_article, last_article )
XART_NUM first_article, last_article;
X{
X    register char *cp, *str;
X    register ARTICLE *article;
X    register ART_NUM i;
X    time_t date;
X    int len;
X#ifdef SERVER
X    bool orig_extra = extra_expire;
X#endif
X    extern int errno;
X    extern int sys_nerr;
X    extern char *sys_errlist[];
X
X    /* Resume just past the last article we already know about. */
X    if( first_article > (i = total.last+1) ) {
X	i = first_article;
X    }
X    processed_groups++;
X    added_count = last_article - i + 1;
X    expired_count = 0;
X
X    for( ; i <= last_article; i++ ) {
X#ifdef SERVER
X	sprintf( buff, "HEAD %ld", (long)i );
X	put_server( buff );
X	if( get_server( buff, sizeof buff ) < 0 || *buff == CHAR_FATAL ) {
X	    last_article = i - 1;
X	    extra_expire = FALSE;
X	    break;
X	}
X	if( *buff != CHAR_OK ) {
X	    added_count--;
X	    continue;
X	}
X#else
X	/* Open article in current directory. */
X	sprintf( buff, "%ld", (long)i );
X	/* Set errno for purely paranoid reasons */
X	errno = 0;
X	if( (fp_article = fopen( buff, "r" )) == Nullfp ) {
X	    /* Missing files are ok -- they've just been expired or canceled */
X	    if( errno != 0 && errno != ENOENT ) {
X		/* sys_errlist[] has sys_nerr entries, so sys_nerr itself
X		** is already out of range.
X		*/
X		if( errno < 0 || errno >= sys_nerr ) {
X		    log_error( "Can't open `%s': Error %d.\n", buff, errno );
X		} else {
X		    log_error( "Can't open `%s': %s.\n", buff,
X		      sys_errlist[errno] );
X		}
X	    }
X	    added_count--;
X	    continue;
X	}
X#endif
X
X	article = Nullart;
X	*references = '\0';
X	*author_str = '\0';
X	*subject_str = '\0';
X	found_Re = 0;
X	date = 0;
X
X#ifdef SERVER
X	while( get_server( cp = buff, sizeof buff ) == 0 ) {
X	  process_line:
X	    if( *cp == '.' ) {
X		break;
X	    }
X#else
X	while( (cp = fgets( buff, sizeof buff, fp_article )) != Nullch ) {
X	  process_line:
X	    if( *cp == '\n' ) {		/* check for end of header */
X		break;			/* break out when found */
X	    }
X#endif
X	    if( (unsigned char)*cp <= ' ' ) {	 /* skip continuation lines */
X		continue;		/* (except references -- see below) */
X	    }
X	    if( (str = index( cp, ':' )) == Nullch ) {
X		break;			/* end of header if no colon found */
X	    }
X	    if( (len = str - cp) > 10 ) {
X		continue;		/* skip keywords > 10 chars */
X	    }
X#ifndef SERVER
X	    {
X		/* Remove the newline, but only if there is one: an overlong
X		** or unterminated line must not lose its last character.
X		*/
X		register char *eol = cp + strlen( cp );
X
X		if( eol > cp && eol[-1] == '\n' ) {
X		    eol[-1] = '\0';
X		}
X	    }
X#endif
X	    while( cp < str ) {		/* lower-case the keyword */
X		if( (unsigned char)*cp <= ' ' ) { /* stop at any whitespace */
X		    break;
X		}
X		if( isupper(*cp) ) {
X		    *cp = tolower(*cp);
X		}
X		cp++;
X	    }
X	    *cp = '\0';
X	    cp = buff;
X	    if( len == 4 && strEQ( cp, "date" ) ) {
X#ifdef TZSET
X	        date = getdate( str + 1, tnow, timezone );
X#else
X		date = getdate( str + 1, ftnow.time, (long) ftnow.timezone );
X#endif
X	    } else
X	    if( len == 4 && strEQ( cp, "from" ) ) {
X		get_author_str( str + 1 );
X	    } else
X	    if( len == 7 && strEQ( cp, "subject" ) ) {
X		get_subject_str( str + 1 );
X	    } else
X	    if( len == 10 && strEQ( cp, "message-id" ) ) {
X		if( !article ) {
X		    article = get_article( str + 1 );
X		} else {
X		    if( log_verbosity ) {
X			log_error( "Found multiple Message-IDs! [%ld].\n",
X				(long)i );
X		    }
X		}
X	    } else
X	    if( len == 10 && strEQ( cp, "references" ) ) {
X		/* include preceding space in saved reference */
X		len = strlen( str + 1 );
X		bcopy( str + 1, references, len + 1 );
X		/* From here on str is the append position in references. */
X		str = references + len;
X		/* check for continuation lines */
X#ifdef SERVER
X		while( get_server( cp = buff, sizeof buff ) == 0 ) {
X#else
X		while( (cp = fgets( buff, sizeof buff, fp_article )) != Nullch ) {
X#endif
X		    if( *cp != ' ' && *cp != '\t' ) {
X			goto process_line;
X		    }
X		    /* Reduce the leading whitespace to a single space. */
X		    while( *++cp == ' ' || *cp == '\t' ) {
X			;
X		    }
X		    *--cp = ' ';
X		    /* If the references are too long, shift them over to
X		    ** always save the most recent ones.  The text at cp comes
X		    ** from buff, so it is never longer than 1023 characters
X		    ** and the shift never exceeds what is already stored.
X		    */
X		    len = strlen( cp );
X		    if( (str - references) + len > 1023 ) {
X			register int excess = (str - references) + len - 1023;
X
X			/* bcopy() copes with the overlapping regions. */
X			bcopy( references + excess, references,
X			  (int)(str - references) - excess + 1 );
X			str -= excess;
X		    }
X		    strcpy( str, cp );
X		    /* Advance so the next continuation line is appended
X		    ** rather than written over this one.
X		    */
X		    str += len;
X		}/* while */
X		break;
X	    }/* if */
X	}/* while */
X	if( article ) {
X	    insert_article( article, date, i );
X	} else {
X	    if( log_verbosity ) {
X		log_error( "Message-ID line missing! [%ld].\n", (long)i );
X	    }
X	}
X#ifndef SERVER
X	fclose( fp_article );
X#endif
X    }
X
X    if( extra_expire || first_article > total.first ) {
X	expire( first_article );
X    }
X    trim_roots();
X    order_roots();
X    trim_authors();
X
X    total.first = first_article;
X    total.last = last_article;
X#ifdef SERVER
X    extra_expire = orig_extra;
X#endif
X}
X
X/* Search all articles for numbers less than new_first.  Traverse the list
X** using the domain links so we don't have to deal with the tree structure.
X** If extra_expire is true, stat() all valid articles to make sure they are
X** really there and expire them if they're not.
X**
X** Articles without a subject (fake reference nodes) and articles flagged
X** NEW_ARTICLE are left alone.  An expired article gives up its subject and
X** author, whose counts are decremented, and expired_count is incremented.
X** If it has no children it is unlinked and freed, and the same is then done
X** to each subject-less ancestor that is left without children.  With SERVER
X** defined the existence check is a STAT command instead of stat().  The
X** global next_domain names the domain to be visited next while the scan is
X** running and is reset to Null at the end.
X*/
Xvoid
Xexpire( new_first )
XART_NUM new_first;
X{
X    register DOMAIN *domain;
X    register ARTICLE *article, *next_art, *hold;
X
X    for( domain = &unk_domain; domain; domain = next_domain ) {
X	next_domain = domain->link;	/* fetched before any article is freed */
X	for( article = domain->ids; article; article = next_art ) {
X	    next_art = article->id_link;	/* saved: article may be freed below */
X	    if( !article->subject || (article->flags & NEW_ARTICLE) ) {
X		continue;
X	    }
X	    if( extra_expire && article->num >= new_first ) {
X#ifdef SERVER
X		sprintf( buff, "STAT %ld", (long)article->num );
X		put_server( buff );
X		if( get_server( buff, sizeof buff ) == 0 && *buff == CHAR_OK ) {
X		    continue;		/* server still has it */
X		}
X#else
X		sprintf( buff, "%ld", (long)article->num );
X		if( !stat( buff, &filestat ) || errno != ENOENT ) {
X		    continue;		/* file exists, or failed for another reason */
X		}
X#endif
X	    }
X	    if( extra_expire || article->num < new_first ) {
X		article->subject->count--;
X		article->subject = 0;
X		article->author->count--;
X		article->author = 0;
X		/* Free expired article if it has no children.  Then check
X		** if the parent(s) are also fake and can be freed.  We'll
X		** free any empty roots later.
X		*/
X		while( !article->children ) {
X		    hold = article->parent;
X		    unlink_child( article );
X		    free_article( article );
X		    if( hold && !hold->subject ) {
X			/* If the parent is the next article in the id
X			** list, step the list cursor past it first.
X			*/
X			if( (article = hold) == next_art ) {
X			    next_art = next_art->id_link;
X			}
X		    } else {
X			break;
X		    }
X		}
X		expired_count++;
X	    }/* if */
X	}/* for */
X    }/* for */
X    next_domain = Null(DOMAIN*);
X}
X
X/* Trim the article chains down so that we don't have more than one faked
X** article between the root and any real ones.
X**
X** For every root this also frees the subjects whose count has dropped to
X** zero (decrementing subject_cnt) and finally frees the root itself when it
X** has no articles left.
X**
X** NOTE(review): the cast of &root_root to (ROOT *) only works if "link" is
X** the first member of ROOT -- confirm against the structure declaration.
X** NOTE(review): after "start this root over", article is re-tested without
X** a null check; presumably a fake article always has a child -- confirm.
X*/
Xvoid
Xtrim_roots()
X{
X    register ROOT *root, *last_root;
X    register ARTICLE *article, *next;
X    register SUBJECT *subject, *last_subj;
X    register int found;
X
X#ifndef lint
X    last_root = (ROOT *)&root_root;	/* so last_root->link aliases root_root */
X#else
X    last_root = Null(ROOT*);
X#endif
X    for( root = root_root; root; root = last_root->link ) {
X	for( article = root->articles; article; article = article->siblings ) {
X	    /* If an article has no subject, it is a "fake" reference node.
X	    ** If all of its immediate children are also fakes, delete it
X	    ** and graduate the children to the root.  If everyone is fake,
X	    ** the chain dies.
X	    */
X	    while( !article->subject ) {
X		found = 0;
X		for( next = article->children; next; next = next->siblings ) {
X		    if( next->subject ) {
X			found = 1;
X			break;
X		    }
X		}
X		if( !found ) {
X		    /* Remove this faked article and move all its children
X		    ** up to the root.
X		    */
X		    next = article->children;
X		    unlink_child( article );
X		    free_article( article );
X		    for( article = next; article; article = next ) {
X			next = article->siblings;	/* saved: link_child() rewrites siblings */
X			article->parent = Nullart;
X			link_child( article );
X		    }
X		    article = root->articles;	/* start this root over */
X		} else {
X		    break;			/* else, on to next article */
X		}
X	    }
X	}
X	/* Free all unused subject strings.  Begin by trying to find a
X	** subject for the root's pointer.
X	*/
X	for( subject = root->subjects; subject && !subject->count; subject = root->subjects ) {
X	    root->subjects = subject->link;
X	    free_subject( subject );
X	    root->subject_cnt--;
X	}
X	/* Then free up any unused intermediate subjects.
X	*/
X	if( (last_subj = subject) != Null(SUBJECT*) ) {
X	    while( (subject = subject->link) != Null(SUBJECT*) ) {
X		if( !subject->count ) {
X		    last_subj->link = subject->link;
X		    free_subject( subject );
X		    root->subject_cnt--;
X		    subject = last_subj;	/* resume from the predecessor */
X		} else {
X		    last_subj = subject;
X		}
X	    }
X	}
X	/* Now, free all roots without articles.  Flag unexpected errors.
X	*/
X	if( !root->articles ) {
X	    if( root->subjects ) {
X		log_error( "** Empty root still had subjects remaining! **\n" );
X	    }
X	    last_root->link = root->link;
X	    free_root( root );
X	} else {
X	    last_root = root;
X	}
X    }
X}
X
X/* Descend the author list, find any author names that aren't used
X** anymore and free them.
X**
X** An author is unused when its count is zero.  The list is walked through
X** a pointer to the link field that leads to the current entry, starting
X** with author_root itself, so removing an entry is a single store and no
X** assumption is made about where "link" sits inside the AUTHOR structure.
X*/
Xvoid
Xtrim_authors()
X{
X    register AUTHOR *author;
X    register AUTHOR **link_ptr = &author_root;
X
X    while( (author = *link_ptr) != Null(AUTHOR*) ) {
X	if( !author->count ) {
X	    /* Splice the entry out before it is freed. */
X	    *link_ptr = author->link;
X	    free_author( author );
X	} else {
X	    link_ptr = &author->link;
X	}
X    }
X}
X
X/* Sort the root list so that the root whose first article carries the
X** earliest date comes first.  The list is detached after its first entry
X** and every remaining root is inserted in front of the first root whose
X** leading article is not older than its own.
X*/
Xvoid
Xorder_roots()
X{
X    register ROOT *pending, *cur;
X    register ROOT **slot;
X
X    /* Nothing to sort with fewer than two roots. */
X    if( !root_root || !root_root->link ) {
X	return;						/* RETURN */
X    }
X    /* Cut the list after the head; the tail is what we re-insert. */
X    pending = root_root->link;
X    root_root->link = Null(ROOT*);
X
X    while( pending != Null(ROOT*) ) {
X	cur = pending;
X	pending = pending->link;
X	/* Find the link that should point at cur. */
X	for( slot = &root_root; *slot; slot = &(*slot)->link ) {
X	    if( (*slot)->articles->date >= cur->articles->date ) {
X		break;
X	    }
X	}
X	cur->link = *slot;
X	*slot = cur;
X    }
X}
X
X#define EQ(x,y) ((isupper(x) ? tolower(x) : (x)) == (y))
X
X/* Parse the subject into 72 characters or less.  Remove any "Re[:^]"s from
X** the front (noting that it's there), and any "(was: old)" stuff from
X** the end.  Then, compact multiple whitespace characters into one space,
X** trimming leading/trailing whitespace.  If it's still too long, unmercifully
X** cut it off.  We don't bother with subject continuation lines either.
X*/
Xvoid
Xget_subject_str( str )
Xregister char *str;
X{
X    register char *cp;
X    register int len;
X
X    while( *str && (unsigned char)*str <= ' ' ) {
X	str++;
X    }
X    if( !*str ) {
X	bcopy( "<None>", subject_str, 7 );
X	return;						/* RETURN */
X    }
X    cp = str;
X    while( EQ( cp[0], 'r' ) && EQ( cp[1], 'e' ) ) {	/* check for Re: */
X	cp += 2;
X	if( *cp == '^' ) {				/* allow Re^2: */
X	    while( *++cp <= '9' && *cp >= '0' ) {
X		;
X	    }
X	}
X	if( *cp != ':' ) {
X	    break;
X	}
X	while( *++cp == ' ' ) {
X	    ;
X	}
X	found_Re = 1;
X	str = cp;
X    }
X    /* Remove "(was Re: oldsubject)", because we already know the old subjects.
X    ** Also match "(Re: oldsubject)".  Allow possible spaces after the ('s.
X    */
X    for( cp = str; (cp = index( cp+1, '(' )) != Nullch; ) {
X	while( *++cp == ' ' ) {
X	    ;
X	}
X	if( EQ( cp[0], 'w' ) && EQ( cp[1], 'a' ) && EQ( cp[2], 's' )
X	 && (cp[3] == ':' || cp[3] == ' ') )
X	{
X	    *--cp = '\0';
X	    break;
X	}
X	if( EQ( cp[0], 'r' ) && EQ( cp[1], 'e' )
X	 && ((cp[2]==':' && cp[3]==' ') || (cp[2]=='^' && cp[4]==':')) ) {
X	    *--cp = '\0';
X	    break;
X	}
X    }
X    /* Copy subject to a temporary string, compacting multiple spaces/tabs */
X    for( len = 0, cp = subject_str; len < 72 && *str; len++ ) {
X	if( (unsigned char)*str <= ' ' ) {
X	    while( *++str && (unsigned char)*str <= ' ' ) {
X		;
X	    }
X	    *cp++ = ' ';
X	} else {
X	    *cp++ = *str++;
X	}
X    }
X    if( cp[-1] == ' ' ) {
X	cp--;
X    }
X    *cp = '\0';
X}
X
X/* Try to fit the author name in 16 bytes.  Use the comment portion in
X** parenthesis if present.  Cut off non-commented names at the '@' or '%'.
X** Then, put as many characters as we can into the 16 bytes, packing multiple
X** whitespace characters into a single space.
X** We should really implement a nice name shortening algorithm, or simply
X** grab the name packing code from nn.
X**
X** The result is left in author_str; the text at str is modified in place.
X** Leading whitespace is dropped.  Characters are compared as unsigned, as
X** elsewhere in this file, so that 8-bit characters in a name are kept and
X** not mistaken for control characters where plain char is signed.
X*/
Xvoid
Xget_author_str( str )
Xchar *str;
X{
X    register char *cp, *cp2;
X
X    if( (cp = index( str, '(' )) != Nullch ) {
X	/* Use what lies between the first '(' and the last ')'. */
X	str = cp+1;
X	if( (cp = rindex( str, ')' )) != Nullch ) {
X	    *cp = '\0';
X	}
X    } else {
X	if( (cp = index( str, '@' )) != Nullch ) {
X	    *cp = '\0';
X	}
X	if( (cp = index( str, '%' )) != Nullch ) {
X	    *cp = '\0';
X	}
X    }
X    for( cp = str, cp2 = author_str; *cp && cp2-author_str < 16; ) {
X	/* Pack white space and turn ctrl-chars into spaces. */
X	if( (unsigned char)*cp <= ' ' ) {
X	    while( *++cp && (unsigned char)*cp <= ' ' ) {
X		;
X	    }
X	    if( cp2 != author_str ) {	/* no space at the very front */
X		*cp2++ = ' ';
X	    }
X	} else {
X	    *cp2++ = *cp++;
X	}
X    }
X    *cp2 = '\0';
X}
X
X/* Look up the article that owns a message-id, creating it when it is not
X** yet known.  The id is split into its id@domain parts; every distinct
X** domain is stored once and the ids belonging to it hang off that domain,
X** which saves a bit of space.  Ids without an '@' are kept under
X** unk_domain.  The domain part is lower-cased and the text at msg_id is
X** modified in place.  Returns Nullart when the id is empty.
X*/
XARTICLE *
Xget_article( msg_id )
Xchar *msg_id;
X{
X    register DOMAIN *dom;
X    register ARTICLE *art;
X    register char *tail, *host;
X    register int size;
X
X    /* Strip the surrounding <...> from the id, complaining about (but
X    ** tolerating) a missing bracket on either side.
X    */
X    for( ; *msg_id == ' '; msg_id++ ) {
X	;
X    }
X    tail = msg_id + strlen( msg_id ) - 1;
X    if( msg_id >= tail ) {
X	if( log_verbosity ) {
X	    log_error( "Message-ID is empty!\n" );
X	}
X	return Nullart;
X    }
X    if( *msg_id == '<' ) {
X	msg_id++;
X    } else if( log_verbosity ) {
X	log_error( "Message-ID doesn't start with '<'.\n" );
X    }
X    if( *tail != '>' ) {
X	if( log_verbosity ) {
X	    log_error( "Message-ID doesn't end with '>'.\n" );
X	}
X	tail++;			/* nothing to cut off; keep the last char */
X    }
X    *tail = '\0';
X    if( msg_id == tail ) {
X	if( log_verbosity ) {
X	    log_error( "Message-ID is null!\n" );
X	}
X	return Nullart;
X    }
X
X    host = index( msg_id, '@' );
X    if( host == Nullch ) {
X	dom = &unk_domain;
X    } else {
X	*host++ = '\0';			/* msg_id is now the unique part only */
X	for( tail = host; *tail; tail++ ) {
X	    if( isupper(*tail) ) {
X		*tail = tolower(*tail);		/* lower-case domain portion */
X	    }
X	}
X	*tail = '\0';
X	/* Do we already have this domain name? */
X	for( dom = unk_domain.link; dom; dom = dom->link ) {
X	    if( strEQ( dom->name, host ) ) {
X		break;
X	    }
X	}
X	if( !dom ) {
X	    /* No: make a new domain and put it right after unk_domain. */
X	    size = tail - host + 1;
X	    dom = (DOMAIN *)safemalloc( sizeof (DOMAIN) );
X	    total.domain++;
X	    dom->name = safemalloc( size );
X	    total.string2 += size;
X	    bcopy( host, dom->name, size );
X	    dom->ids = Nullart;
X	    dom->link = unk_domain.link;
X	    unk_domain.link = dom;
X	}
X    }
X    /* Look for the id among those of this domain. */
X    for( art = dom->ids; art; art = art->id_link ) {
X	if( strEQ( art->id, msg_id ) ) {
X	    break;
X	}
X    }
X    if( !art ) {
X	/* Unknown id: create a zeroed article at the head of the id list. */
X	size = strlen( msg_id ) + 1;
X	art = (ARTICLE *)safemalloc( sizeof (ARTICLE) );
X	bzero( art, sizeof (ARTICLE) );
X	total.article++;
X	art->num = 0;
X	art->id = safemalloc( size );
X	total.string2 += size;
X	bcopy( msg_id, art->id, size );
X	art->domain = dom;
X	art->id_link = dom->ids;
X	dom->ids = art;
X    }
X    return art;
X}
X
X/* Take all the data we've accumulated about the article and shove it into
X** the article tree at the best place we can possibly imagine.
X**
X** article is the node returned by get_article() for this message-id, date
X** is its parsed date and num its article number.  The globals references,
X** subject_str, author_str and found_Re supply the rest of the header, and
X** references is taken apart in place.  An article that already has a
X** subject was seen before and is left untouched.  Without SERVER, when the
X** subject had a "Re:" but there was no References line, up to four further
X** lines are read from fp_article in search of a message-id in cited text.
X*/
Xvoid
Xinsert_article( article, date, num )
XARTICLE *article;
Xtime_t date;
XART_NUM num;
X{
X    register ARTICLE *node, *last;
X    register char *cp, *end;
X    int len;
X
X    if( article->subject ) {
X	if( log_verbosity ) {
X	    log_error( "We've already seen article #%ld (%s@%s)\n",
X		(long)num, article->id, article->domain->name );
X	}
X	return;						/* RETURN */
X    }
X    article->date = date;
X    article->num = num;
X    article->flags = NEW_ARTICLE;
X
X    if( !*references && found_Re ) {
X	if( log_verbosity > 1 ) {
X	    log_error( "Missing reference line!  [%ld]\n", (long)num );
X	}
X    }
X    /* If the article has a non-zero root, it is already in a thread somewhere.
X    ** Unlink it to try to put it in the best possible spot.
X    */
X    if( article->root ) {
X	/* Check for a real or shared-fake parent.  Articles that have never
X	** existed have a num of 0.  Expired articles that remain as references
X	** have a valid num.  (Valid date too, but no subject.)
X	*/
X	for( node = article->parent;
X	     node && !node->num && node->child_cnt == 1;
X	     node = node->parent )
X	{
X	    ;
X	}
X	unlink_child( article );
X	if( node ) {			/* do we have decent parents? */
X	    /* Yes: assume that our references are ok, and just reorder us
X	    ** with our siblings by date.
X	    */
X	    link_child( article );
X	    use_root( article, article->root );
X	    /* Freshen the date in any faked parent articles. */
X	    for( node = article->parent;
X		 node && !node->num && date < node->date;
X		 node = node->parent )
X	    {
X		node->date = date;
X		unlink_child( node );
X		link_child( node );
X	    }
X	    return;					/* RETURN */
X	}
X	/* We'll assume that this article has as good or better references
X	** than the child that faked us initially.  Free the fake reference-
X	** chain and process our references as usual.  The parent pointer
X	** is fetched before the node is freed so that freed memory is never
X	** read.
X	*/
X	for( node = article->parent; node; node = last ) {
X	    last = node->parent;
X	    unlink_child( node );
X	    free_article( node );
X	}
X	article->parent = Nullart;		/* neaten up */
X	article->siblings = Nullart;
X    }
X  check_references:
X    if( !*references ) {	/* If no references but "Re:" in subject, */
X	if( found_Re ) {	/* search for a reference in any cited text */
X#ifndef SERVER
X	    for( len = 4; len && fgets( buff, sizeof buff, fp_article ); len-- ) {
X		if( (cp = index( buff, '<' )) && (end = index( cp, ' ' )) ) {
X		    if( end[-1] == ',' ) {
X			end--;
X		    }
X		    *end = '\0';
X		    if( (end = index( cp, '>' )) == Nullch ) {
X			end = cp + strlen( cp ) - 1;
X		    }
X		    if( valid_message_id( cp, end ) ) {
X			strcpy( references+1, cp );
X			*references = ' ';
X			if( log_verbosity > 2 ) {
X			    log_error( "Found cited-text reference: '%s' [%ld]\n",
X				references+1, (long)num );
X			}
X			break;
X		    }
X		}
X	    }
X#endif
X	} else {
X	    article->flags |= ROOT_ARTICLE;
X	}
X    }
X    /* If we have references, process them from the right end one at a time
X    ** until we either run into somebody, or we run out of references.
X    */
X    if( *references ) {
X	last = article;
X	node = Nullart;
X	end = references + strlen( references ) - 1;
X	while( (cp = rindex( references, ' ' )) != Nullch ) {
X	    *cp++ = '\0';
X	    while( end >= cp && ((unsigned char)*end <= ' ' || *end == ',') ) {
X		end--;
X	    }
X	    end[1] = '\0';
X	    /* Quit parsing references if this one is garbage. */
X	    if( !valid_message_id( cp, end ) ) {
X		if( log_verbosity ) {
X		    log_error( "Bad ref '%s' [%ld]\n", cp, (long)num );
X		}
X		break;
X	    }
X	    /* Dump all domains that end in '.', such as "..." & "1@DEL." */
X	    if( end[-1] == '.' ) {
X		break;
X	    }
X	    node = get_article( cp );
X	    /* Check for duplicates on the reference line.  Brand-new data has
X	    ** no date.  Data we just allocated earlier on this line has a
X	    ** date but no root.  Special-case the article itself, since it
X	    ** MIGHT have a root.
X	    */
X	    if( (node->date && !node->root) || node == article ) {
X		if( log_verbosity ) {
X		    log_error( "Reference line contains duplicates [%ld]\n",
X			(long)num );
X		}
X		if( (node = last) == article ) {
X		    node = Nullart;
X		}
X		continue;
X	    }
X	    last->parent = node;
X	    link_child( last );
X	    if( node->root ) {
X		break;
X	    }
X	    node->date = date;
X	    last = node;
X	    end = cp-2;
X	}
X	if( !node ) {
X	    /* Nothing usable on the line: treat it as having none. */
X	    *references = '\0';
X	    goto check_references;
X	}
X	/* Check if we ran into anybody that was already linked.  If so, we
X	** just use their root.
X	*/
X	if( node->root ) {
X	    /* See if this article spans the gap between what we thought
X	    ** were two different roots.
X	    */
X	    if( article->root && article->root != node->root ) {
X		merge_roots( node->root, article->root );
X		/* Set the roots of any children we brought with us. */
X		set_root( article, node->root );
X	    }
X	    use_root( article, node->root );
X	} else {
X	    /* We didn't find anybody we knew, so either create a new root or
X	    ** use the article's root if it was previously faked.
X	    */
X	    if( !article->root ) {
X		make_root( node );
X		use_root( article, node->root );
X	    } else {
X		use_root( article, article->root );
X		node->root = article->root;
X		link_child( node );
X	    }
X	}
X	/* Set the roots of the faked articles we created as references. */
X	for( node = article->parent; node && !node->root; node = node->parent ) {
X	    node->root = article->root;
X	}
X	/* Make sure we didn't circularly link to a child article(!), by
X	** ensuring that we run into the root before we run into ourself.
X	*/
X	while( node && node->parent != article ) {
X	    node = node->parent;
X	}
X	if( node ) {
X	    /* Ugh.  Someone's tweaked reference line with an incorrect
X	    ** article order arrived first, and one of our children is
X	    ** really one of our ancestors. Cut off the bogus child branch
X	    ** right where we are and link it to the root.
X	    */
X	    if( log_verbosity ) {
X		log_error("Found ancestral child -- fixing.\n");
X	    }
X	    unlink_child( node );
X	    node->parent = Nullart;
X	    link_child( node );
X	}
X    } else {
X	/* The article has no references.  Either turn it into a new root, or
X	** re-attach fleshed-out (previously faked) article to its old root.
X	*/
X	if( !article->root ) {
X	    make_root( article );
X	} else {
X	    use_root( article, article->root );
X	    link_child( article );
X	}
X    }
X}
X
X/* Decide whether the text from start through end looks like a message-id
X** reference.  Returns 1 if it does and 0 if not.  When the last character
X** is one of a known set of citation characters it is overwritten with '>'
X** before the test is made.
X*/
Xint
Xvalid_message_id( start, end )
Xregister char *start, *end;
X{
X    char *mid;
X    int saw_lower = 0;
X
X    /* Compensate for spacecadets who include the header in their
X    ** substitution of all '>'s into another citation character.
X    */
X    if( *end != '>' && *end != '\0'
X     && index( "<-!%)|:}*+#]@", *end ) != Nullch ) {
X	if( log_verbosity ) {
X	    log_error( "Reference ended in '%c'.\n", *end );
X	}
X	*end = '>';
X    }
X    /* Id must be "<...@...>" with something on both sides of the '@'. */
X    if( *start != '<' ) {
X	return 0;					/* RETURN */
X    }
X    if( *end != '>' ) {
X	return 0;					/* RETURN */
X    }
X    mid = index( start, '@' );
X    if( mid == Nullch || mid == start+1 || mid+1 == end ) {
X	return 0;					/* RETURN */
X    }
X    /* Try to weed-out non-ids (user@domain) by looking for lower-case without
X    ** digits in the unique portion.  B news ids are all digits; standard C
X    ** news are digits with mixed case; and Zeeff message ids are any mixture
X    ** of digits, certain punctuation characters and upper-case.
X    */
X    do {
X	if( *start >= '0' && *start <= '9' ) {
X	    return 1;					/* RETURN */
X	}
X	if( *start >= 'a' && *start <= 'z' ) {
X	    saw_lower = 1;
X	}
X    } while( ++start < mid );
X
X    return saw_lower ? 0 : 1;
X}
X
X/* Remove an article from its parent/siblings.  Leave parent pointer intact.
X**
X** An article without a parent is taken off its root's article list and the
X** root's thread_cnt is decremented; otherwise the parent's child_cnt is
X** decremented and the article is taken off the parent's list of children.
X** The article's own siblings pointer is not cleared.  The sibling search
X** does not test for the end of the list.
X** NOTE(review): presumably callers only pass articles that are really
X** linked where their parent/root says they are -- confirm.
X*/
Xvoid
Xunlink_child( child )
Xregister ARTICLE *child;
X{
X    register ARTICLE *last;
X
X    if( !(last = child->parent) ) {
X	child->root->thread_cnt--;
X	if( (last = child->root->articles) == child ) {
X	    child->root->articles = child->siblings;
X	} else {
X	    goto sibling_search;	/* share the search loop below */
X	}
X    } else {
X	last->child_cnt--;
X	if( last->children == child ) {
X	    last->children = child->siblings;
X	} else {
X	    last = last->children;
X	  sibling_search:
X	    while( last->siblings != child ) {	/* find the predecessor */
X		last = last->siblings;
X	    }
X	    last->siblings = child->siblings;
X	}
X    }
X}
X
X/* Link an article to its parent article.  If its parent pointer is zero,
X** link it to its root.  Sorts siblings by date.
X**
X** The root's thread_cnt or the parent's child_cnt is incremented.  The
X** article becomes the head of the list when the list is empty or its date
X** is earlier than the current head's; otherwise it is placed in front of
X** the first sibling with a later date, or at the end.  child->root must be
X** set when the article has no parent.
X*/
Xvoid
Xlink_child( child )
Xregister ARTICLE *child;
X{
X    register ARTICLE *node;
X    register ROOT *root;
X
X    if( !(node = child->parent) ) {
X	root = child->root;
X	root->thread_cnt++;
X	node = root->articles;
X	if( !node || child->date < node->date ) {
X	    child->siblings = node;
X	    root->articles = child;
X	} else {
X	    goto sibling_search;	/* share the search loop below */
X	}
X    } else {
X	node->child_cnt++;
X	node = node->children;
X	if( !node || child->date < node->date ) {
X	    child->siblings = node;
X	    child->parent->children = child;
X	} else {
X	  sibling_search:
X	    for( ; node->siblings; node = node->siblings ) {
X		if( node->siblings->date > child->date ) {
X		    break;		/* insert right after node */
X		}
X	    }
X	    child->siblings = node->siblings;
X	    node->siblings = child;
X	}
X    }
X}
X
X/* Give the specified article a root of its own.  Unless subject matching
X** is compiled out, the subjects of every existing root are first compared
X** with the current subject_str; on a match the article joins that root as
X** a parallel thread and no new root is made.
X*/
Xvoid
Xmake_root( article )
XARTICLE *article;
X{
X    register ROOT *fresh, *cand;
X    register SUBJECT *subj;
X
X#ifndef NO_SUBJECT_MATCHING
X    /* Look for a root that already carries this subject. */
X    for( cand = root_root; cand; cand = cand->link ) {
X	subj = cand->subjects;
X	while( subj ) {
X	    if( subject_equal( subj->str, subject_str ) ) {
X		use_root( article, cand );		/* use it instead */
X		link_child( article );
X		return;					/* RETURN */
X	    }
X	    subj = subj->link;
X	}
X    }
X#endif
X
X    /* No match: build a new root holding just this article. */
X    fresh = (ROOT *)safemalloc( sizeof (ROOT) );
X    total.root++;
X    fresh->articles = article;
X    fresh->root_num = article->num;
X    fresh->thread_cnt = 1;
X    if( !article->num ) {
X	/* A fake article brings no subject with it. */
X	fresh->subject_cnt = 0;
X	fresh->subjects = Null(SUBJECT*);
X    } else {
X	article->author = new_author();
X	fresh->subjects = article->subject = new_subject();
X	fresh->subject_cnt = 1;
X    }
X    /* Put the new root at the head of the root list. */
X    fresh->link = root_root;
X    root_root = fresh;
X    article->root = fresh;
X}
X
X/* Add this article's subject onto the indicated root's list.  Point the
X** article at the root.
X**
X** A fake article (num of 0) only has its root pointer set.  A real one
X** also gets its author from new_author() and a subject: an existing
X** subject equal to subject_str has its count incremented and is shared,
X** otherwise a new one is created and inserted into the root's list.  When
X** subject matching is enabled and the match is found in a different root,
X** that root is merged into this one.
X*/
Xvoid
Xuse_root( article, root )
XARTICLE *article;
XROOT *root;
X{
X    register SUBJECT *subject;
X    register ROOT *root2;
X    SUBJECT *hold, *child_subj = Null(SUBJECT*);
X    ARTICLE *node;
X
X    article->root = root;
X
X    /* If it's a fake, there's no subject to add. */
X    if( !article->num ) {
X	return;						/* RETURN */
X    }
X
X    /* If we haven't picked a unique message number to represent this root,
X    ** use the first non-zero number we encounter.  Which one doesn't matter.
X    */
X    if( !root->root_num ) {
X	root->root_num = article->num;
X    }
X    article->author = new_author();
X
X    /* Check if the new subject matches any of the other subjects in this root.
X    ** If so, we just update the count.  If not, check all the other roots for
X    ** a match.  If found, the new subject is common between the two roots, so
X    ** we merge the two roots together.
X    */
X    root2 = root;
X#ifndef NO_SUBJECT_MATCHING
X    do {
X#endif
X	for( subject = root2->subjects; subject; subject = subject->link ) {
X	    if( subject_equal( subject->str, subject_str ) ) {
X		article->subject = subject;
X		subject->count++;
X#ifndef NO_SUBJECT_MATCHING
X		if( root2 != root ) {
X		    merge_roots( root, root2 );
X		}
X#endif
X		return;					/* RETURN */
X	    }
X	}
X#ifndef NO_SUBJECT_MATCHING
X	if( (root2 = root2->link) == Null(ROOT*) ) {
X	    root2 = root_root;		/* wrap around to the first root */
X	}
X    } while( root2 != root );		/* stop after one full circuit */
X#endif
X
X    article->subject = hold = new_subject();
X    root->subject_cnt++;
X
X    /* Find subject of any pre-existing children.  We want to insert the new
X    ** subject before a child's to keep the subject numbering intuitive
X    ** in the newsreader.
X    */
X    for( node = article->children; node; node = node->children ) {
X	if( node->subject ) {
X	    child_subj = node->subject;
X	    break;
X	}
X    }
X    if( !(subject = root->subjects) || subject == child_subj ) {
X	hold->link = root->subjects;	/* new subject becomes the head */
X	root->subjects = hold;
X    } else {
X	/* Stop in front of the child's subject, or at the end of the list. */
X	while( subject->link && subject->link != child_subj ) {
X	    subject = subject->link;
X	}
X	hold->link = subject->link;
X	subject->link = hold;
X    }
X}
X
X/* Check subjects in a case-insignificant, punctuation ignoring manner.
X**
X** Returns 1 when the two strings are equal and 0 otherwise.  Upper-case
X** letters are compared as lower-case, and every run of spaces and
X** punctuation characters counts as one single space.  Characters are
X** handed to the ctype tests as unsigned values, since those tests are not
X** defined for negative arguments.
X*/
Xint
Xsubject_equal( str1, str2 )
Xregister char *str1, *str2;
X{
X    register int ch1, ch2;
X
X    while( (ch1 = (unsigned char)*str1++) != 0 ) {
X	if( ch1 == ' ' || ispunct( ch1 ) ) {
X	    while( *str1
X	     && (*str1 == ' ' || ispunct( (unsigned char)*str1 )) ) {
X		str1++;
X	    }
X	    ch1 = ' ';
X	} else if( isupper( ch1 ) ) {
X	    ch1 = tolower( ch1 );
X	}
X	if( !(ch2 = (unsigned char)*str2++) ) {
X	    return 0;			/* str2 is the shorter one */
X	}
X	if( ch2 == ' ' || ispunct( ch2 ) ) {
X	    while( *str2
X	     && (*str2 == ' ' || ispunct( (unsigned char)*str2 )) ) {
X		str2++;
X	    }
X	    ch2 = ' ';
X	} else if( isupper( ch2 ) ) {
X	    ch2 = tolower( ch2 );
X	}
X	if( ch1 != ch2 ) {
X	    return 0;
X	}
X    }
X    if( *str2 ) {
X	return 0;			/* str1 is the shorter one */
X    }
X    return 1;
X}
X
X/* Allocate a SUBJECT holding a private copy of the current subject_str.
X** The new structure starts with a use-count of 1 and no link; the running
X** totals for subjects and string1 bytes are bumped to match.
X*/
XSUBJECT *
Xnew_subject()
X{
X    register SUBJECT *subj;
X    register int nbytes;
X
X    nbytes = strlen( subject_str ) + 1;		/* include the '\0' */
X
X    subj = (SUBJECT *)safemalloc( sizeof (SUBJECT) );
X    total.subject++;
X    subj->str = safemalloc( nbytes );
X    total.string1 += nbytes;
X
X    bcopy( subject_str, subj->str, nbytes );
X    subj->link = Null(SUBJECT*);
X    subj->count = 1;
X
X    return subj;
X}
X
X/* Return the AUTHOR structure for the current author_str.  Unless
X** DONT_COMPARE_AUTHORS is defined, an existing entry with the same name is
X** reused (its count is incremented); otherwise a new entry with a count of
X** 1 is appended to the end of the author_root list.
X*/
XAUTHOR *
Xnew_author()
X{
X    register int nbytes = strlen( author_str ) + 1;
X    register AUTHOR *cur, *tail, *fresh;
X
X    /* Walk the whole list, remembering the tail for the append below. */
X    tail = Null(AUTHOR*);
X    cur = author_root;
X    while( cur ) {
X#ifndef DONT_COMPARE_AUTHORS	/* might like to define this to save time */
X	if( strEQ( cur->name, author_str ) ) {
X	    cur->count++;
X	    return cur;				/* RETURN */
X	}
X#endif
X	tail = cur;
X	cur = cur->link;
X    }
X
X    fresh = (AUTHOR *)safemalloc( sizeof (AUTHOR) );
X    total.author++;
X    fresh->name = safemalloc( nbytes );
X    total.string1 += nbytes;
X    bcopy( author_str, fresh->name, nbytes );
X    fresh->count = 1;
X    fresh->link = Null(AUTHOR*);
X
X    if( !tail ) {
X	author_root = fresh;		/* list was empty */
X    } else {
X	tail->link = fresh;
X    }
X    return fresh;
X}
X
X/* Fold root2 into root1: every top-level article of root2 is re-parented
X** onto root1, root2's subjects are appended to root1's subject list, the
X** subject counts are summed, and root2 is then unlinked and freed.
X*/
Xvoid
Xmerge_roots( root1, root2 )
XROOT *root1, *root2;
X{
X    register ARTICLE *art, *following;
X    register SUBJECT *tail;
X
X    /* The merged root keeps whichever root number is lower.  A newsreader
X    ** that already saw the two roots as separate may get confused by this,
X    ** but it has to cope as best it can.
X    */
X    if( root2->root_num < root1->root_num ) {
X	root1->root_num = root2->root_num;
X    }
X
X    art = root2->articles;
X    while( art ) {
X	/* Grab the sibling first: the node is about to be detached. */
X	following = art->siblings;
X	unlink_child( art );
X	art->siblings = Nullart;
X	set_root( art, root1 );			/* sets children too */
X	/* link_child() depends on art->parent being null and art->root
X	** being set.
X	*/
X	link_child( art );
X	art = following;
X    }
X
X    root1->subject_cnt += root2->subject_cnt;
X    tail = root1->subjects;
X    if( tail ) {
X	while( tail->link ) {
X	    tail = tail->link;
X	}
X	tail->link = root2->subjects;
X    } else {
X	root1->subjects = root2->subjects;
X    }
X
X    unlink_root( root2 );
X    free_root( root2 );
X}
X
X/* Point node, all of its following siblings, and every descendant of
X** those articles at the given root.  Used when roots are merged.
X*/
Xvoid
Xset_root( node, root )
XARTICLE *node;
XROOT *root;
X{
X    for( ;; ) {
X	node->root = root;
X	if( node->children ) {
X	    set_root( node->children, root );	/* descend */
X	}
X	node = node->siblings;
X	if( !node ) {
X	    break;
X	}
X    }
X}
X
X/* Remove a root from the root_root list by redirecting whichever link
X** (root_root itself or a predecessor's link field) points at it.
X*/
Xvoid
Xunlink_root( root )
Xregister ROOT *root;
X{
X    register ROOT **linkp;
X
X    linkp = &root_root;
X    while( *linkp != root ) {
X	linkp = &(*linkp)->link;
X    }
X    *linkp = root->link;
X}
X
X/* Release an article and its message-id string.  The article is first
X** removed from its domain's id list (freeing the domain if that list
X** becomes empty).  All other resources must already be free, and the
X** article must not be attached to any threads.
X*/
Xvoid
Xfree_article( this )
XARTICLE *this;
X{
X    register ARTICLE *prev;
X
X    prev = this->domain->ids;
X    if( prev != this ) {
X	/* Somewhere in the middle: find our predecessor and splice out. */
X	while( prev->id_link != this ) {
X	    prev = prev->id_link;
X	}
X	prev->id_link = this->id_link;
X    } else {
X	/* We head the list; an empty list means the domain goes too. */
X	this->domain->ids = this->id_link;
X	if( !this->domain->ids ) {
X	    free_domain( this->domain );
X	}
X    }
X
X    total.article--;
X    total.string2 -= strlen( this->id ) + 1;
X    free( this->id );
X    free( this );
X}
X
X/* Unlink and release a domain once its last unique id has been freed.
X** The static unk_domain is never freed.
X*/
Xvoid
Xfree_domain( this )
XDOMAIN *this;
X{
X    register DOMAIN *prev;
X
X    if( this == &unk_domain ) {
X	return;
X    }
X    /* Help the expire routine skip freed domains. */
X    if( next_domain == this ) {
X	next_domain = next_domain->link;
X    }
X    /* The list starts at unk_domain; locate our predecessor. */
X    for( prev = &unk_domain; prev->link != this; prev = prev->link ) {
X	;
X    }
X    prev->link = this->link;
X
X    total.domain--;
X    total.string2 -= strlen( this->name ) + 1;
X    free( this->name );
X    free( this );
X}
X
X/* Release a subject structure together with its string, and take both
X** out of the running totals.
X*/
Xvoid
Xfree_subject( this )
XSUBJECT *this;
X{
X    total.subject--;
X    total.string1 -= strlen( this->str ) + 1;
X
X    free( this->str );
X    free( this );
X}
X
X/* Release a root structure and decrement the root total.  The root must
X** already have been unlinked from the root list.
X*/
Xvoid
Xfree_root( this )
XROOT *this;
X{
X    total.root--;
X    free( this );
X}
X
X/* Release an author structure together with its name string, and take
X** both out of the running totals.
X*/
Xvoid
Xfree_author( this )
XAUTHOR *this;
X{
X    total.author--;
X    total.string1 -= strlen( this->name ) + 1;
X
X    free( this->name );
X    free( this );
X}
SHAR_EOF
chmod 0660 mt-process.c || echo "restore of mt-process.c fails"
echo "x - extracting mt-read.c (Text)"
sed 's/^X//' << 'SHAR_EOF' > mt-read.c &&
X/* $Header: mt-read.c,v 4.3.3.1 90/07/24 23:51:12 davison Trn $
X**
X** $Log:	mt-read.c,v $
X** Revision 4.3.3.1  90/07/24  23:51:12  davison
X** Initial Trn Release
X** 
X*/
X
X#include "EXTERN.h"
X#include "common.h"
X#include "mthreads.h"
X
Xstatic FILE *fp_in;
X
Xvoid tweak_roots();
X
X/* Reset the in-memory lists and try to open the thread file.  When the
X** file opens and a complete TOTAL header can be read, the header is left
X** in `total', the file stays open, and 1 is returned so the caller can
X** decide whether to read the rest.  Otherwise `total' is zeroed and 0 is
X** returned (the file is closed again if the header was short).
X*/
Xint
Xinit_data( filename )
Xchar *filename;
X{
X    int got_totals = 0;
X
X    root_root = Null(ROOT*);
X    author_root = Null(AUTHOR*);
X    unk_domain.ids = Nullart;
X    unk_domain.link = Null(DOMAIN*);
X
X    fp_in = fopen( filename, "r" );
X    if( fp_in != Nullfp ) {
X	if( fread( &total, 1, sizeof (TOTAL), fp_in ) < sizeof (TOTAL) ) {
X	    fclose( fp_in );			/* truncated header */
X	} else {
X	    got_totals = 1;
X	}
X    }
X    if( !got_totals ) {
X	bzero( &total, sizeof (TOTAL) );
X    }
X    return got_totals;
X}
X
X/* Read the remainder of the thread file and turn the packed information
X** into linked structures.  The stages run in file order and stop at the
X** first failure.  Returns 1 on success, 0 on failure; the input file is
X** closed either way.
X*/
Xint
Xread_data()
X{
X    int ok;
X
X    ok = read_authors()
X      && read_subjects()
X      && read_roots()
X      && read_articles()
X      && read_ids();
X
X    if( ok ) {
X	tweak_roots();
X    } else {
X	/* Something failed.  Free takes care of checking if we're partially
X	** allocated.  Any linked-list structures we created were freed
X	** before we got here.
X	*/
X	Free( &strings );
X	Free( &subject_cnts );
X	Free( &author_cnts );
X	Free( &root_array );
X	Free( &subject_array );
X	Free( &article_array );
X	Free( &ids );
X    }
X    fclose( fp_in );
X    return ok;
X}
X
X/* The caller has decided not to read the data.  Close the input file, but
X** only when open_flag says it was actually opened.
X*/
Xvoid
Xdont_read_data( open_flag )
Xint open_flag;		/* 0 == not opened, 1 == open failed, 2 == open */
X{
X    if( open_flag != 2 ) {
X	return;
X    }
X    fclose( fp_in );
X}
X
X/* give_string_to( dest ): copy the null-terminated string at string_ptr
X** into freshly safemalloc'ed memory assigned to dest, then advance
X** string_ptr past the string and its '\0'.  The expansion is a bare brace
X** block that relies on a variable named string_ptr being in scope at the
X** point of use.
X*/
X#define give_string_to( dest )	/* Comment for makedepend to	 \
X				** ignore the backslash above */ \
X{\
X    register MEM_SIZE len = strlen( string_ptr ) + 1;\
X    dest = safemalloc( len );\
X    bcopy( string_ptr, dest, (int)len );\
X    string_ptr += len;\
X}
X
Xchar *subject_strings;
X
X/* The author information is an array of use-counts, followed by all the
X** null-terminated strings crammed together.  The subject strings are read
X** in at the same time, since they are appended to the end of the author
X** strings.
X**
X** Builds the author_root list and the author_array index (one entry per
X** author, in file order), and leaves subject_strings pointing at the first
X** subject string.  Returns 1 on success, 0 if either chunk could not be
X** read.  A file with zero authors yields an empty list.
X*/
Xint
Xread_authors()
X{
X    register int count;
X    register char *string_ptr;
X    register WORD *authp;
X    register AUTHOR *author, *last_author, **author_ptr;
X
X    if( !read_item( &author_cnts, (MEM_SIZE)total.author * sizeof (WORD) )
X     || !read_item( &strings, total.string1 ) ) {
X	return 0;
X    }
X
X    /* We'll use this array to point each article at its proper author
X    ** (packed values are saved as indexes).
X    */
X    author_array = (AUTHOR**)safemalloc( total.author * sizeof (AUTHOR*) );
X    author_ptr = author_array;
X
X    authp = author_cnts;
X    string_ptr = strings;
X
X    last_author = Null(AUTHOR*);
X    for( count = total.author; count--; ) {
X	*author_ptr++ = author = (AUTHOR*)safemalloc( sizeof (AUTHOR) );
X	if( !last_author ) {
X	    author_root = author;
X	} else {
X	    last_author->link = author;
X	}
X	give_string_to( author->name );
X	author->count = *authp++;
X	last_author = author;
X    }
X    /* Terminate the list.  With no authors at all there is no tail to
X    ** terminate, so don't dereference the null pointer.
X    */
X    if( last_author ) {
X	last_author->link = Null(AUTHOR*);
X    }
X
X    /* The subject strings start right after the last author name. */
X    subject_strings = string_ptr;
X
X    free( author_cnts );
X    author_cnts = Null(WORD*);
X
X    return 1;
X}
X
X/* Read the subject use-count array into subject_cnts.  The subject strings
X** themselves were already read along with the author strings; both are
X** stored in the order the roots need when they are unpacked.  Returns 1
X** on success, 0 on a failed read.
X*/
Xint
Xread_subjects()
X{
X    MEM_SIZE nbytes = (MEM_SIZE)total.subject * sizeof (WORD);
X
X    if( read_item( &subject_cnts, nbytes ) ) {
X	return 1;
X    }
X    return 0;
X}
X
X/* Read in the packed root structures and recreate the linked list versions,
X** processing each root's subjects as we go.  Defer interpretation of article
X** offsets until we unpack the article structures (root->seq temporarily
X** holds the packed article index).
X**
X** Fills root_array and subject_array for later index lookups.  Returns 1 on
X** success.  On a short read everything allocated so far is freed, root_root
X** is reset to null so it does not point at freed memory, and 0 is returned.
X** subject_cnts and strings are released in either case.
X*/
Xint
Xread_roots()
X{
X    register int count;
X    register char *string_ptr;
X    register WORD *subjp;
X    ROOT *root, *last_root, **root_ptr;
X    SUBJECT *subject, *last_subject, **subj_ptr;
X    int ret;
X
X    /* Use this array when unpacking the article's subject offsets. */
X    subject_array = (SUBJECT**)safemalloc( total.subject * sizeof (SUBJECT*) );
X    subj_ptr = subject_array;
X    /* And this array points the article's root offsets at the right spot. */
X    root_array = (ROOT**)safemalloc( total.root * sizeof (ROOT*) );
X    root_ptr = root_array;
X
X    subjp = subject_cnts;
X    string_ptr = subject_strings;
X
X#ifndef lint
X    /* Pretend root_root is the link field of a predecessor root, so the
X    ** first "last_root->link = root" below stores into root_root.
X    ** NOTE(review): assumes link is the first member of ROOT -- confirm.
X    */
X    last_root = (ROOT*)&root_root;
X#else
X    last_root = Null(ROOT*);
X#endif
X    for( count = total.root; count--; ) {
X	ret = fread( &p_root, 1, sizeof (PACKED_ROOT), fp_in );
X	if( ret != sizeof (PACKED_ROOT) ) {
X	    log_error( "failed root read --  %d bytes instead of %d.\n",
X		ret, (int)sizeof (PACKED_ROOT) );
X	    ret = 0;
X	    /* Free the roots we've read so far and their subjects. */
X	    while( root_ptr != root_array ) {
X		free( *--root_ptr );
X	    }
X	    while( subj_ptr != subject_array ) {
X		free( (*--subj_ptr)->str );
X		free( *subj_ptr );
X	    }
X	    /* The list head was pointing at the roots we just freed. */
X	    root_root = Null(ROOT*);
X	    goto finish_up;
X	}
X	*root_ptr++ = root = (ROOT*)safemalloc( sizeof (ROOT) );
X	root->link = Null(ROOT*);
X	root->seq = p_root.articles;
X	root->root_num = p_root.root_num;
X	root->thread_cnt = p_root.thread_cnt;
X	root->subject_cnt = p_root.subject_cnt;
X	/* Start with an empty subject list so a root whose subject_cnt is
X	** zero is still well-formed.
X	*/
X	root->subjects = Null(SUBJECT*);
X	last_subject = Null(SUBJECT*);
X	while( p_root.subject_cnt-- ) {
X	    *subj_ptr++ = subject = (SUBJECT*)safemalloc( sizeof (SUBJECT) );
X	    if( !last_subject ) {
X		root->subjects = subject;
X	    } else {
X		last_subject->link = subject;
X	    }
X	    give_string_to( subject->str );
X	    subject->count = *subjp++;
X	    last_subject = subject;
X	}
X	if( last_subject ) {
X	    last_subject->link = Null(SUBJECT*);
X	}
X	last_root->link = root;
X	last_root = root;
X    }
X    ret = 1;
X
X  finish_up:
X    free( subject_cnts );
X    free( strings );
X    subject_cnts = Null(WORD*);
X    strings = Nullch;
X
X    return ret;
X}
X
X/* Translate an article's packed subject index into a SUBJECT pointer.
X** An index inside [0, total.subject) selects that subject_array entry.
X** A value of -1 means "no subject" and quietly yields null; any other
X** out-of-range value is logged (along with art_num) and yields null.
X*/
XSUBJECT *
Xvalid_subject( num, art_num )
XWORD num;
Xlong art_num;
X{
X    if( num >= 0 && num < total.subject ) {
X	return subject_array[num];
X    }
X    if( num != -1 ) {
X	log_error( "Invalid subject in data file: %d [%ld]\n", num, art_num );
X    }
X    return Null(SUBJECT*);
X}
X
X/* Translate an article's packed author index into an AUTHOR pointer.
X** An index inside [0, total.author) selects that author_array entry.
X** A value of -1 means "no author" and quietly yields null; any other
X** out-of-range value is logged (along with art_num) and yields null.
X*/
XAUTHOR *
Xvalid_author( num, art_num )
XWORD num;
Xlong art_num;
X{
X    if( num >= 0 && num < total.author ) {
X	return author_array[num];
X    }
X    if( num != -1 ) {
X	log_error( "Invalid author in data file: %d [%ld]\n", num, art_num );
X    }
X    return Null(AUTHOR*);
X}
X
X/* Our parent/sibling information is a relative offset in the article array,
X** zero for none.  Child values are always found in the very next array
X** element if child_cnt is non-zero.
X**
X** valid_node( rel, num ) yields Nullart when rel is zero, otherwise the
X** article_array entry at index rel+num.  No range check is made on the
X** resulting index.
X*/
X#define valid_node( rel, num ) (!(rel)? Nullart : article_array[(rel)+(num)])
X
X/* Read the articles into their linked lists.  Point everything everywhere.
X**
X** All ARTICLE structures are allocated first so that links to articles not
X** yet read can be filled in; each packed record is then unpacked in file
X** order.  Returns 1 on success.  On a short read the pre-allocated article
X** structures are freed and 0 is returned (article_array itself is left for
X** the caller to release).  root_array, subject_array and author_array are
X** released in either case.
X**
X** NOTE(review): p_article.root and the parent/sibling offsets are used as
X** array indexes without any range check.
X*/
Xint
Xread_articles()
X{
X    register int count;
X    register ARTICLE *article, **article_ptr;
X    int ret;
X
X    /* Build an array to interpret interlinkages of articles. */
X    article_array = (ARTICLE**)safemalloc( total.article * sizeof (ARTICLE*) );
X    article_ptr = article_array;
X
X    /* Allocate all the structures up-front so that we can point to un-read
X    ** siblings as we go.
X    */
X    for( count = total.article; count--; ) {
X	*article_ptr++ = (ARTICLE*)safemalloc( sizeof (ARTICLE) );
X    }
X    article_ptr = article_array;
X    for( count = 0; count < total.article; count++ ) {
X	ret = fread( &p_article, 1, sizeof (PACKED_ARTICLE), fp_in );
X	if( ret != sizeof (PACKED_ARTICLE) ) {
X	    log_error( "failed article read --  %d bytes instead of %d.\n", ret, (int)sizeof (PACKED_ARTICLE) );
X	    /* Nothing else points at these structures yet (the roots only
X	    ** hold sequence numbers), so release them all rather than
X	    ** leaking them.
X	    */
X	    for( count = total.article; count--; ) {
X		free( article_array[count] );
X	    }
X	    ret = 0;
X	    goto finish_up;
X	}
X	article = *article_ptr++;
X	article->num = p_article.num;
X	article->date = p_article.date;
X	article->subject = valid_subject( p_article.subject, p_article.num );
X	article->author = valid_author( p_article.author, p_article.num );
X	article->flags = p_article.flags;
X	article->child_cnt = p_article.child_cnt;
X	article->parent = valid_node( p_article.parent, count );
X	/* The first child, if any, is always the very next record. */
X	article->children = article->child_cnt?article_array[count+1]:Nullart;
X	article->siblings = valid_node( p_article.siblings, count );
X	article->root = root_array[p_article.root];
X    }
X    ret = 1;
X
X  finish_up:
X    /* We're done with most of the pointer arrays. */
X    free( root_array );
X    free( subject_array );
X    free( author_array );
X    root_array = Null(ROOT**);
X    subject_array = Null(SUBJECT**);
X    author_array = Null(AUTHOR**);
X
X    return ret;
X}
X
X/* Read the message-id strings and attach them to each article.  The data
X** format consists of the mushed-together null-terminated strings (a domain
X** name followed by all its unique-id prefixes) and then the article offsets
X** to which they belong.  The first domain name was omitted, as it is the
X** ".unknown." domain for those truly weird message-id's without '@'s.
X**
X** Returns 1 on success, 0 if either chunk could not be read.
X** NOTE(review): the values in ids[] are used as article_array indexes
X** without a range check, and nothing built here is torn down on failure.
X*/
Xint
Xread_ids()
X{
X    register DOMAIN *domain, *last;
X    register ARTICLE *article;
X    register char *string_ptr;
X    register int i, count;
X
X    if( !read_item( &strings, total.string2 ) ) {
X	return 0;
X    }
X    /* One WORD per article plus one -1 terminator per domain, counting
X    ** the unknown domain.
X    */
X    if( !read_item( &ids,
X		(MEM_SIZE)(total.article+total.domain+1) * sizeof (WORD) ) ) {
X	return 0;
X    }
X    string_ptr = strings;
X
X    last = Null(DOMAIN*);
X    /* count tracks domains remaining; i is the cursor into ids[] and is
X    ** advanced both here (past each terminator) and in the inner loop.
X    */
X    for( i = 0, count = total.domain + 1; count--; i++ ) {
X	if( i ) {
X	    domain = (DOMAIN*)safemalloc( sizeof (DOMAIN) );
X	    give_string_to( domain->name );
X	} else {
X	    /* The first group always belongs to the static unknown domain,
X	    ** which has no name stored in the file.
X	    */
X	    domain = &unk_domain;
X	}
X	if( ids[i] == -1 ) {
X	    domain->ids = Nullart;
X	} else {
X	    article = article_array[ids[i]];
X	    domain->ids = article;
X	    /* Chain together every article listed up to the next -1. */
X	    for( ;; ) {
X		give_string_to( article->id );
X		article->domain = domain;
X		if( ids[++i] != -1 ) {
X		    article = article->id_link = article_array[ids[i]];
X		} else {
X		    article->id_link = Nullart;
X		    break;
X		}
X	    }
X	}
X	if( last ) {
X	    last->link = domain;
X	}
X	last = domain;
X    }
X    last->link = Null(DOMAIN*);
X    free( ids );
X    free( strings );
X    ids = Null(WORD*);
X    strings = Nullch;
X
X    return 1;
X}
X
X/* Final unpacking step: convert each root's saved article sequence number
X** into a pointer to that article, then discard the article index array,
X** which is no longer needed.
X*/
Xvoid
Xtweak_roots()
X{
X    register ROOT *rp = root_root;
X
X    while( rp ) {
X	rp->articles = article_array[rp->seq];
X	rp = rp->link;
X    }
X    free( article_array );
X    article_array = Null(ARTICLE**);
X}
X
X/* Read len bytes of the input file into a newly malloc'ed buffer stored
X** through dest.  Returns 1 on success.  On a short read the shortfall is
X** logged, the buffer is freed, *dest is set to null, and 0 is returned.
X*/
Xint
Xread_item( dest, len )
Xchar **dest;
XMEM_SIZE len;
X{
X    int got;
X
X    *dest = safemalloc( len );
X    got = fread( *dest, 1, (int)len, fp_in );
X    if( got == len ) {
X	return 1;
X    }
X    log_error( "Only read %ld bytes instead of %ld.\n",
X	    (long)got, (long)len );
X    free( *dest );
X    *dest = Nullch;
X    return 0;
X}
X
X/* Interpret rn's '%X' and '%x' path prefixes without including all their
X** source.  Names that don't start with '%' or '/' are prefixed with the
X** SPOOL directory.
X**
X** Fully qualified names are returned unchanged; every other result lives
X** in a static buffer that is overwritten by the next call.  An unknown
X** '%' expansion, or a result too long for the buffer, is logged and the
X** program exits with status 1.
X*/
Xchar *
Xfile_exp( name )
Xchar *name;
X{
X    static char name_buff[256];
X    char *prefix, *sep, *rest;
X
X    if( *name == '/' ) {	/* fully qualified names are left alone */
X	return name;
X    }
X    if( *name != '%' ) {	/* all normal names are relative to SPOOL */
X	prefix = SPOOL;
X	sep = "/";
X	rest = name;
X    } else {			/* interpret %x (LIB) & %X (RNLIB) */
X	if( name[1] == 'x' ) {
X	    prefix = LIB;
X	} else if( name[1] == 'X' ) {
X	    prefix = RNLIB;
X	} else {
X	    log_entry( "Unknown expansion: %s", name );
X	    exit( 1 );
X	}
X	sep = "";
X	rest = name+2;
X    }
X    /* Refuse to overrun the static buffer (the '\0' needs room too). */
X    if( strlen( prefix ) + strlen( sep ) + strlen( rest )
X     >= sizeof name_buff ) {
X	log_entry( "Expanded name too long: %s", name );
X	exit( 1 );
X    }
X    sprintf( name_buff, "%s%s%s", prefix, sep, rest );
X    return name_buff;
X}
X
X#ifndef lint
X/* Allocate `amount' bytes with malloc.  Never returns null: if memory is
X** exhausted the failure is logged and the program exits with status 1.
X*/
Xchar *
Xsafemalloc( amount )
XMEM_SIZE amount;
X{
X    extern char *malloc();
X    register char *mem;
X
X    mem = malloc( amount );
X    if( mem == Nullch ) {
X	log_error( "malloc(%ld) failed.\n", (long)amount );
X	exit( 1 );
X    }
X    return mem;
X}
X#endif
X
X/* Return a freshly safemalloc'ed duplicate of str, terminator included. */
Xchar *
Xsavestr( str )
Xchar *str;
X{
X    register MEM_SIZE nbytes;
X    register char *copy;
X
X    nbytes = strlen( str ) + 1;
X    copy = safemalloc( nbytes );
X    bcopy( str, copy, (int)nbytes );
X
X    return copy;
X}
X
X#ifndef lint
X/* Free the memory that *pp points to, unless *pp is already null, and
X** leave *pp null afterwards so a repeated call is harmless.
X*/
Xvoid
XFree( pp )
Xchar **pp;
X{
X    if( !*pp ) {
X	return;
X    }
X    free( *pp );
X    *pp = Nullch;
X}
X#endif
SHAR_EOF
chmod 0660 mt-read.c || echo "restore of mt-read.c fails"
echo "x - extracting mt-write.c (Text)"
sed 's/^X//' << 'SHAR_EOF' > mt-write.c &&
X/* $Header: mt-write.c,v 4.3.3.1 90/07/24 23:51:18 davison Trn $
X**
X** $Log:	mt-write.c,v $
X** Revision 4.3.3.1  90/07/24  23:51:18  davison
X** Initial Trn Release
X** 
X*/
X
X#include "EXTERN.h"
X#include "common.h"
X#include "mthreads.h"
X
Xstatic FILE *fp_out;
Xstatic int seq;
Xstatic int article_seq;
X
Xstatic int failure;
X
Xvoid write_subjects(), write_authors(), write_roots(), write_ids();
Xvoid write_articles(), write_thread(), write_item();
Xvoid enumerate_articles(), enumerate_thread();
Xvoid free_leftovers();
X
X/* Write out all the data in a packed format that is easy for our newsreader
X** to use.  We free things as we go, when we don't need them any longer.  If
X** we encounter any write errors, the write_item routine sets a failure flag
X** to halt our writing of the file, but we keep on plugging away to free
X** everything up.
X**
X** A null filename means "just free the data".  The failure flag is 0 for
X** success, 1 for a write error (the partial file is removed), and 2 when
X** no output file was opened at all.  Returns non-zero only on success.
X*/
Xint
Xwrite_data( filename )
Xchar *filename;
X{
X    if( filename == Nullch ) {
X	failure = 2;	/* A NULL filename indicates just free the data */
X    } else if( !ensure_path( filename ) ) {
X	log_error( "Unable to create path: `%s'.\n", filename );
X	failure = 2;
X    } else if( (fp_out = fopen( filename, "w" )) == Nullfp ) {
X	log_error( "Unable to create file: `%s'.\n", filename );
X	failure = 2;
X    } else {
X	failure = 0;
X    }
X    write_item( &total, sizeof (TOTAL) );
X
X    enumerate_articles();
X
X    write_authors();
X    write_subjects();
X    write_roots();
X    write_articles();
X    write_ids();
X    free_leftovers();
X
X    if( failure != 2 ) {
X	/* Buffered output is only flushed here, so an error from fclose()
X	** (a full disk, say) is a write failure like any other.
X	*/
X	if( fclose( fp_out ) == EOF && !failure ) {
X	    failure = 1;
X	}
X    }
X    if( failure == 1 ) {
X	log_error( "Write failed!  Removing `%s'.\n", filename );
X	unlink( filename );
X    }
X    return !failure;
X}
X
X/* Number every root and, through enumerate_thread(), every article in the
X** order they will be written, so that sequence numbers can be stored in
X** the data file in place of pointers.  Any mismatch against the expected
X** totals is logged.
X*/
Xvoid
Xenumerate_articles()
X{
X    register ROOT *rp;
X
X    article_seq = 0;
X    seq = 0;
X
X    rp = root_root;
X    while( rp ) {
X	rp->seq = seq++;
X	if( rp->articles ) {
X	    enumerate_thread( rp->articles );
X	} else {
X	    log_error( "** No articles on this root??\n" );
X	}
X	rp = rp->link;
X    }
X    if( seq != total.root ) {
X	log_error( "** Wrote %d roots instead of %d **\n", seq, total.root );
X    }
X    if( article_seq != total.article ) {
X	log_error( "** Wrote %d articles instead of %d **\n", article_seq, total.article );
X    }
X}
X
X/* Assign consecutive sequence numbers to an article, its descendants, and
X** its following siblings.  Children are numbered immediately after their
X** parent, before the parent's next sibling.
X*/
Xvoid
Xenumerate_thread( article )
XARTICLE *article;
X{
X    for( ; article; article = article->siblings ) {
X	article->seq = article_seq++;
X	if( article->children ) {
X	    enumerate_thread( article->children );
X	}
X    }
X}
X
X/* write_and_free( str_ptr ): hand the string (including its terminating
X** '\0') to write_item(), free it, and add its length to string_offset.
X** The expansion is a bare brace block.
X*/
X#define write_and_free( str_ptr )	/* Comment for makedepend to	 \
X					** ignore the backslash above */ \
X{\
X    register int len = strlen( str_ptr ) + 1;\
X    write_item( str_ptr, len );\
X    free( str_ptr );\
X    string_offset += len;\
X}
X
XMEM_SIZE string_offset;
X
X/* Emit the author section: one WORD use-count per author (numbering each
X** author's seq along the way), followed by all the name strings packed
X** together.  The names are freed as they are written, and string_offset
X** is restarted at zero before the first name.
X*/
Xvoid
Xwrite_authors()
X{
X    register AUTHOR *auth;
X
X    seq = 0;
X    auth = author_root;
X    while( auth ) {
X	write_item( &auth->count, sizeof (WORD) );
X	auth->seq = seq++;
X	auth = auth->link;
X    }
X    if( seq != total.author ) {
X	log_error( "** Wrote %d authors instead of %d **\n",
X		seq, total.author );
X    }
X
X    string_offset = 0;
X
X    auth = author_root;
X    while( auth ) {
X	write_and_free( auth->name );
X	auth = auth->link;
X    }
X}
X
X/* Emit the subject section: first every subject string (root by root, in
X** list order, freeing each as it goes), then the matching WORD use-counts
X** in the same order.  That order is the one the roots need for their
X** subject structures.  Size and count mismatches are logged.
X*/
Xvoid
Xwrite_subjects()
X{
X    register ROOT *rp;
X    register SUBJECT *sp;
X
X    /* Pass 1: the strings, continuing string_offset from the authors. */
X    rp = root_root;
X    while( rp ) {
X	for( sp = rp->subjects; sp; sp = sp->link ) {
X	    write_and_free( sp->str );
X	}
X	rp = rp->link;
X    }
X    if( string_offset != total.string1 ) {
X	log_error( "** Author/subject strings were %ld bytes instead of %ld **\n",
X		string_offset, total.string1 );
X    }
X
X    /* Pass 2: the use-counts, numbering the subjects as we go. */
X    seq = 0;
X    rp = root_root;
X    while( rp ) {
X	for( sp = rp->subjects; sp; sp = sp->link ) {
X	    write_item( &sp->count, sizeof (WORD) );
X	    sp->seq = seq++;
X	}
X	rp = rp->link;
X    }
X    if( seq != total.subject ) {
X	log_error( "** Wrote %d subjects instead of %d **\n",
X		seq, total.subject );
X    }
X}
X
X/* Write the roots in a packed format.  Interpret the pointers into
X** sequence numbers as we go.
X**
X** A root without any articles cannot be represented in the file (there is
X** no article sequence number to store), so instead of dereferencing its
X** null article pointer we flag the write as failed and skip it.
X*/
Xvoid
Xwrite_roots()
X{
X    register ROOT *root;
X
X    for( root = root_root; root; root = root->link ) {
X	if( !root->articles ) {
X	    /* enumerate_articles() has already logged this root.  Leave
X	    ** an existing failure code (1 or 2) untouched.
X	    */
X	    if( !failure ) {
X		failure = 1;
X	    }
X	    continue;
X	}
X	p_root.articles = root->articles->seq;
X	p_root.root_num = root->root_num;
X	p_root.thread_cnt = root->thread_cnt;
X	p_root.subject_cnt = root->subject_cnt;
X	write_item( &p_root, sizeof (PACKED_ROOT) );
X    }
X}
X
X/* rel_article: article's seq minus rseq, or 0 when article is null.
X** valid_seq: ptr's seq value, or -1 when ptr is null.
X*/
X#define rel_article( article, rseq )	((article)? (article)->seq - (rseq) : 0)
X#define valid_seq( ptr )		((ptr)? (ptr)->seq : -1)
X
X/* Write every root's article tree, root by root, which is the same order
X** in which the articles were sequenced.
X*/
Xvoid
Xwrite_articles()
X{
X    register ROOT *rp = root_root;
X
X    while( rp ) {
X	write_thread( rp->articles );
X	rp = rp->link;
X    }
X}
X
X/* Pack and write an article, then its descendants, then its following
X** siblings.  The reader depends on an article's first child being the very
X** next record written.  Pointers are stored as sequence numbers (subject,
X** author, root) or as offsets relative to this article's own sequence
X** number (parent, siblings); the NEW_ARTICLE flag is not saved.
X*/
Xvoid
Xwrite_thread( article )
Xregister ARTICLE *article;
X{
X    for( ; article; article = article->siblings ) {
X	p_article.num = article->num;
X	p_article.date = article->date;
X	p_article.flags = (article->flags & ~NEW_ARTICLE);
X	p_article.child_cnt = article->child_cnt;
X
X	p_article.subject = valid_seq( article->subject );
X	p_article.author = valid_seq( article->author );
X	p_article.root = article->root->seq;
X
X	p_article.parent = rel_article( article->parent, article->seq );
X	p_article.siblings = rel_article( article->siblings, article->seq );
X
X	write_item( &p_article, sizeof (PACKED_ARTICLE) );
X	if( article->children ) {
X	    write_thread( article->children );
X	}
X    }
X}
X
XWORD minus_one = -1;
X
X/* Write the message-id section.  First come the strings: each domain name
X** (the ".unknown." domain's name is not written) followed by all of that
X** domain's unique ids.  Then come the article sequence numbers the ids
X** belong to, each domain's group terminated by -1.  This section is last
X** because the newsreader doesn't need to read it.  The strings, articles
X** and domains are freed as they are written, and unk_domain is left empty.
X*/
Xvoid
Xwrite_ids()
X{
X    register DOMAIN *dom, *dom_after;
X    register ARTICLE *art, *art_after;
X
X    string_offset = 0;
X
X    /* Pass 1: the strings. */
X    dom = &unk_domain;
X    while( dom ) {
X	if( dom != &unk_domain ) {
X	    write_and_free( dom->name );
X	    if( !dom->ids ) {
X		log_error( "** Empty domain name!! **\n" );
X	    }
X	}
X	for( art = dom->ids; art; art = art->id_link ) {
X	    write_and_free( art->id );
X	}
X	dom = dom->link;
X    }
X    if( string_offset != total.string2 ) {
X	log_error( "** Message-id strings were %ld bytes (%ld) **\n",
X		string_offset, total.string2 );
X    }
X
X    /* Pass 2: the sequence numbers.  Each link is fetched before its
X    ** owner is freed.
X    */
X    dom = &unk_domain;
X    while( dom ) {
X	dom_after = dom->link;
X	art = dom->ids;
X	while( art ) {
X	    art_after = art->id_link;
X	    write_item( &art->seq, sizeof (WORD) );
X	    free( art );
X	    art = art_after;
X	}
X	write_item( &minus_one, sizeof (WORD) );
X	if( dom != &unk_domain ) {
X	    free( dom );
X	}
X	dom = dom_after;
X    }
X    unk_domain.ids = Nullart;
X    unk_domain.link = Null(DOMAIN*);
X}
X
X/* Release what the write routines did not: every root with its subject
X** structures, and every author structure.  Both list heads are reset to
X** null afterwards.
X*/
Xvoid
Xfree_leftovers()
X{
X    register ROOT *rp, *rp_after;
X    register SUBJECT *sp, *sp_after;
X    register AUTHOR *ap, *ap_after;
X
X    rp = root_root;
X    while( rp ) {
X	rp_after = rp->link;
X	sp = rp->subjects;
X	while( sp ) {
X	    sp_after = sp->link;
X	    free( sp );
X	    sp = sp_after;
X	}
X	free( rp );
X	rp = rp_after;
X    }
X    root_root = Null(ROOT*);
X
X    ap = author_root;
X    while( ap ) {
X	ap_after = ap->link;
X	free( ap );
X	ap = ap_after;
X    }
X    author_root = Null(AUTHOR*);
X}
X
X/* This routine will check to be sure that the required path exists for
X** the data file, and if not it will attempt to create it.
X**
X** With MAKEDIR defined, a single "mkdir dir1 dir2 ..." command line is
X** built for every missing leading directory; otherwise the DIRMAKER
X** program is invoked on the filename.  Either command is run through the
X** shell.  Returns non-zero when the path is usable, zero on failure.
X**
X** NOTE(review): the command line is built in a fixed 1024-byte buffer with
X** unbounded sprintf calls, and the filename is passed to the shell without
X** any quoting.
X*/
Xint
Xensure_path( filename )
Xregister char *filename;
X{
X    int status, pid, w;
X    char tmpbuf[1024];
X#ifdef MAKEDIR
X    register char *cp, *last;
X    register char *tbptr = tmpbuf+5;	/* just past the "mkdir" text */
X
X    if( !(last = rindex( filename, '/' )) ) {	/* find filename portion */
X	return 1;				/* no path, we're fine */
X    }
X    *last = '\0';				/* truncate path at filename */
X    strcpy( tmpbuf, "mkdir" );
X
X    /* Chop components off the end (writing '\0' over each '/') until what
X    ** remains is an existing directory, or nothing is left to chop.
X    ** filestat is not declared in this function.
X    */
X    for( cp = last;; ) {
X	if( stat( filename, &filestat ) >= 0 && (filestat.st_mode & S_IFDIR) ) {
X	    *cp = '/';
X	    break;
X	}
X	if( !(cp = rindex( filename, '/' )) ) {/* find something that exists */
X	    break;
X	}
X	*cp = '\0';
X    }
X    
X    /* Walk forward restoring each chopped '/'.  Every '\0' found marks the
X    ** end of a directory that needs creating, so it is appended to the
X    ** command before the separator is put back.
X    */
X    for( cp = filename; cp <= last; cp++ ) {
X	if( !*cp ) {
X	    sprintf( tbptr, " %s", filename );
X	    tbptr += strlen( tbptr );		/* set up for mkdir call */
X	    *cp = '/';
X	}
X    }
X    if( tbptr == tmpbuf+5 ) {
X	return 1;		/* nothing was appended: the path exists */
X    }
X#else
X    /* NOTE(review): this calls filexp(), while the routine defined in
X    ** mt-read.c is named file_exp() -- confirm which one is intended.
X    */
X    sprintf(tmpbuf,"%s %s %d", filexp(DIRMAKER), filename, 1);
X#endif
X
X    if ((pid = vfork()) == 0) {
X	execl(SH, SH, "-c", tmpbuf, Nullch);
X	_exit(127);			/* only reached if the exec failed */
X    }
X    /* Reap children until ours shows up or wait() reports an error. */
X    while ((w = wait(&status)) != pid && w != -1)
X	;
X    if (w == -1)
X	status = -1;
X    return !status;
X}
X
X/* Write len bytes from buff to the output file, unless an earlier failure
X** has already been recorded.  A short write sets the failure flag to 1,
X** which turns all later calls into no-ops.
X*/
Xvoid
Xwrite_item( buff, len )
Xchar *buff;
Xint len;
X{
X    if( failure ) {
X	return;
X    }
X    if( fwrite( buff, 1, len, fp_out ) < len ) {
X	failure = 1;
X    }
X}
SHAR_EOF
chmod 0660 mt-write.c || echo "restore of mt-write.c fails"
echo "x - extracting mt.check.SH (Text)"
sed 's/^X//' << 'SHAR_EOF' > mt.check.SH &&
Xcase $CONFIG in
X    '') . ./config.sh ;;
Xesac
Xecho "Extracting mt.check (with variable substitutions)"
X$spitshell >mt.check <<!GROK!THIS!
X$startsh
X# $Header: mt.check.SH,v 4.3.3.1 90/06/20 23:00:07 davison Trn $
X#
X# $Log:	mt.check.SH,v $
X# Revision 4.3.3.1  90/06/20  23:00:07  davison
X# Initial Trn Release
X# 
X# mt.check - daily maintenance for mt.log
SHAR_EOF
echo "End of part 8"
echo "File mt.check.SH is continued in part 9"
echo "9" > s2_seq_.tmp
exit 0

exit 0 # Just in case...
-- 
Please send comp.sources.unix-related mail to rsalz at uunet.uu.net.
Use a domain-based address or give alternate paths, or you may lose out.



More information about the Comp.sources.unix mailing list