v18i021: Selectively retrieve news article headers

Rich Salz rsalz at uunet.uu.net
Tue Mar 14 09:27:38 AEST 1989


Submitted-by: bin at primate.wisc.edu (Brain in Neutral)
Posting-number: Volume 18, Issue 21
Archive-name: headers

"headers" is a program used to retrieve selected header lines from
files containing Usenet articles.  It provides a convenient way to
scan through large numbers of articles for pertinent information,
and it is faster than using grep.  A typical use is to cd into a news
spooling or archive directory and say "headers * | more".

#	This is a shell archive.
#	Remove everything above and including the cut line.
#	Then run the rest of the file through sh.
#-----cut here-----cut here-----cut here-----cut here-----
#!/bin/sh
# shar:	Shell Archiver
#	Run the following text with /bin/sh to create:
#	Readme
#	Installation
#	Makefile
#	headers.c
#	headers.1
echo shar: extracting Readme '(329 characters)'
sed 's/^XX//' << \SHAR_EOF > Readme
XX"headers" is a program used to retrieve selected header lines from
XXfiles containing Usenet articles.  It provides a convenient way to
XXscan through large numbers of articles for pertinent information,
XXand it is faster than using grep.  A typical use is to cd into a news
XXspooling or archive directory and say "headers * | more".
XX
SHAR_EOF
# Feed the file to wc on stdin so that only the count is printed; with a
# file operand wc appends the file name and the numeric test cannot parse it.
if test 329 -ne "`wc -c < Readme`"
then
echo shar: error transmitting Readme '(should have been 329 characters)'
fi
echo shar: extracting Installation '(370 characters)'
sed 's/^XX//' << \SHAR_EOF > Installation
XX1) Decide where you want to install headers and change BINDIR in the
XXMakefile accordingly.
XX
XX2) If you run a BSD-like Unix, define BSD (-DBSD) in the CFLAGS, else
XXif you are SYSV-like, define SYSV (-DSYSV).  The latter hasn't been tested,
XXsince I don't have a SYSV machine.  Somebody that does, send me patches
XXif necessary.
XX
XX3) Comments, bugs to dubois@primate.wisc.edu
SHAR_EOF
# Feed the file to wc on stdin so that only the count is printed; with a
# file operand wc appends the file name and the numeric test cannot parse it.
if test 370 -ne "`wc -c < Installation`"
then
echo shar: error transmitting Installation '(should have been 370 characters)'
fi
echo shar: extracting Makefile '(492 characters)'
sed 's/^XX//' << \SHAR_EOF > Makefile
XXBINDIR=/usr/local
XXLIBDIR=
XXLIB=
XXINSTALL=-c -m 755 -o bin -g system
XXTROFF=xroff
XXMACROS=-man.new
XXPRINTER=lpr
XX
XX# define BSD or SYSV (latter is untested)
XX
XXCFLAGS=-DBSD
XX
XXall: headers
XXinstall: iheaders iman
XXclean:
XX	rm -f *.o headers
XX
XXOBJ=headers.o
XX
XXheaders: ${OBJ}
XX	cc ${OBJ} ${LIB} -o headers
XXiheaders: headers
XX	install ${INSTALL} headers ${BINDIR}
XX
XXman: headers.1
XX	${TROFF} ${MACROS} headers.1 | ${PRINTER}
XX
XXiman: headers.1
XX	cp headers.1 /usr/man/manl/headers.l
XX	chmod 444 /usr/man/manl/headers.l
SHAR_EOF
# Feed the file to wc on stdin so that only the count is printed; with a
# file operand wc appends the file name and the numeric test cannot parse it.
if test 492 -ne "`wc -c < Makefile`"
then
echo shar: error transmitting Makefile '(should have been 492 characters)'
fi
echo shar: extracting headers.c '(5653 characters)'
sed 's/^XX//' << \SHAR_EOF > headers.c
XX/*
XX	headers
XX
XX	Retrieve selected news article headers.  Only reads into the article
XX	file as far as the blank line separating the headers from the body.
XX	This is more efficient than other searching methods that may read
XX	the entire file.  A convenient way to see what's in a newsgroup:
XX
XX		cd /usr/spool/news/comp/unix/wizards
XX		headers * | more
XX
XX	syntax:  headers [ -f specfile ] [ -hheaderspec ... ] file ...
XX
XX	The set of headers to pull is found in the file specfile or in
XX	the specs given after -h option(s).  If neither -f nor -h are
XX	given, and the HEADERS environment variable names a file, that
XX	file will be used.  Otherwise the Subject:, From: and Date:
XX	headers are the default (with reasonable alternates for
XX	each if they are missing).  Capitalization is not important.
XX
XX	The program reads each of the headers and determines whether
XX	they are any of the ones desired.  When all headers have been
XX	read, the ones found are printed out according to the order
XX	of the specifications.
XX
XX	This code is in the public domain.
XX
XX	06 Dec 84 Version 1.0.  Paul DuBois, dubois@primate.wisc.edu
XX	22 Nov 88 v1.1 Revised to do case-insensitive comparisons.
XX		Added -h option capability.
XX*/
XX
XX# include	<stdio.h>
XX# include	<ctype.h>
XX# include	<varargs.h>
XX# ifdef BSD
XX# include	<strings.h>
XX# else if SYSV
XX# include	<string.h>
XX# endif
XX
XX
XX# define	New(x)		((x *) calloc (1, sizeof (x)))
XX
XXextern char	*calloc ();
XX
XXchar	*newstr ();
XXvoid	panic ();
XX
XXtypedef struct Header	Header;
XX
XXstruct Header
XX{
XX	char	*hName;		/* name of header field */
XX	char	hBuf[BUFSIZ];	/* line from article for this header */
XX	Header	*nextGrp;	/* next group of headers */
XX	Header	*nextHdr;	/* next header in this group */
XX};
XX
XX
XXHeader	*head = NULL;		/* pointer to output spec structure */
XX
XX
XXmain (argc, argv)
XXint	argc;
XXchar	**argv;
XX{
XXchar	*p, *getenv ();
XX	
XX	if (*++argv != NULL && strncmp (*argv, "-h", 2) == 0)	/* cmd line */
XX	{
XX		for (;;)
XX		{
XX			GroupSpecs (&argv[0][2]);
XX			if (strncmp (*++argv, "-h", 2) != 0)
XX				break;
XX		}
XX	}
XX	else if (strcmp ("-f", *argv) == 0)			/* named file */
XX	{
XX		if (*++argv == NULL)
XX			panic ("No header file named after -f");
XX		FileSpecs (*argv++);
XX	}
XX	else if ((p = getenv ("HEADERS")) != NULL)		/* env var */
XX		FileSpecs (p);
XX	else							/* default */
XX	{
XX		GroupSpecs ("subject summary keywords");
XX		GroupSpecs ("from reply-to sender");
XX		GroupSpecs ("date");
XX	}
XX
XX	while (*argv != NULL)		/* process input files */
XX		Headers (*argv++);
XX	exit (0);
XX}
XX
XX
XX/*
XX	Load the specifications from the file fname.  Standard input is
XX	reopened on that file and each line read from it is passed to
XX	GroupSpecs as one group of header names.  Dies through panic if
XX	the file cannot be opened.
XX*/
XX
XXFileSpecs (fname)
XXchar	*fname;
XX{
XXchar	line[BUFSIZ];
XX
XX	if (freopen (fname, "r", stdin) == NULL)
XX		panic ("Can't open specfile %s", fname);
XX
XX	for (;;)
XX	{
XX		if (fgets (line, BUFSIZ, stdin) == NULL)
XX			break;			/* end of specfile */
XX		GroupSpecs (line);
XX	}
XX}
XX
XX/*
XX	Append one group of header names to the specification list.
XX
XX	bp holds names separated by blanks, commas, tabs or newlines.  It
XX	is split in place by strtok, so it must be writable.  Each name
XX	is copied and lowercased.  The first name starts a new group at
XX	the end of the group list; the remaining names are chained behind
XX	it as alternates.  A string with no names adds nothing.  Dies
XX	through panic when memory runs out.
XX*/
XX
XXGroupSpecs (bp)
XXchar	*bp;
XX{
XXstatic Header	*gtail;		/* first header of the newest group */
XXHeader		*node;		/* header being added */
XXHeader		*last;		/* header most recently added to the group */
XXchar		*name;
XX
XX	name = strtok (bp, " ,\t\n");
XX	if (name == NULL)
XX		return;			/* nothing but separators */
XX
XX	node = New (Header);
XX	if (node == NULL)
XX		panic ("GroupSpecs: out of memory");
XX	if (head == NULL)		/* very first group */
XX		head = node;
XX	else				/* hang it after the newest group */
XX		gtail->nextGrp = node;
XX	gtail = node;
XX	node->hName = newstr (name);
XX	lower (node->hName);
XX
XX	last = node;
XX	while ((name = strtok (NULL, " ,\t\n")) != NULL)
XX	{
XX		node = New (Header);
XX		if (node == NULL)
XX			panic ("GroupSpecs: out of memory");
XX		node->hName = newstr (name);
XX		lower (node->hName);
XX		last->nextHdr = node;
XX		last = node;
XX	}
XX}
XX
XX
XX/*
XX	Print the selected headers of one article.
XX
XX	Standard input is reopened on the article; if that fails a
XX	message goes to stderr and the article is skipped.  All saved
XX	header lines are emptied first so nothing carries over from the
XX	previous article.  Lines are then read up to the first empty line
XX	(or end of file); the name of each one, i.e. the text before the
XX	first ':', blank or newline, is lowercased and offered to
XX	CheckHeader.  Finally a blank line and the article name are
XX	printed, followed by, for each group in order, the first header
XX	of the group for which a line was saved.
XX*/
XX
XXHeaders (article)
XXchar	*article;
XX{
XXchar	line[BUFSIZ];
XXchar	name[BUFSIZ];
XXchar	*src, *dst;
XXHeader	*grp, *hdr;
XX
XX	if (freopen (article, "r", stdin) == NULL)
XX	{
XX		fprintf (stderr, "%s: cannot open article\n", article);
XX		return;
XX	}
XX
XX	/* forget the lines saved for the previous article */
XX	for (grp = head; grp != NULL; grp = grp->nextGrp)
XX	{
XX		for (hdr = grp; hdr != NULL; hdr = hdr->nextHdr)
XX			hdr->hBuf[0] = '\0';
XX	}
XX
XX	for (;;)
XX	{
XX		if (fgets (line, BUFSIZ, stdin) == NULL)
XX			break;		/* ran out of article */
XX		if (line[0] == '\n' || line[0] == '\0')
XX			break;		/* end of header section */
XX
XX		/* copy the header name out of the line */
XX		dst = name;
XX		for (src = line;
XX		     *src != '\0' && *src != ':' && *src != ' ' && *src != '\n';
XX		     ++src)
XX			*dst++ = *src;
XX		*dst = '\0';
XX
XX		lower (name);
XX		CheckHeader (name, line);
XX	}
XX
XX	printf ("\n%s\n", article);
XX	for (grp = head; grp != NULL; grp = grp->nextGrp)
XX	{
XX		/* skip the alternates for which the article had no line */
XX		hdr = grp;
XX		while (hdr != NULL && hdr->hBuf[0] == '\0')
XX			hdr = hdr->nextHdr;
XX		if (hdr != NULL)
XX			fputs (hdr->hBuf, stdout);
XX	}
XX}
XX
XX
XX/*
XX	Look hdrName up in the specifications, group by group.  At the
XX	first header whose name is identical, artLine is copied into that
XX	header's buffer and the search stops.  A name that is not in the
XX	specifications is ignored.
XX*/
XX
XXCheckHeader (hdrName, artLine)
XXchar	*hdrName, *artLine;
XX{
XXHeader	*grp, *hdr;
XX
XX	for (grp = head; grp != NULL; grp = grp->nextGrp)
XX	{
XX		hdr = grp;
XX		while (hdr != NULL && strcmp (hdr->hName, hdrName) != 0)
XX			hdr = hdr->nextHdr;
XX		if (hdr != NULL)	/* found it in this group */
XX		{
XX			strcpy (hdr->hBuf, artLine);
XX			return;
XX		}
XX	}
XX}
XX
XX
XX/*
XX	Convert the string s to lowercase in place.
XX
XX	Each character is converted to unsigned char before it is given
XX	to the ctype routines: they are only defined for values in that
XX	range (and EOF), and a plain char with the high bit set may be
XX	negative.
XX*/
XX
XXlower (s)
XXchar	*s;
XX{
XXint	c;
XX
XX	for (; *s != '\0'; ++s)
XX	{
XX		c = (unsigned char) *s;
XX		if (isupper (c))
XX			*s = tolower (c);
XX	}
XX}
XX
XX
XX/*
XX	Return a freshly allocated copy of the string s.  Dies through
XX	panic when the memory cannot be obtained.
XX*/
XX
XXchar *newstr (s)
XXchar	*s;
XX{
XXchar	*copy;
XX
XX	copy = calloc (1, (strlen (s) + 1));	/* room for the final NUL too */
XX	if (copy == NULL)
XX		panic ("newstr: out of memory");
XX	strcpy (copy, s);
XX	return (copy);
XX}
XX
XX
XX/*
XX	panic - report a fatal error and exit with status 1.
XX
XX	The first argument is a printf-style format and the rest are its
XX	values.  The message is written to stderr with vfprintf and is
XX	followed by a newline.
XX*/
XX
XXvoid
XXpanic (va_alist)
XXva_dcl
XX{
XXva_list	ap;
XXchar	*format;
XX
XX	va_start (ap);
XX	format = va_arg (ap, char *);	/* format comes first in the list */
XX	vfprintf (stderr, format, ap);
XX	va_end (ap);
XX
XX	fprintf (stderr, "\n");
XX	exit (1);
XX}
SHAR_EOF
# Feed the file to wc on stdin so that only the count is printed; with a
# file operand wc appends the file name and the numeric test cannot parse it.
if test 5653 -ne "`wc -c < headers.c`"
then
echo shar: error transmitting headers.c '(should have been 5653 characters)'
fi
echo shar: extracting headers.1 '(2833 characters)'
sed 's/^XX//' << \SHAR_EOF > headers.1
XX.\" xroff -man.new % | lpr
XX.TH HEADERS 1
XX.UC 4
XX.SH NAME
XXheaders \- selectively retrieve news article headers
XX.SH SYNTAX
XX.B headers
XX[
XX.B \-f
XX.I specfile
XX]
XX.I file
XX.B \&...
XX.br
XX.B headers
XX[
XX.B \-h\fIheaderspec\fR
XX.B \&...
XX]
XX.I file
XX.B \&...
XX.SH DESCRIPTION
XX.I Headers
XXlooks through the header section of Usenet news articles
XXand prints headers on a user-selectable basis.
XXBecause it only
XXreads the header section and not the body of articles, it provides
XXa fast way of looking through large numbers of articles (particularly
XXin sources groups).
XXMultiple
XXheaders per article can be retrieved, and headers can be grouped
XXso that if one is missing (e.g., "From:"), another can be selected
XXin its place (e.g., "Reply-To:").
XXIn a sense,
XX.I headers
XXacts like an intelligent
XX.I fgrep
XXfor news articles.
XX.PP
XXThe headers to print can be specified either on the command
XXline or in a file.
XXCapitalization is irrelevant with either method.
XXIf the
XX.B \-f
XXoption is used, the contents of
XX.I specfile
XXwill be examined for specifications.
XXThe header groups should be listed one per line in this file.
XXThe groups should be
XXlisted in the order you want the headers to be printed.
XXWithin each group, list first the header you really want, followed by
XXany reasonable alternative headers to print if that one is missing.
XXHeader names should be separated by whitespace (blanks, tabs) or commas.
XXExample:
XX.sp .5
XX.in +1i
XX.nf
XXsubject
XXfrom sender reply-to
XXdate posted posted-date
XX.fi
XX.in -1i
XX.sp .5
XXThis example will print the "Subject:" header first, followed by the "From:"
XXheader (or the "Sender:" or "Reply-To:" header if there is no "From:"),
XXfollowed by
XXthe "Date:" header.
XX.PP
XXAlternatively, the
XX.B \-h
XXoption may be used, once for each header group.
XXHeader names in each group should be separated by commas.
XXThe above example would be written
XX.sp .5
XX.ti +1i
XXheaders\0-hsubject\0-hfrom,sender,reply-to\0-hdate
XX.sp .5
XXIf neither
XX.B \-f
XXnor
XX.B \-h
XXis used, but the HEADERS environment variable is set to the name of a file,
XXthat file will be examined in the same fashion as for
XX.B \-f
XXto determine specifications.
XXFailing all of these, the default specifications are:
XX.sp .5
XX.in +1i
XX.nf
XXsubject summary keywords
XXfrom reply-to sender
XXdate
XX.fi
XX.in -1i
XX.sp .5
XX.PP
XXAfter digesting the specifications, each file is examined in turn;
XXthe name of the file is printed followed by
XXany headers found, according to the order of the specifications.
XX.SH NOTE
XXThe output of
XX.I headers
XXcan be put into a file and used as input to
XX.IR lookbib .
XX.SH "SEE ALSO"
XXfgrep(1), lookbib(1), news(5)
XX.SH "WHO-TO-BLAME"
XXPaul DuBois, dubois@primate.wisc.edu
XX.SH BUGS
XXDoesn't stat the input files to determine whether they're really
XXplain files (as opposed to, say, directories).
XXThis isn't usually a problem; directory contents typically don't satisfy
XXmany of the specifications.
SHAR_EOF
# Feed the file to wc on stdin so that only the count is printed; with a
# file operand wc appends the file name and the numeric test cannot parse it.
if test 2833 -ne "`wc -c < headers.1`"
then
echo shar: error transmitting headers.1 '(should have been 2833 characters)'
fi
#	End of shell archive
exit 0

-- 
Please send comp.sources.unix-related mail to rsalz at uunet.uu.net.



More information about the Comp.sources.unix mailing list