Relay-Version: version B 2.10 5/3/83; site utzoo.UUCP
Posting-Version: version B 2.10.2 9/5/84; site cognos.UUCP
Path: utzoo!dciem!nrcaer!cognos!garyp
From: garyp@cognos.UUCP (Gary Puckering)
Newsgroups: net.sources
Subject: slice utility (improved mailsplit)
Message-ID: <288@cognos.UUCP>
Date: Tue, 16-Dec-86 17:07:31 EST
Article-I.D.: cognos.288
Posted: Tue Dec 16 17:07:31 1986
Date-Received: Thu, 18-Dec-86 00:33:40 EST
Reply-To: garyp@cognos.UUCP (Gary Puckering)
Distribution: net
Organization: Cognos Inc., Ottawa, Canada
Lines: 542
Slice splits up a file into lots of little files. It reads its input a
line at a time, and starts a new output file when
* the input line matches a pattern, or
* there have been n lines written to the current output file.
You can use it to split a mailbox or an archive of news articles into
one article per file, for example. In fact, you can do this with about
5 lines of awk, but you run into problems with long lines (and speed,
if it bothers you!).
Slice was originally contributed by Russell Quinn as the program
"mailsplit". Unlike mailsplit, however, slice allows multiple output
formats to be specified (rather than multiple input files). This makes
it possible to deposit the pieces (slices!) into files named whatever
you want. For example:
slice [example arguments lost in extraction]
sed 's/^X//' << \SHAR_EOF > 'Makefile'
X# Makefile for slice
X#
X# Originally contributed as mailsplit, written by:
X# R E Quin, October 1986 University of Warwick (UK) Computer Science
X# warwick!req +44 203 523193
X#
X# Modified and recontributed by:
X# Gary Puckering 3755 Riverside Dr.
X# Cognos Incorporated Ottawa, Ontario
X# (613) 738-1440 CANADA K1G 3N3
X#
X# This makefile is intended for the sys5 Augmented make.
X#
XMAKE=make
XCLEAN=clean
XCC=cc
XHACKS=
XCFLAGS=-O $(HACKS)
X# R is the root of the filesystem -- i.e. where to install things.
X# DESTDIR and MANDIR are built from $R; the binary is installed in
X# $(DESTDIR) and the manual page in $(MANDIR).
XR=/usr/local
XDESTDIR=$R/usr/bin
XMANDIR=$R/usr/man/man1
XPROG=slice
X
X# PROG is what to make; DESTDIR is where to put it.
X# HACKS are for -DBUGFIX style things.
X
X# R is intended to be the root of the filesystem if it isn't "/"
X
X# "make install " does a $(MAKE) $(CLEAN) at the end, so you can say
X# CLEAN= make -e install
X# if you don't want to remove the garbage at the end, for example.
X# This is useful primarily for testing the install: entry!
X
Xall: $(PROG)
X
X# Link step only; the header dependency belongs to the object file.
Xslice: slice.o
X	$(CC) -o $(PROG) slice.o
X
X# Recompile (via the built-in .c.o rule) when the source or header changes.
Xslice.o: slice.c opts.h
X
Xinstall: slice
X	/bin/mv $(PROG) $(DESTDIR)
X	/bin/cp slice.1 $(MANDIR)
X	$(MAKE) $(CLEAN)
X
Xclean:
X	rm -rf core *.o $(PROG) a.out
SHAR_EOF
# Verify the size of the file just extracted.  wc reads from stdin so that
# it prints only the count; with a file operand it also prints the name,
# which is not a valid operand for the numeric -ne comparison.
if test 1221 -ne "`wc -c < 'Makefile'`"
then
echo shar: error transmitting "'Makefile'" '(should have been 1221 characters)'
fi
# Announce the next file, warn if it already exists, then extract it by
# stripping the leading X from every line up to SHAR_EOF.
echo shar: extracting "'opts.h'" '(769 characters)'
if test -f 'opts.h'
then
echo shar: over-writing existing file "'opts.h'"
fi
sed 's/^X//' << \SHAR_EOF > 'opts.h'
X
X#define FALSE 0
X#define TRUE 1
Xtypedef int bool;
X
X#define EXIT_SYNTAX 1 /* syntax error parsing commandline options */
X#define EXIT_SEMANT 2 /* options are correct but meaningless */
X#define EXIT_RUNERR 3 /* error opening a file, for example */
X#define EXIT_INTERN 4 /* internal error -- bug!! */
X
X#define nextstr(s,count,array,failure) \
X {if (((count)<2) && !((array)[0][1])) {failure;}\
X else {if ((array)[0][1]) { s = &((array)[0][1]); } \
X else {s = array[1]; --count; array++;}}}
X
X#define DFLTNAME "slice" /* input filename (for stdin) */
X#define BUFLEN BUFSIZ /* the maximum length of an input line (incl. "\n\0") */
X#define MAXFILENAMELEN BUFSIZ /* longer than the longest possible file name */
X#define DFLTOUTNAME "%s:%03.d" /* o/p file name format */
X
SHAR_EOF
# Verify the size of the file just extracted.  wc reads from stdin so that
# it prints only the count; with a file operand it also prints the name,
# which is not a valid operand for the numeric -ne comparison.
if test 769 -ne "`wc -c < 'opts.h'`"
then
echo shar: error transmitting "'opts.h'" '(should have been 769 characters)'
fi
# Announce the next file, warn if it already exists, then extract it by
# stripping the leading X from every line up to SHAR_EOF.
echo shar: extracting "'slice.c'" '(8964 characters)'
if test -f 'slice.c'
then
echo shar: over-writing existing file "'slice.c'"
fi
sed 's/^X//' << \SHAR_EOF > 'slice.c'
X/* slice -- split files at lines that match a pattern */
X#include <stdio.h> /* FILE, BUFSIZ, NULL, fprintf, fgets, fputs */
X#include <string.h> /* strcmp */
X
X#include "opts.h" /* defines nextstr() etc */
X
Xchar *progname = "slice"; /* for error messages */
Xchar *pattern = (char *) NULL; /* reg expr used to split file */
Xchar **format; /* ptr for format strings */
Xint n_format; /* number of format strings */
Xchar *defaultfmt[] = {DFLTOUTNAME}; /* default format string */
Xint filenumber = 0;
Xint every_n_lines = 0; /* split every n lines */
Xbool exclude = FALSE; /* exclude matched line from o/p files */
Xbool split_after = FALSE; /* split after matched line */
X
Xusage(status)
X int status; /* exit if status != 0 */
X{
X fprintf(stderr,"Usage: %s [-f filename] [-a] [-x] [-i] [-m|-s|-n] [-e expression | expression] [format...]\n", progname);
X if (status)
X exit(status);
X}
X
Xmain(argc, argv)
X char *argv[];
X{
X /* split files at points that match a given pattern */
X /* initialise things */
X bool donefiles = FALSE;
X char *buffer;
X char *infile = (char *) NULL;
X
X int getnum(); /* does more checking than atoi */
X char *rmpath(); /* removes leading pathname from a filename */
X
X /* now remove possible leading pathname
X * (e.g. /usr/bin/slice is to report it's errors as slice
X */
X progname = rmpath(argv[0]);
X
X
X while (--argc) {
X if (**++argv == '-') {
X switch(*++*argv) {
X case 'a': { /* split after pattern */
X split_after = TRUE;
X break;
X }
X case 'e': { /* pattern (expression) */
X ++argv; argc--;
X if (argc==0 || !**argv) {
X error("Pattern after -e missing or null\n");
X usage(1);
X }
X pattern = *argv;
X break;
X }
X case 'm': { /* mailbox pattern */
X pattern = "^From ";
X break;
X }
X case 's': { /* shell pattern */
X pattern = "^#! *\/bin\/sh";
X break;
X }
X case 'n': { /* -n n_lines -- split every n lines */
X nextstr(buffer,argc,argv,usage(2));
X every_n_lines = getnum(buffer);
X if (every_n_lines <= 0) {
X error("-n: number must be at least 1\n");
X exit(EXIT_SYNTAX);
X }
X break;
X }
X case 'f': {
X ++argv; argc--;
X if (argc==0 || !**argv) {
X error("Filename after -f missing or null\n");
X usage(1);
X }
X infile = *argv;
X break;
X }
X case 'i': { /* -i initial_number */
X nextstr(buffer,argc,argv,usage(2));
X filenumber = getnum(buffer);
X if (filenumber < 0) {
X error("-i must be followed by a positive number\n");
X exit(EXIT_SYNTAX);
X }
X filenumber--; /* needs to be one less to start with */
X break;
X }
X case 'x': { /* exclude matched lines */
X exclude = TRUE;
X break;
X }
X default: {
X error("Unknown flag -%c\n", **argv);
X usage(1);
X }
X } /* end switch */
X } else {
X if (!pattern) pattern = *argv; /* first non-flag is pattern */
X else break; /* break while loop */
X } /* end if */
X } /* end while */
X
X if (!argc) {
X format = defaultfmt;
X n_format = 1; }
X else {
X format = argv;
X n_format = argc;
X }
X
X#ifdef DEBUG
X printf("argc=%d\n",argc);
X printf("format='%s'\n",*format);
X printf("pattern='%s'\n",pattern);
X#endif
X
X if (!infile) split(stdin, DFLTNAME, pattern);
X else fsplit(infile, pattern);
X
X exit(0);
X}
X
Xfsplit(name, pat)
X char *name;
X char *pat;
X{
X FILE *fd;
X
X if (!name || !*name) {
X error("Can't split a file with an empty name\n");
X usage(2);
X }
X
X if ( (fd = fopen(name, "r")) == NULL) {
X error("Can't open %s\n", name);
X return;
X }
X
X (void) split(fd, name, pat);
X
X if (fclose(fd) == EOF) { /* something's gone wrong */
X error("Can't close %s -- giving up\n", name);
X exit(EXIT_RUNERR);
X }
X}
X
Xchar buffer[BUFLEN];
X
Xint
Xsplit(input, name, pattern)
X FILE *input; /* fd of input file */
X char *name; /* input filename */
X char *pattern; /* pattern used to split file */
X{
X /* do the real work here. Oh dear, I don't know how... */
X /* we are always called with an open file. */
X
X extern char *re_comp(); /* compile string into automaton */
X extern int re_exec(); /* try to match string */
X#define REMATCH 1
X#define RENOMATCH 0
X#define REFAULT -1
X
X char *errmessage;
X FILE *output = NULL;
X char fnambuf[MAXFILENAMELEN + 2]; /* +1 for null, +1 for overflow */
X int reg_status = 0; /* regular expression status */
X int line = 0;
X
X if (split_after && exclude) {
X error("Can't specify both -a and -x\n");
X usage(2);
X }
X
X if (every_n_lines && exclude) {
X error("Can't specify both -n and -x\n");
X usage(2);
X }
X
X if (every_n_lines && split_after) {
X error("Can't specify both -n and -a\n");
X usage(2);
X }
X
X if (every_n_lines && pattern) {
X error("Can't specify both -n and pattern\n");
X usage(2);
X }
X
X if (!every_n_lines && (!pattern || !*pattern)) {
X error("Can't match an empty pattern\n");
X usage(2);
X }
X
X if (!every_n_lines && (errmessage = re_comp(pattern)) != NULL) {
X error("Error in pattern <%s>: %s\n", pattern, errmessage);
X exit(EXIT_RUNERR);
X }
X /* errmessage is NULL here */
X
X /* the -2 to fgets is because of the null and \n appended */
X while (fgets(buffer, BUFLEN - 2, input) != NULL) {
X if (!output || /* first line */
X (every_n_lines > 0 && (++line == every_n_lines)) || /* nth line */
X (!every_n_lines &&
X ((reg_status = re_exec(buffer)) == REMATCH)) ) { /* matches pat */
X /* don't look at 1st line of file, to avoid an infinite */
X /* recursion... */
X
X if (output && split_after) {
X fputs(buffer, output);
X }
X
X if (n_format && mkname(fnambuf, name)) {;
X /* check for output file = input file */
X if (strcmp(fnambuf,name)==0) {
X error("Output file same as input file\n");
X exit(EXIT_RUNERR);
X }
X /* start a new file */
X if (output && output != stdout) {
X if (fclose(output) == EOF) {
X error("Can't close output file\n");
X exit(EXIT_RUNERR);
X }
X output = NULL;
X }
X line = 0;
X if (fnambuf[0]=='+' && fnambuf[1]==NULL) {
X output = stdout;
X } else {
X if ((output = fopen(fnambuf, "a")) == NULL) {
X error("Can't open output file %s\n", fnambuf);
X exit(EXIT_RUNERR);
X }
X }
X /* if matched lines are excluded, skip the fputs */
X if (exclude && reg_status == REMATCH) continue;
X
X /* if file is to be split after pattern, put already done */
X if (split_after && reg_status == REMATCH) continue;
X } else {
X error("Insufficient formats -- last file contains remainder\n");
X }
X } else if (reg_status == REFAULT) {
X /* the re_exec failed */
X error("Internal error trying to match <%s> to <%s>\n",
X pattern, buffer);
X exit(EXIT_INTERN);
X }
X fputs(buffer, output);
X }
X return (filenumber == -1); /* exit status for main */
X}
X
Xbool
Xmkname(fnambuf, name)
X char *fnambuf;
X char *name;
X{
X int i, s = -1, d = -1;
X static bool new_format = TRUE;
X static bool perpetual = FALSE;
X static bool d_before_s = FALSE;
X
X if (new_format) {
X if (!n_format) {
X error("Internal error: mkname called but formats have run out\n");
X exit(EXIT_INTERN);
X }
X i = bfsearch(*format, "%",0);
X s = bfsearch(*format, "%s",0);
X if (i>=0 && i==s) d = bfsearch(*format, "%",++i);
X else d = i;
X if (d<0) perpetual = FALSE;
X else perpetual = TRUE;
X if (d