Path: utzoo!attcan!uunet!husc6!rutgers!sunybcs!boulder!forys
From: forys@sigi.Colorado.EDU (Jeff Forys)
Newsgroups: comp.mail.mh
Subject: Re: Sorting messages by Date and Subject?
Summary: A new version of sortm(1) that sorts by Date and Subject.
Message-ID: <6931@sigi.Colorado.EDU>
Date: 4 Jul 88 22:13:57 GMT
References: <6848@sigi.Colorado.EDU>
Reply-To: forys@boulder.Colorado.EDU (Jeff Forys)
Organization: University of Colorado, Boulder
Lines: 508

In article <6848@sigi.Colorado.EDU> I wrote:
> Has anyone written a script to sort messages by Date and Subject?

Craig Leres (leres@helios.ee.lbl.gov) sent me a version of sortm(1) that
sorts messages by Date and Subject (when given the "-subj" flag).  The
code was written by Van Jacobson (now, where have you heard that name
before?).  Several people have expressed interest in this, and I've
received permission to post the revised version.

What follows is a revised sortm(1) that does "just what I wanted"...

Thanks!
	Jeff Forys
--------------------------------- cut here ------------------------------
/*
 * revision 1.3
 * date: 87/05/20 21:30:02;  author: van;  state: Exp;  lines added/del: 122/82
 * corrected sorting of subsets of folder.  Just permute original
 * numbers.  Don't pack or renumber.
* * revision 1.2 * date: 87/05/19 05:33:38; author: van; state: Exp; lines added/del: 360/256 * added subject sorting (-subj flag) * * revision 1.1 * date: 87/05/18 22:58:33; author: van; state: Exp; * Initial revision */ /* sortm.c - sort messages in a folder by date/time */ #include "../h/mh.h" #include "../zotnet/tws.h" #include#include #include #include static struct swit switches[] = { #define DATESW 0 "datefield field", 0, #define VERBSW 1 "verbose", 0, #define NVERBSW 2 "noverbose", 0, #define SUBJSW 3 "subject", 0, #define HELPSW 4 "help", 4, NULL, NULL }; struct smsg { int s_msg; unsigned long s_clock; char *s_subj; }; static struct smsg *smsgs; int nmsgs; int subjsort; /* sort on subject if != 0 */ int verbose; int dsort (); int subsort (); /* ARGSUSED */ main (argc, argv) int argc; char **argv; { int msgp = 0; int i; int msgnum; char *cp; char *maildir; char *datesw = NULL; char *folder = NULL; char buf[100]; char **ap; char **argp; char *arguments[MAXARGS]; char *msgs[MAXARGS]; struct msgs *mp; struct smsg **dlist; invo_name = r1bindex (argv[0], '/'); if ((cp = m_find (invo_name)) != NULL) { ap = brkstring (cp = getcpy (cp), " ", "\n"); ap = copyip (ap, arguments); } else ap = arguments; (void) copyip (argv + 1, ap); argp = arguments; while (cp = *argp++) { if (*cp == '-') switch (smatch (++cp, switches)) { case AMBIGSW: ambigsw (cp, switches); done (1); case UNKWNSW: adios (NULLCP, "-%s unknown", cp); case HELPSW: (void) sprintf(buf, "%s [+folder] [msgs] [switches]", invo_name); help (buf, switches); done (1); case DATESW: if (datesw) adios (NULLCP, "only one date field at a time"); if (!(datesw = *argp++) || *datesw == '-') adios (NULLCP, "missing argument to %s", argp[-2]); continue; case SUBJSW: subjsort = 1; continue; case VERBSW: verbose++; continue; case NVERBSW: verbose = 0; continue; } if (*cp == '+' || *cp == '@') { if (folder) adios (NULLCP, "only one folder at a time!"); else folder = path (cp + 1, *cp == '+' ? 
TFOLDER : TSUBCWF); } else msgs[msgp++] = cp; } if (!m_find ("path")) free (path ("./", TFOLDER)); if (!msgp) msgs[msgp++] = "all"; if (!datesw) datesw = "date"; if (!folder) folder = m_getfolder (); maildir = m_maildir (folder); if (chdir (maildir) == NOTOK) adios (maildir, "unable to change directory to"); if (!(mp = m_gmsg (folder))) adios (NULLCP, "unable to read folder %s", folder); if (mp->hghmsg == 0) adios (NULLCP, "no messages in %s", folder); for (msgnum = 0; msgnum < msgp; msgnum++) if (!m_convert (mp, msgs[msgnum])) done (1); m_setseq (mp); if ((nmsgs = read_hdrs (mp, datesw)) <= 0) adios (NULLCP, "no messages to sort"); /* * sort a list of pointers to our "messages to be sorted". */ dlist = (struct smsg **) malloc ((nmsgs+1) * sizeof(*dlist)); if (! dlist) adios (NULLCP, "couldn't allocate sort memory"); for (i = 0; i < nmsgs; i++) dlist[i] = &smsgs[i]; dlist[nmsgs] = 0; qsort ((char *) dlist, nmsgs, sizeof(*dlist), dsort); /* * if we're sorting on subject, we need another list * in subject order, then a merge pass to collate the * two sorts. */ if (subjsort) { struct smsg **slist; struct smsg **flist; register struct smsg ***il; register struct smsg **fp; register struct smsg **dp; slist = (struct smsg **) malloc ((nmsgs+1) * sizeof(*slist)); if (! slist) adios (NULLCP, "couldn't allocate sort memory"); bcopy ((char *)dlist, (char *)slist, (nmsgs+1)*sizeof(*slist)); qsort ((char *)slist, nmsgs, sizeof(*slist), subsort); /* * make an inversion list so we can quickly find * the collection of messages with the same subj * given a message number. */ il = (struct smsg ***) calloc (mp->hghsel+1, sizeof(*il)); if (! il) adios (NULLCP, "couldn't allocate msg list"); for (i = 0; i < nmsgs; i++) il[slist[i]->s_msg] = &slist[i]; /* * make up the final list, chronological but with * all the same subjects grouped together. */ flist = (struct smsg **) malloc ((nmsgs+1) * sizeof(*flist)); if (! 
flist) adios (NULLCP, "couldn't allocate msg list"); fp = flist; for (dp = dlist; *dp;) { register struct smsg **s = il[(*dp++)->s_msg]; /* see if we already did this guy */ if (! s) continue; *fp++ = *s++; /* * take the next message(s) if there is one, * its subject isn't null and its subject * is the same as this one. */ while (*s && (*s)->s_subj[0] && strcmp((*s)->s_subj, s[-1]->s_subj) == 0) { il[(*s)->s_msg] = 0; *fp++ = *s++; } } *fp = 0; (void) free (slist); (void) free (dlist); dlist = flist; } rename_msgs (mp, dlist); m_replace (pfolder, folder); m_sync (mp); m_update (); done (0); } static int read_hdrs (mp, datesw) register struct msgs *mp; register char *datesw; { int msgnum; struct tws tb; register struct smsg *s; twscopy (&tb, dtwstime ()); smsgs = (struct smsg *) calloc ((unsigned) (mp->hghsel - mp->lowsel + 2), sizeof *smsgs); if (smsgs == NULL) adios (NULLCP, "unable to allocate sort storage"); s = smsgs; for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) { if (mp->msgstats[msgnum] & SELECTED) { if (getws (datesw, msgnum, s)) { s->s_msg = msgnum; s++; } } } s->s_msg = 0; nmsgs = s - smsgs; } static getws (datesw, msg, smsg) register char *datesw; int msg; register struct smsg *smsg; { int compnum; register int state; char *msgnam; char buf[BUFSIZ], nam[NAMESZ]; register struct tws *tw; register char *datecomp = NULLCP; register char *subjcomp = NULLCP; register FILE *in; if ((in = fopen (msgnam = m_name (msg), "r")) == NULL) { admonish (msgnam, "unable to read message"); return (0); } for (compnum = 1, state = FLD;;) { switch (state = m_getfld (state, nam, buf, sizeof buf, in)) { case FLD: case FLDEOF: case FLDPLUS: compnum++; if (uleq (nam, datesw)) { datecomp = add (buf, datecomp); while (state == FLDPLUS) { state = m_getfld (state, nam, buf, sizeof buf, in); datecomp = add (buf, datecomp); } if (!subjsort || subjcomp) break; } else if (subjsort && uleq (nam, "subject")) { subjcomp = add (buf, subjcomp); while (state == FLDPLUS) { state = 
m_getfld (state, nam, buf, sizeof buf, in); subjcomp = add (buf, subjcomp); } if (datecomp) break; } else { /* just flush this guy */ while (state == FLDPLUS) state = m_getfld (state, nam, buf, sizeof buf, in); } continue; case BODY: case BODYEOF: case FILEEOF: break; case LENERR: case FMTERR: if (state == LENERR || state == FMTERR) admonish (NULLCP, "format error in message %d (header #%d)", msg, compnum); if (datecomp) free (datecomp); if (subjcomp) free (subjcomp); (void) fclose (in); return (0); default: adios (NULLCP, "internal error -- you lose"); } break; } if (!datecomp || (tw = dparsetime (datecomp)) == NULL) { struct stat st; admonish (NULLCP, "can't parse %s field in message %d", datesw, msg); /* use the modify time of the file as its date */ (void) fstat (fileno (in), &st); smsg->s_clock = st.st_mtime; } else { smsg->s_clock = twclock (tw); } if (subjsort) { register char *cp; register char *cp2; register char c; if (!subjcomp) subjcomp = ""; /* * try to make the subject "canonical": delete leading "re:", * punctuation, white space & smash everything to lower case. */ cp = subjcomp; cp2 = subjcomp; while (c = *cp++) if (isupper (c)) *cp2++ = tolower (c); else if (isalnum (c) || c == ':') *cp2++ = c; *cp2 = '\0'; while (subjcomp[0] == 'r' && subjcomp[1] == 'e' && subjcomp[2] == ':') subjcomp += 3; smsg->s_subj = subjcomp; } (void) fclose (in); if (datecomp) free (datecomp); return (1); } /* * sort on dates. */ static int dsort (a, b) register struct smsg **a, **b; { if ((*a)->s_clock < (*b)->s_clock) return (-1); else if ((*a)->s_clock > (*b)->s_clock) return (1); else if ((*a)->s_msg < (*b)->s_msg) return (-1); else return (1); } /* * sort on subjects. 
*/ static int subsort (a, b) register struct smsg **a, **b; { register int i; if (i = strcmp ((*a)->s_subj, (*b)->s_subj)) return (i); return (dsort (a, b)); } static rename_msgs (mp, mlist) register struct msgs *mp; register struct smsg **mlist; { register int i, j, old, new; register struct smsg *sp; short stats; char f1[BUFSIZ], f2[BUFSIZ], tmpfil[BUFSIZ]; (void) strcpy (tmpfil, m_scratch ("", invo_name)); for (i = 0; i < nmsgs; i++) { if (! (sp = mlist[i])) continue; /* did this one */ j = sp - smsgs; if (j == i) continue; /* this one doesn't move */ /* * the guy that was msg j is about to become msg i. * rename 'j' to make a hole, then recursively rename * guys to fill up the hole. */ old = smsgs[j].s_msg; new = smsgs[i].s_msg; (void) strcpy (f1, m_name (old)); if (verbose) printf ("renaming chain from %d to %d\n", old, new); if (rename (f1, tmpfil) == NOTOK) adios (tmpfil, "unable to rename %s to ", f1); stats = mp->msgstats[old]; rename_chain (mp, mlist, j, i); if (rename (tmpfil, m_name(new)) == NOTOK) adios (m_name(new), "unable to rename %s to", tmpfil); mp->msgstats[new] = stats; mp->msgflags |= SEQMOD; } } rename_chain (mp, mlist, msg, endmsg) register struct msgs *mp; struct smsg **mlist; int msg, endmsg; { int nxt, old, new; char *newname; char oldname[BUFSIZ]; nxt = mlist[msg] - smsgs; mlist[msg] = 0; old = smsgs[nxt].s_msg; new = smsgs[msg].s_msg; (void) strcpy (oldname, m_name (old)); newname = m_name (new); if (verbose) printf (" %s becomes %s\n", oldname, newname); if (rename (oldname, newname) == NOTOK) adios (newname, "unable to rename %s to", oldname); mp->msgstats[new] = mp->msgstats[old]; if (mp->curmsg == old) m_setcur (mp, new); if (nxt != endmsg) rename_chain (mp, mlist, nxt, endmsg); }