Xref: utzoo comp.sources.d:3026 news.admin:4137
Path: utzoo!utgpu!watmath!clyde!att!rutgers!tut.cis.ohio-state.edu!bloom-beacon!bu-cs!purdue!decwrl!reid
From: reid@decwrl.DEC.COM (Brian Reid)
Newsgroups: comp.sources.d,news.admin
Subject: arbitron program (v2.4.2--last updated 4 June 1987)
Message-ID: <16542@decwrl.dec.com>
Date: 1 Dec 88 11:53:10 GMT
Sender: reid@decwrl.dec.com
Organization: DEC Western Research Laboratory
Lines: 237
Approved: reid

This is the script for the "arbitron" system that is used to produce the
data for the monthly USENET readership surveys in news.lists. It is not so
much a "source" as it is a piece of USENET folklore, posted every month so
that new people will be sure to find it. (Cf. the "History of USENET" articles
posted every month). It is posted to these newsgroups rather than to a source
group so that it will be widely available but will not be archived.

#! /bin/sh
# @(#)arbitron	2.4.2	06/05/87
# arbitron -- this program produces rating sweeps for USENET.
#
# Usage: arbitron
#
# To use this program, edit the "configuration" section below so that the
# information is correct for your site, and then run it. It will produce a
# readership survey for your machine and mail that survey to decwrl.dec.com,
# with a cc to you.
#
# To participate in the international monthly ratings sweeps, 
# run "arbitron" every month. I will run the statistics program on the first
# day of each month; it will include any report that has reached it by that
# time. To make sure your site's data is included, run the survey program no
# later than the 20th day of each month.
#
# Brian Reid, DEC Western Research Lab, reid@decwrl.dec.com
# Updated and bugfixed by 
#	Spencer Thomas, U.of Utah
#	Geoff Kuenning, SAH Consulting
# Updated to work with 2.10.1 and older news systems by
#	Lindsay Cleveland, AT&T Technologies/Bell Labs
# Made to work with 16-bit address spaces by
#	Andy Walker, Maths Dept., University of Nottingham, UK
# Nagging Bourne shell bug fixed by
#	Tom Donahue, Rabbit Software Corp
#
# Note that the results of this program are dependent on the rate at which
# you expire news.  If you are a small site that expires news rapidly, the
# results may indicate fewer active readers than you actually have.
#
###########################################################################
# Configuration information. Edit this section to reflect your site data. #
# Scratch directory; every work file of a run is created here as arb.*.<pid>.
TMPDIR="/tmp"
# News library directory: the "active" file is read from here and the
# directory is put at the front of PATH.
NEWS="/usr/lib/news"
# Root of the article spool tree (one subdirectory level per group component).
SPOOL="/usr/spool/news"

# Guess the flavour of Unix we are on: "bsd" when the directory /usr/ucb
# exists, "usg" otherwise. A site with only one kind of system can replace
# the test with a plain assignment of the correct value.
STYPE="usg"
if test -d /usr/ucb
then
    STYPE="bsd"
fi

# Range of /etc/passwd UID's that represent actual people (rather than
# maintenance accounts or daemons or whatever). Both bounds are inclusive.
lowUID=5
highUID=9999

# If you aren't running a distributed news system (nntpd & rrn, usually),
# leave NEWSHOST blank. Else set it to the name of the host from which you
# can rcp a copy of the active file.
NEWSHOST=

# Recipients of the report: the survey collector plus a copy to whoever runs
# the script. $USER is not set on every system, so fall back on $LOGNAME when
# it is missing; without a name the copy would silently not be sent.
# uucp path: {sun, hplabs, pyramid, decvax, ucbvax}!decwrl!netsurvey
summarypath="netsurvey@decwrl.dec.com ${USER-$LOGNAME}"


# We need to find the uucp name of your host. If this code doesn't work,
# then just put it in literally like this:
#	hostname="ihnp4"
#
# $cmd is a "||" chain run by a child shell: the first command that succeeds
# supplies the name. STYPE is only ever set to "bsd" or "usg" in this script,
# so the System V chain must answer to "usg" (the old "sysv" label is kept as
# an alias for hand-edited configurations). The catch-all chain falls back on
# uname/hostname so that a host without uucp still reports a name.

case "$STYPE" in
	bsd)      cmd='hostname || uuname -l';;
	usg|sysv) cmd='uname -n || uuname -l || hostname';;
	*)        cmd='uuname -l || uname -n || hostname';;
esac;

# Discard (rather than close) stderr, and do it inside the substitution so
# the "command not found" chatter of the losing candidates is dropped
# regardless of how the shell orders redirections on a bare assignment.
hostname=`sh -c "$cmd" 2>/dev/null`

# Look for programs in the news library first, then the usual system places.
PATH="${NEWS}:/usr/local/bin:/usr/ucb:/usr/bin:/bin"
############################################################################
export PATH
# ---------------------------------------------------------------------------
# Remove this run's work files on exit (0) and on signals 1, 2, 3 and 15.
# The string is double-quoted, so $TMPDIR and $$ are expanded now, when the
# trap is installed.
trap "rm -f $TMPDIR/arb.*.$$; exit" 0 1 2 3 15
# Split the output of date(1) into positional parameters and glue the 2nd
# and 6th words together (month and year in the traditional date layout).
set `date`
dat="${2}${6}"
# Command the finished report is piped into: $MAILER if set, else "mail",
# followed by the recipient list.
destination="${MAILER-mail} ${summarypath}"

################################
# Here are several expressions, each of which figures out approximately how
# many people use this machine. Comment out all but 1 of them; pick the one
# you like best. Initially the most universal but least reliable of them is
# uncommented.
# # ###### Scheme #1: fast but usually returns too big a number
# Count the /etc/passwd entries whose UID (field 3) lies in lowUID..highUID.
# The doubled backslashes survive the backquotes as "\$3", which the inner
# double quotes turn into a literal $3 for awk.
# NOTE(review): this copy of the script had lost the text between this
# command's input redirection and the here-document below, leaving an
# unterminated backquote; the "</etc/passwd" input and the sed command were
# restored from the variables and placeholders that are still present.
# The alternative counting schemes mentioned above were not recoverable.
nusers=`awk -F: "BEGIN {N=0}\\$3>=$lowUID && \\$3<=$highUID{N=N+1}END{print N}" </etc/passwd`

# Write the awk report program to arb.fmt.<pid>, filling in the three
# placeholders its END block prints (user count, host name, report date).
# The here-document delimiter is quoted, so nothing inside it is expanded by
# the shell; only sed touches the text.
sed -e "s/NUSERS/$nusers/" -e "s/HOSTNAME/$hostname/" -e "s/DATE/$dat/" \
    > $TMPDIR/arb.fmt.$$ << 'DOG'
# makereport -- utility for "arbitron". Early versions were copied from a
# similar script distributed with "subscribers.sh" by Blonder, McCreery, and
# Herron.
#
	BEGIN	{ rdrcount = 0 ; reader = "" ; grpcount = 0 ; realusers = 0}
#
# Active file line:  dispose of previous group (if any), record group, and
# record first and last article numbers.  Set group's reader count to none.
	NF == 4 { if (grpname != "") {
			printf("%d %s\n",grpcount, grpname)
		  }
		  grpname = $1
		  grpfirst = $3
		  grplast = $2
		  grpcount = 0
		}
#
# .newsrc line.  Break out the final number, which is the last article that
# has actually been read.  This is a pretty good indicator of the person's
# true interest in the group.  If 'lastread' for the group is a current
# (unexpired) article, record a reader for that group.  Finally, record
# the user as a "real" user of the news system.
#
	NF == 3 { if ($1 != grpname) next;
		  n1 = split($3, n2, "-")
		  n3 = split(n2[n1], n4, ",")
		  lastread = n4[n3]
	if ((grpfirst != grplast) && (lastread >= grpfirst) && (lastread <= grplast)) {
			grpcount++
			if (realuser[$2] != 1) {
			    realuser[$2] = 1
			    realusers++
			}
		  }
		}
#
# End of file.  Print the report in 2 columns.
	END	{ printf("9999 Host\t\t%s\n","HOSTNAME")
		  printf("9998 Users\t\t%d\n",NUSERS)
		  printf("9997 NetReaders\t%d\n",realusers)
		  printf("9996 ReportDate\t%s\n","DATE")
		  printf("9995 SystemType\tnews-arbitron-2.4\n")
# For reorganized network, report a group even if nobody reads it. This will
# help us keep track of where the groups propagate.
		  printf("%d %s\n",grpcount, grpname)
		}
DOG

# Write a small awk program to arb.pwd.<pid>. For every input record whose
# 6th field has not been seen before (and is neither "/" nor empty) it prints
# one line of shell: if <field 6>/.newsrc is readable, run sed over it and
# print only the lines containing ": " followed by a digit, with the first
# ":" replaced by a space and <field 1>. In /etc/passwd layout field 6 is the
# home directory and field 1 the login name, so each surviving line reads
# "group user article-list". Remembering field 6 in seen[] keeps accounts
# that share a directory from being counted twice.
# The delimiter is quoted so the shell leaves $1, $6 and the quotes alone.
cat >$TMPDIR/arb.pwd.$$ <<'MOUSE'
BEGIN	{ seen["/"]=1; seen[""] = 1; }
	{ if (seen[$6]!=1) {
		printf("if [ -r %s/.newsrc ] ; then ", $6)
		printf("sed -n '/: [0-9]/s/:/ %s/p' <%s/.newsrc; fi\n",$1,$6)
		seen[$6]=1;
	  }
}
MOUSE

# First, make sure we have an active file: the local one under $NEWS, or,
# when NEWSHOST is set, a copy fetched with rcp. The copy goes into $TMPDIR
# under the arb.*.<pid> naming so that the exit trap removes it even when
# TMPDIR has been pointed somewhere other than /tmp.
if [ -z "$NEWSHOST" ]
then ACTIVE=$NEWS/active
else ACTIVE=$TMPDIR/arb.active.$$
     rcp "$NEWSHOST:$NEWS/active" "$ACTIVE"
fi

# A failed rcp leaves no (or an empty) file, so this one test covers both
# the local and the remote case. The complaint is a diagnostic: send it to
# stderr.
if [ ! -s "$ACTIVE" ]
then
    echo "arbitron: ACTIVE file missing or empty. Cannot continue." >&2
    exit 1
fi

# Next, get the list of .newsrc files with duplicates and unreadable files
# removed. The awk program in arb.pwd.<pid> turns each passwd entry into a
# line of shell; running that through sh leaves the "group user article-list"
# records of every readable .newsrc in arb.tmp.<pid>.
# NOTE(review): this copy of the script had lost the text between the awk
# program name and the output file, which made awk read the not-yet-created
# arb.tmp file; the passwd input and the "| sh >" stage were restored from
# what the generated commands and the later uses of arb.tmp require.
awk -F: -f $TMPDIR/arb.pwd.$$ </etc/passwd | sh > $TMPDIR/arb.tmp.$$

# Check to make sure that we found some
if [ -s $TMPDIR/arb.tmp.$$ ]
then # See if "active" file has 4 fields or only two (pre-2.10.2)
     set `sed 1q < $ACTIVE`
     if [ $# -eq 2 ]
     then # Two-field active file: synthesize the missing "first article"
	  # and flag fields so every group line has the 4 fields the report
	  # program expects.
	  egrep  '^[a-z]*\.' $ACTIVE |
	  while read group last
	  do dir=`echo "$group" | sed 's;\.;/;g'`
	     # Lowest-numbered article in the group's spool directory. Only
	     # all-digit names count: the directory also holds the
	     # subdirectories of deeper groups, and sort -n would put those
	     # first. A group without a spool directory is not an error here,
	     # so ls is kept quiet.
	     first=`ls $SPOOL/$dir 2>/dev/null | grep '^[0-9][0-9]*$' | sort -n | sed 1q`
	     # $first is always set (possibly to nothing) after the command
	     # substitution, so test for emptiness explicitly; an empty value
	     # would produce a 3-field line that the report program takes for
	     # a .newsrc record.
	     if [ -z "$first" ]
	     then first=$last
	     fi
	     echo "$group $last $first X"
	  done
     else egrep '^[a-z]*\.' $ACTIVE
     fi |
     # Merge the group lines with the .newsrc records so that lines for the
     # same group end up next to each other, run the report program, order
     # the result by descending count (the 999x header lines come out on
     # top), strip blank lines and the 999x sort keys, and mail it.
     sort - $TMPDIR/arb.tmp.$$ |
     awk -f $TMPDIR/arb.fmt.$$ |
     sort -nr |
     sed '/^$/d
	  s/^999[0-9] //' |
     $destination
else echo Unable to find any readable .newsrc files 1>&2
     exit 1
fi