/*
 * top - a top users display for Unix
 *
 * SYNOPSIS:  POWER and POWER2 running AIX 3.2.5.0
 *
 * DESCRIPTION:
 * This is the machine-dependent module for AIX 3.2.5.0
 * It is tested on all POWER architectures.
 *
 * TERMCAP: -lcurses
 *
 * CFLAGS: -DORDER
 *
 * AUTHOR:  Erik Deumens <deumens@qtp.ufl.edu>
 */

#include <stdlib.h>
#include <stdio.h>
#include <fcntl.h>
#include <nlist.h>
#include <sys/sysinfo.h>
#include <procinfo.h>
#include <sys/proc.h>
/*
#include <sys/var.h>
*/
#include <pwd.h>
#include "top.h"
#include "machine.h"


/*
 * Per-process accessors.  Each takes a pointer to a struct procsinfo.
 *   PROCRESS - (ui_trss + ui_drss) * 4; shown as the RES column
 *   PROCSIZE - (ui_tsize / 1024 + ui_dvm) * 4; shown as the SIZE column
 *   PROCTIME - user plus system CPU seconds taken from ui_ru
 * format_next_process() treats the first two as kilobytes.
 * Every use of the argument is parenthesized so that any pointer
 * expression (e.g. "tab + i") can be passed safely.
 */
#define PROCRESS(p) (((p)->u.ui_trss + (p)->u.ui_drss)*4)
#define PROCSIZE(p) (((p)->u.ui_tsize/1024+(p)->u.ui_dvm)*4)
#define PROCTIME(pi) ((pi)->u.ui_ru.ru_utime.tv_sec + (pi)->u.ui_ru.ru_stime.tv_sec)

/*
 * structure procsinfo exists in AIX 4.1 and is constructed here by combining
 * procinfo and userinfo, both of which also exist in AIX 3.2.
 * One element is filled per process by get_process_info().
 */
struct procsinfo {
  struct procinfo p;	/* copy of the entry returned by getproc() */
  struct userinfo u;	/* filled in by getuser() for the same process */
};

/*
 * structure definition taken from 'monitor' by Jussi Maki (jmaki@hut.fi)
 *
 * Image of the kernel symbol "vmker" as read by get_system_info().
 * Only totalmem, freemem, numperm, totalvmem and freevmem are used in this
 * file; get_system_info() treats them as counts of 4K pages.  The nN
 * members are not referenced here and only keep the named members at the
 * right offsets.
 */
struct vmker {
    uint n0,n1,n2,n3,n4,n5,n6,n7,n8;
    uint totalmem;
    uint badmem; /* this is used in RS/6000 model 220 */
    uint freemem;
    uint n12;
    uint numperm;   /* this seems to keep other than text and data segment 
                       usage; name taken from /usr/lpp/bos/samples/vmtune.c */
    uint totalvmem,freevmem;
    uint n15, n16, n17, n18, n19;
};


/* device opened by machine_init() and read by getkval() */
#define KMEM "/dev/kmem"

/* Indices in the nlist array; must match the order of the nlst entries */
#define X_AVENRUN       0
#define X_SYSINFO       1
#define X_VMKER         2
#define X_PROC          3
#define X_V             4

/*
 * Kernel symbols looked up by machine_init() via knlist(); the resulting
 * n_value of each entry is saved as a kernel offset.  The table ends with
 * a NULL-named entry.
 */
static struct nlist nlst[] = {
    { "avenrun", 0, 0, 0, 0, 0 }, /* 0 */
    { "sysinfo", 0, 0, 0, 0, 0 }, /* 1 */
    { "vmker",   0, 0, 0, 0, 0 }, /* 2 */
    { "proc",    0, 0, 0, 0, 0 }, /* 3 */
    { "v",       0, 0, 0, 0, 0 }, /* 4 */
    {  NULL, 0, 0, 0, 0, 0 }
};


/*
 * get_process_info returns handle. definition is here.
 * format_next_process() consumes it one entry at a time.
 */
struct handle
{
	struct procsinfo **next_proc;	/* next entry of pref to format */
	int remaining;			/* entries left to format */
};

/*
 *  These definitions control the format of the per-process area
 */
/*
 * Column header line.  The "X" and the blanks after it are a placeholder
 * that format_header() overwrites in place with the user-name column title.
 */
static char header[] =
  "  PID X        PRI NICE   SIZE   RES STATE   TIME   WCPU    CPU COMMAND";
/* 0123456   -- field to fill in starts at header+6 */
#define UNAME_START 6

/*
 * sprintf format for one process line; format_next_process() supplies the
 * arguments in the same left-to-right order as the header columns, followed
 * by an optional " (sys)" suffix.
 */
#define Proc_format \
	"%5d %-8.8s %3d %4d %5d%c %4d%c %-5s %6s %5.2f%% %5.2f%% %.14s%s"


/*
 * these are for detailing the process states.
 * process_states is indexed by the p_stat value of the kernel proc entry
 * (see get_process_info); procstatenames gives the label for each index.
 */
int process_states[9];
char *procstatenames[] = {
    " none, ", " sleeping, ", " state2, ", " runnable, ",
    " idle, ", " zombie, ", " stopped, ", " running, ", " swapped, ",
    NULL
};


/*
 * these are for detailing the cpu states.
 * cpu_states is filled by get_system_info() from the sysinfo cpu counters,
 * one slot per name below.
 */
int cpu_states[4];
char *cpustatenames[] = {
    "idle", "user", "kernel", "wait",
    NULL
};

/*
 * these are for detailing the memory statistics.
 * memory_stats holds megabyte values computed by get_system_info(); the
 * M_* constants below index it and line up with memorynames.
 */
int memory_stats[7];
char *memorynames[] = {
    "M Total. Real: ", "M, ", "M Free, ", "M Buffers. Virtual: ", "M, ", "M Free, ", NULL
};
#define M_TOTAL	   0
#define M_REAL     1
#define M_REALFREE 2
#define M_BUFFERS  3
#define M_VIRTUAL  4
#define M_VIRTFREE 5

/* short state names for the STATE column, indexed by p_stat */
char *state_abbrev[] = {
    "", "sleep", "", "run", "sleep", "zomb", "stop", "run", "swap"
};

/* sorting orders. first is default; same order as proc_compares */
char *ordernames[] = {
    "cpu", "size", "res", "time", "pri", NULL
};

/* compare routines (qsort callbacks defined later in this file) */
int compare_cpu(), compare_size(), compare_res(), compare_time(), 
    compare_prio();

/*
 * NULL-terminated table of the compare routines, listed in the same order
 * as the names in ordernames.
 */
int (*proc_compares[])() = {
    compare_cpu,
    compare_size,
    compare_res,
    compare_time,
    compare_prio,
    NULL
};

/* useful externals */
extern int errno;
extern char *sys_errlist[];
long lseek();
long time();
long percentages();


/* useful globals */
int kmem;			/* file descriptor for KMEM, opened in machine_init */

/* offsets in kernel, taken from nlst[].n_value in machine_init */
static unsigned long avenrun_offset;
static unsigned long sysinfo_offset;
static unsigned long vmker_offset;
static unsigned long proc_offset;
static unsigned long v_offset;

/* used for calculating cpu state percentages: current sample, previous
 * sample and their difference (see get_system_info) */
static long cp_time[CPU_NTIMES];
static long cp_old[CPU_NTIMES];
static long cp_diff[CPU_NTIMES];

/* the runqueue length is a cumulative value. keep old value */
long old_runque;

/* process info */
/*struct var v_info;*/		/* to determine nprocs */
int nprocs;			/* maximum nr of procs in proctab */
int ncpus;			/* nr of cpus installed */

int ptsize;			/* size of process table in bytes */
struct proc *p_proc;		/* a copy of the process table */
struct procsinfo *p_info;	/* needed for vm and ru info */
struct procinfo *p_infop;       /* return array for getproc call */
struct procsinfo **pref;	/* processes selected for display */
int pref_len;			/* number of processes selected */

/* needed to calculate WCPU; set by get_process_info on every refresh */
unsigned long curtime;


/*
 * machine_init(statics) - initialize globals, get kernel offsets and stuff.
 *
 * Opens KMEM, resolves the kernel symbols listed in nlst, allocates the
 * process tables sized for nprocs entries and publishes the name tables
 * through *statics.
 *
 * Returns 0 on success.  Returns -1 after printing a diagnostic when KMEM
 * cannot be opened, a symbol lookup fails, or memory is exhausted.
 */
int
machine_init(statics)
    struct statics *statics;
{
    /* number of real entries in nlst, i.e. without the NULL terminator */
    int nsyms = (int)(sizeof nlst / sizeof nlst[0]) - 1;

    if ((kmem = open(KMEM, O_RDONLY)) == -1) {
	perror(KMEM);
	return -1;
    }

    /* get kernel symbol offsets */
    if (knlist(nlst, nsyms, sizeof(struct nlist)) != 0) {
	perror("knlist");
	return -1;
    }
    avenrun_offset = nlst[X_AVENRUN].n_value;
    sysinfo_offset = nlst[X_SYSINFO].n_value;
    vmker_offset   = nlst[X_VMKER].n_value;
    proc_offset    = nlst[X_PROC].n_value;
    v_offset       = nlst[X_V].n_value;

    ncpus = 1;                 /* number of cpus, AIX 3.2 has only 1 CPU */
    nprocs = PROCMASK(NPROC);

    ptsize = nprocs * sizeof (struct proc);
    p_proc = (struct proc *)malloc(ptsize);
    p_info = (struct procsinfo *)malloc(nprocs * sizeof (struct procsinfo));
    p_infop = (struct procinfo *)malloc(nprocs * sizeof (struct procinfo));
    pref = (struct procsinfo **)malloc(nprocs * sizeof (struct procsinfo *));

    if (!p_proc || !p_info || !p_infop || !pref) {
	fprintf(stderr, "top: not enough memory\n");
	/* release whatever was obtained so nothing dangles after failure */
	free(p_proc);
	free(p_info);
	free(p_infop);
	free(pref);
	p_proc = NULL;
	p_info = NULL;
	p_infop = NULL;
	pref = NULL;
	return -1;
    }

    statics->procstate_names = procstatenames;
    statics->cpustate_names = cpustatenames;
    statics->memory_names = memorynames;
    statics->order_names = ordernames;

    return(0);
}



/*
 * format_header(uname_field) - build the column header line.
 *
 * Copies uname_field into the static header string starting at offset
 * UNAME_START, overwriting the placeholder there, and returns the header.
 * The copy stops at the end of the header string, so an over-long
 * uname_field is truncated instead of writing past the buffer.
 */
char *format_header(uname_field)
    register char *uname_field;
{
    register char *ptr;

    ptr = header + UNAME_START;
    while (*uname_field != '\0' && *ptr != '\0')
    {
	*ptr++ = *uname_field++;
    }

    return(header);
}




/*
 * get_system_info(si) - sample system-wide statistics.
 *
 * Reads the kernel's avenrun, sysinfo and vmker data through getkval() and
 * fills in si->load_avg, si->cpustates and si->memory.  The cpu states are
 * expressed in parts per thousand of the ticks elapsed since the previous
 * call; memory figures are in megabytes.
 */
get_system_info(si)
    struct system_info *si;
{
    int load_avg[3];
    struct sysinfo s_info;
    struct vmker m_info;
    int i;
    double total = 0;

    /* get the load average array */
    getkval(avenrun_offset, (caddr_t)load_avg, sizeof load_avg, "avenrun");

    /* get the sysinfo structure */
    getkval(sysinfo_offset, (caddr_t)&s_info, sizeof s_info, "sysinfo");

    /* get vmker structure */
    getkval(vmker_offset, (caddr_t)&m_info, sizeof m_info, "vmker");

    /* convert load averages to doubles (kernel value is scaled by 65536) */
    for (i = 0; i < 3; i++)
	si->load_avg[i] = (double)load_avg[i]/65536.0;

    /* calculate cpu state in percentages */
    for (i = 0; i < CPU_NTIMES; i++) {
	cp_old[i] = cp_time[i];
	cp_time[i] = s_info.cpu[i];
	cp_diff[i] = cp_time[i] - cp_old[i];
	total += cp_diff[i];
    }

    total = total/1000.0;  /* top itself will correct this */
    for (i = 0; i < CPU_NTIMES; i++) {
	/* no ticks elapsed since the last sample: report 0, not 0/0 */
	if (total != 0)
	    cpu_states[i] = cp_diff[i] / total;
	else
	    cpu_states[i] = 0;
    }

    /* calculate memory statistics, scale 4K pages to megabytes */
#define PAGE_TO_MB(a) ((a)*4/1024)
    memory_stats[M_TOTAL]    = PAGE_TO_MB(m_info.totalmem+m_info.totalvmem);
    memory_stats[M_REAL]     = PAGE_TO_MB(m_info.totalmem);
    memory_stats[M_REALFREE] = PAGE_TO_MB(m_info.freemem);
    memory_stats[M_BUFFERS]  = PAGE_TO_MB(m_info.numperm);
    memory_stats[M_VIRTUAL]  = PAGE_TO_MB(m_info.totalvmem);
    memory_stats[M_VIRTFREE] = PAGE_TO_MB(m_info.freevmem);

    /* runnable processes: runque is cumulative, so store the increase */
    process_states[0] = s_info.runque - old_runque;
    old_runque = s_info.runque;

    si->cpustates = cpu_states;
    si->memory = memory_stats;
}

static struct handle handle;

caddr_t get_process_info(si, sel, compare)
    struct system_info *si;
    struct process_select *sel;
    int (*compare)();
{
    int i, nproc, st;
    int active_procs = 0, total_procs = 0;
    struct procsinfo *pp, **p_pref = pref;
    unsigned long pctcpu;
    pid_t procsindex = 0;
    struct proc *p;

    si->procstates = process_states;

    curtime = time(0);

    /* get the procinfo structures of all running processes */
    nproc = getproc(p_infop, nprocs, sizeof (struct procinfo));
    if (nproc < 0) {
	perror("getproc");
	exit(1);
    }
    for (i=0; i<nproc; i++) {
      st = getuser(&p_infop[i],sizeof(struct procinfo),
		   &p_info[i].u,sizeof(struct userinfo));
      if (st==-1) p_infop[i].pi_stat = SZOMB; 
      memcpy (&p_info[i].p,&p_infop[i],sizeof(struct procinfo));
    }

    /* the swapper has no cmd-line attached */
    strcpy(p_info[0].u.ui_comm, "swapper");
    
    /* get proc table */
    getkval(proc_offset, (caddr_t)p_proc, ptsize, "proc");

    memset(process_states, 0, sizeof process_states);

    /* build a list of pointers to processes to show. walk through the
     * list of procsinfo structures instead of the proc table since the
     * mapping of procsinfo -> proctable is easy, the other way around
     * is cumbersome
     */
    for (pp = p_info, i = 0; i < nproc; pp++, i++) {

	p = &p_proc[PROCMASK(pp->p.pi_pid)];

        if (pp->p.pi_stat && (sel->system || ((pp->p.pi_flag & SKPROC) == 0))) {
	    total_procs++;
	    process_states[p->p_stat]++;
	    if ( (pp->p.pi_stat != SZOMB) &&
		(sel->idle || p->p_cpticks != 0 /*|| (p->p_stat == SACTIVE)*/)
		&& (sel->uid == -1 || pp->p.pi_uid == (uid_t)sel->uid)) {
                *p_pref++ = pp;
		active_procs++;
	    }
	}
    }   

    /* the pref array now holds pointers to the procsinfo structures in
     * the p_info array that were selected for display
     */

    /* sort if requested */
    if (compare != NULL)
	qsort((char *)pref, active_procs, sizeof (struct procsinfo *), 
	      compare);
    
    si->last_pid = -1;		/* no way to figure out last used pid */
    si->p_total = total_procs;
    si->p_active = pref_len = active_procs;

    handle.next_proc = pref;
    handle.remaining = active_procs;

    return((caddr_t)&handle);
}

char fmt[128];		/* static area where result is built */

/*
 * define what weighted cpu is. use definition of %CPU from 'man ps(1)':
 * cpu seconds used as a percentage of the seconds elapsed since the
 * process started, divided by the number of cpus.  Yields 0.0 when no cpu
 * time was used or when no time has elapsed yet (avoids dividing by zero).
 */
#define weighted_cpu(pp) \
    ((PROCTIME(pp) == 0 || curtime <= (unsigned long)(pp)->u.ui_start) ? 0.0 : \
     (((PROCTIME(pp)*100.0)/(curtime-(pp)->u.ui_start)/ncpus)))
/* kernel p_pctcpu scaled down by FLT_MODULO */
#define double_pctcpu(p) ((double)(p)->p_pctcpu/(double)FLT_MODULO)

/*
 * format_next_process(handle, get_userid) - format one display line.
 *
 * Takes the next process from the handle returned by get_process_info(),
 * formats it with Proc_format into the static buffer fmt and returns fmt.
 * get_userid maps a uid to the name shown in the user column.  When the
 * handle has no entries left an empty string is returned.
 */
char *format_next_process(handle, get_userid)
    caddr_t handle;
    char *(*get_userid)();
{
    register struct handle *hp;
    register struct procsinfo *pi;
    register struct proc *p;
    char *state;
    long cpu_time;
    int proc_size, proc_ress;
    char size_unit = 'K';
    char ress_unit = 'K';

    hp = (struct handle *)handle;
    if (hp->remaining == 0) {	/* safe guard */
	fmt[0] = '\0';
	return fmt;
    }
    pi = *(hp->next_proc++);
    hp->remaining--;
    p = &p_proc[PROCMASK(pi->p.pi_pid)];

    cpu_time = PROCTIME(pi);

    /* we display sizes up to 10M in KiloBytes, beyond 10M in MegaBytes */
    if ((proc_size = PROCSIZE(pi)) > 10240) {
	proc_size /= 1024;
	size_unit = 'M';
    }
    if ((proc_ress = PROCRESS(pi)) > 10240) {
	proc_ress /= 1024;
	ress_unit = 'M';
    }

    /* p_stat comes from kernel memory; an unknown value gets an empty
     * state name instead of indexing past state_abbrev */
    if ((unsigned)p->p_stat < sizeof state_abbrev / sizeof state_abbrev[0])
	state = state_abbrev[p->p_stat];
    else
	state = "";

    sprintf(fmt, Proc_format ,
            pi->p.pi_pid,					  /* PID */
            (*get_userid)(pi->u.ui_uid),			  /* login name */
            getpriority(PRIO_PROCESS, pi->p.pi_pid),
	    EXTRACT_NICE(p),				  /* fixed or vari */
            proc_size,					  /* size */
            size_unit,					  /* K or M */
            proc_ress,					  /* resident */
            ress_unit,					  /* K or M */
            state,					  /* process state */
            format_time(cpu_time),			  /* time used */
	    weighted_cpu(pi),	                          /* WCPU */
	    100.0 * double_pctcpu(p),                     /* CPU */
            printable(pi->u.ui_comm),                       /* COMM */
	    (pi->p.pi_flag & SKPROC) == 0 ? "" : " (sys)"  /* kernel process? */
	    );
    return(fmt);
}


/*
 *  getkval(offset, ptr, size, refstr) - get a value out of the kernel.
 *	"offset" is the byte offset into the kernel for the desired value,
 *  	"ptr" points to a buffer into which the value is retrieved,
 *  	"size" is the size of the buffer (and the object to retrieve),
 *  	"refstr" is a reference string used when printing error messages,
 *	    if "refstr" starts with a '!', then a failure on read will not
 *  	    be fatal (this may seem like a silly way to do things, but I
 *  	    really didn't want the overhead of another argument).
 *
 *  Returns 1 on success and 0 on a tolerated ('!') read failure.  A failed
 *  seek, or a read failure without '!', prints a message and exits.
 */
int
getkval(offset, ptr, size, refstr)
    unsigned long offset;
    caddr_t ptr;
    int size;
    char *refstr;
{
    int upper_2gb = 0;

    /* reads above 2Gb are done by seeking to offset%2Gb, and supplying
     * 1 (opposed to 0) as fourth parameter to readx (see 'man kmem').
     * The boundary is an unsigned constant: "1<<31" overflows a signed
     * int, and the address 0x80000000 itself already lies in the upper
     * half, so the comparison must be inclusive.
     */
    if (offset >= 0x80000000UL) {
	upper_2gb = 1;
	offset &= 0x7fffffff;
    }

    if (lseek(kmem, offset, SEEK_SET) != offset) {
	fprintf(stderr, "top: lseek failed\n");
	exit(-1);
    }

    if (readx(kmem, ptr, size, upper_2gb) != size) {
	if (*refstr == '!')
	    return 0;
	else {
	    fprintf(stderr, "top: kvm_read for %s: %s\n", refstr,
		    sys_errlist[errno]);
	    exit(-1);
	}
    }

    return 1 ;
}
    
/* comparison routine for qsort */
/*
 * The following code is taken from the solaris module and adjusted
 * for AIX -- JV .
 *
 * Each ORDERKEY_* macro expands to an "if" without a body: it stores the
 * comparison of one key in "result" and its condition is true only when
 * that key ties.  Written one after another, the macros nest, so a later
 * key is evaluated only when all earlier keys compare equal.  The macros
 * rely on locals named pi1, pi2, p1, p2, result and lresult being declared
 * by the function that uses them.
 */

/* higher p_pctcpu first */
#define ORDERKEY_PCTCPU \
           if (lresult = p2->p_pctcpu - p1->p_pctcpu, \
               (result = lresult > 0 ? 1 : lresult < 0 ? -1 : 0) == 0)

/* more accumulated cpu seconds (PROCTIME) first */
#define ORDERKEY_CPTICKS \
           if ((result = PROCTIME(pi2) - PROCTIME(pi1)) == 0)


/* higher sorted_state rank first */
#define ORDERKEY_STATE \
           if ((result = sorted_state[p2->p_stat]  \
                         - sorted_state[p1->p_stat])  == 0)

/* Nice values directly reflect the process' priority, and are always >0 ;-) */
/* lower nice value first */
#define ORDERKEY_PRIO \
	   if ((result = EXTRACT_NICE(p1) - EXTRACT_NICE(p2)) == 0) 

/* larger resident size first */
#define ORDERKEY_RSSIZE \
           if ((result = PROCRESS(pi2) - PROCRESS(pi1)) == 0)
/* larger total size first */
#define ORDERKEY_MEM \
           if ((result = PROCSIZE(pi2) - PROCSIZE(pi1)) == 0)

/*
 * Rank of each p_stat value for ORDERKEY_STATE; a higher rank sorts first.
 * NOTE(review): indices 1 and 3 have names in state_abbrev ("sleep",
 * "run") but rank 0 here -- confirm this is intended.
 */
static unsigned char sorted_state[] =
{
    0, /* not used */
    0,
    0,
    0,
    3,                          /* sleep */
    1,				/* zombie */
    4,				/* stop */
    6,				/* run */
    2,				/* swap */
};

/*
 * compare_cpu - the comparison function for sorting by cpu percentage.
 *
 * qsort callback over elements of pref (pointers to struct procsinfo).
 * Keys, in order: cpu percentage, cpu time, state, priority, resident
 * size, total size.  Returns the result of the first key that differs,
 * or 0 when all keys tie.
 */

int
compare_cpu(ppi1, ppi2)
    struct procsinfo **ppi1;
    struct procsinfo **ppi2;
{
    register struct procsinfo *pi1 = *ppi1, *pi2 = *ppi2;
    register struct proc *p1;
    register struct proc *p2;
    register int result;
    register long lresult;

    /* locate the kernel proc entries that belong to the two processes */
    p1 = &p_proc[PROCMASK(pi1->p.pi_pid)];
    p2 = &p_proc[PROCMASK(pi2->p.pi_pid)];

    /* nested "if" chain; the lone ';' is the body of the innermost one */
    ORDERKEY_PCTCPU
    ORDERKEY_CPTICKS
    ORDERKEY_STATE
    ORDERKEY_PRIO
    ORDERKEY_RSSIZE
    ORDERKEY_MEM
    ;

    return result;
}
    

/*
 * compare_size - the comparison function for sorting by total memory usage.
 *
 * qsort callback over elements of pref (pointers to struct procsinfo).
 * Keys, in order: total size, resident size, cpu percentage, cpu time,
 * state, priority.  Returns the result of the first key that differs,
 * or 0 when all keys tie.
 */

int
compare_size(ppi1, ppi2)
    struct procsinfo **ppi1;
    struct procsinfo **ppi2;
{
    register struct procsinfo *pi1 = *ppi1, *pi2 = *ppi2;
    register struct proc *p1;
    register struct proc *p2;
    register int result;
    register long lresult;

    /* locate the kernel proc entries that belong to the two processes */
    p1 = &p_proc[PROCMASK(pi1->p.pi_pid)];
    p2 = &p_proc[PROCMASK(pi2->p.pi_pid)];

    /* nested "if" chain; the lone ';' is the body of the innermost one */
    ORDERKEY_MEM
    ORDERKEY_RSSIZE
    ORDERKEY_PCTCPU
    ORDERKEY_CPTICKS
    ORDERKEY_STATE
    ORDERKEY_PRIO
    ;

    return result;
}
    

/*
 * compare_res - the comparison function for sorting by resident set size.
 *
 * qsort callback over elements of pref (pointers to struct procsinfo).
 * Keys, in order: resident size, total size, cpu percentage, cpu time,
 * state, priority.  Returns the result of the first key that differs,
 * or 0 when all keys tie.
 */

int
compare_res(ppi1, ppi2)
    struct procsinfo **ppi1;
    struct procsinfo **ppi2;
{
    register struct procsinfo *pi1 = *ppi1, *pi2 = *ppi2;
    register struct proc *p1;
    register struct proc *p2;
    register int result;
    register long lresult;

    /* locate the kernel proc entries that belong to the two processes */
    p1 = &p_proc[PROCMASK(pi1->p.pi_pid)];
    p2 = &p_proc[PROCMASK(pi2->p.pi_pid)];

    /* nested "if" chain; the lone ';' is the body of the innermost one */
    ORDERKEY_RSSIZE
    ORDERKEY_MEM
    ORDERKEY_PCTCPU
    ORDERKEY_CPTICKS
    ORDERKEY_STATE
    ORDERKEY_PRIO
    ;

    return result;
}
    

/*
 * compare_time - the comparison function for sorting by total cpu time.
 *
 * qsort callback over elements of pref (pointers to struct procsinfo).
 * Keys, in order: cpu time, cpu percentage, state, priority, total size,
 * resident size.  Returns the result of the first key that differs,
 * or 0 when all keys tie.
 */

int
compare_time(ppi1, ppi2)
    struct procsinfo **ppi1;
    struct procsinfo **ppi2;
{
    register struct procsinfo *pi1 = *ppi1, *pi2 = *ppi2;
    register struct proc *p1;
    register struct proc *p2;
    register int result;
    register long lresult;

    /* locate the kernel proc entries that belong to the two processes */
    p1 = &p_proc[PROCMASK(pi1->p.pi_pid)];
    p2 = &p_proc[PROCMASK(pi2->p.pi_pid)];

    /* nested "if" chain; the lone ';' is the body of the innermost one */
    ORDERKEY_CPTICKS
    ORDERKEY_PCTCPU
    ORDERKEY_STATE
    ORDERKEY_PRIO
    ORDERKEY_MEM
    ORDERKEY_RSSIZE
    ;

    return result;
}
    

/*
 * compare_prio - the comparison function for sorting by priority
 * (the nice value obtained with EXTRACT_NICE).
 *
 * qsort callback over elements of pref (pointers to struct procsinfo).
 * Keys, in order: priority, cpu percentage, cpu time, state, resident
 * size, total size.  Returns the result of the first key that differs,
 * or 0 when all keys tie.
 */

int
compare_prio(ppi1, ppi2)
    struct procsinfo **ppi1;
    struct procsinfo **ppi2;
{
    register struct procsinfo *pi1 = *ppi1, *pi2 = *ppi2;
    register struct proc *p1;
    register struct proc *p2;
    register int result;
    register long lresult;

    /* locate the kernel proc entries that belong to the two processes */
    p1 = &p_proc[PROCMASK(pi1->p.pi_pid)];
    p2 = &p_proc[PROCMASK(pi2->p.pi_pid)];

    /* nested "if" chain; the lone ';' is the body of the innermost one */
    ORDERKEY_PRIO
    ORDERKEY_PCTCPU
    ORDERKEY_CPTICKS
    ORDERKEY_STATE
    ORDERKEY_RSSIZE
    ORDERKEY_MEM
    ;

    return result;
}
    

/*
 * proc_owner(pid) - return the uid that owns process "pid".
 *
 * Only the processes selected by the most recent get_process_info() call
 * (the first pref_len entries of pref) are searched.  Returns -1 when the
 * pid is not among them.
 */
int
proc_owner(pid)
    int pid;
{
    register struct procsinfo **prefp = pref;
    register int cnt = pref_len;

    while (--cnt >= 0) {
	if ((*prefp)->p.pi_pid == pid)
	    return (*prefp)->p.pi_uid;
	prefp++;
    }

    return(-1);
}

