/* 
 * Prospect: a developer's system profiler.
 *
 * COPYRIGHT (C) 2001-2004 Hewlett-Packard Company
 *
 * Author: Alex Tsariounov, HP
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307, USA.
 */

/* $Id: rec_proc.c,v 1.34 2004/01/09 20:29:28 type2 Exp $ */

/*
 *******************************************************************************
 *
 *                            PROSPECT PROJECT
 *                        Linux Trace Processing Module
 *
 *******************************************************************************
 */

#ifndef __LINT__
/* RCS identification string for this file; left out when __LINT__ is defined. */
static const char gRCSid[] = "@(#) $Id: rec_proc.c,v 1.34 2004/01/09 20:29:28 type2 Exp $";
#endif

/*
 * ------------------------ System Header Files -------------------------
 */
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/param.h>
#include <dirent.h>
#include <string.h>
#include <errno.h>
#include <time.h>
#include <signal.h>

/*
 * setresuid() is declared by hand because the unistd.h this file is built
 * against does not provide a prototype for it.
 * NOTE(review): presumably the prototype is hidden behind a feature-test
 * macro such as _GNU_SOURCE - confirm before removing this declaration.
 * It is used by exec_command() to reset the child's user ids.
 */
int setresuid(uid_t ruid, uid_t euid, uid_t suid);

/*
 * ----------------------- Prospect Header Files ------------------------
 */
#include "../dtree/dtree.h"
#include "prospect.h"
#include "rec_proc.h"
#include "ascii_report.h"
#include "linux_model.h"
#include "linux_module.h"
#include "bfile.h"

/*
 * ------------------------ Defines and Macros -------------------------
 */
/*
 * Default system software page size; only takes effect when PAGESZ has
 * not already been defined before this point.
 */
#ifndef PAGESZ
#define PAGESZ  4096
#endif  /* PAGESZ */

/*
 * Maximum PID number.
 * NOTE(review): hard-coded value; confirm it matches the pid limit of the
 * kernels this module is meant to run on.
 */
#define MAXPID 32767

/*
 * --------------------- Static function prototypes ---------------------
 */
/* scan /proc and register every process already running; 0 ok, 1 on error */
static int glean_proc(void);
/* fork and exec the command to be profiled; always returns 0 in the parent */
static int exec_command(void);
/* read sample buffers until the child has been reaped, then record its fate */
static void do_sampling(void);
/* register one pre-existing pid and read its /proc/<pid>/maps; 0 ok, 1 on error */
static int add_proc(pid_t);
/* attach parent pointers to processes whose parent is present in the tree */
static void restring_procs(void);
/* write the header lines of the ascii trace output */
static void init_asctrace(void);

/*
 * void run_command(void)
 *
 * Exec the command with oprofile watching.
 *
 * Drives one complete run:
 *   - set up the binary trace files and the kernel symbol map,
 *   - unless replaying a trace (mTRACEIN): initialize the sampler, take a
 *     snapshot of /proc, attach to the sampler, install the SIGALRM
 *     handler, start periodic flushing and exec the command,
 *   - process samples until do_sampling() returns,
 *   - unless replaying: drain and shut down the sampler,
 *   - link children to parents and print the report.
 *
 * Fatal setup errors end the program through prospect_exit(1).
 */
void
run_command(void)
{
    struct sigaction act;

    mINFORM("In run_command");

    /* Initialize the output/input trace files as necessary */
    if (mTRACEOUT) bfile_init_out();
    if (mTRACEIN) bfile_init_in();

    /* If bug level is more than 1, do timestamps regardless */
    if (gConf.bug_level>1) gDoGettimeofday = TRUE;

    /* Read System.map file to prepare for kernel profiles */
    if (read_system_map()) {
       pscr("Warning: couldn't import System.map file "
            "- kernel profiles disabled.\n");
       gConf.flags.do_kernel=FALSE;
    }

    /* Initialize the sampling engine */
    if (!mTRACEIN && op_init()) {
        ferr("oprofile module initialization failed\n");
        prospect_exit(1);
    }

    /* save the gOp and gConf config structs to trace */
    if (mTRACEOUT) bfile_write_configs();

    /* Build initial picture of system procs before we start profiling */
    if (!mTRACEIN && glean_proc()) {
        ferr("couldn't scan /proc\n");
        /* only reached when not replaying, so the sampler was initialized */
        op_shutdown();
        prospect_exit(1);
    }

    if (!mTRACEIN) {
        /* Attach to kernel sampler - this starts profiling */
        if (op_attach()) {
            ferr("failed to attach to oprofile module\n");
            op_shutdown();
            prospect_exit(1);
        }

        /*
         * Install our alarm signal handler.  Every field handed to
         * sigaction() must be set; sa_mask used to be left as stack
         * garbage, which could block arbitrary signals in the handler.
         */
        sigemptyset(&act.sa_mask);
        act.sa_handler = sigalrm_handler;
        act.sa_flags = 0;
        if (sigaction(SIGALRM, &act, NULL)) {
            perr("failed to install SIGALRM handler");
            /* sampler is attached by now: shut it down as above */
            op_shutdown();
            prospect_exit(1);
        }

        /* Set up signal mask for blocking if needed */
        sigemptyset(&gSigblockmask);
        sigaddset(&gSigblockmask, SIGALRM);

        /* Flush kernel sampling buffer and set up periodic flush */
        op_autoflush(gOp.flushrate);

        /* Exec our benchmark */
        exec_command();
    }

    /* Wait till benchmark ends and read samples */
    do_sampling();

    if (!mTRACEIN) {
        /* Flush kernel sampler one last time */
        op_empty_buffer();

        /* Turn off auto flushing */
        op_autoflush(0);

        /* Disable kernel sampler */
        op_shutdown();
    }

    /* find parent procs in tree */
    restring_procs();

    /* Print out report */
    print_done();

} /* run_command(void) */

/*
 * void profile_user(process_t *p, unsigned long pc, unsigned int cpu);
 *
 * Account one user space sample taken at address pc to process p.
 * Updates the per-address counter in p->pr_profile, the unique/total
 * counters of the process and, when cpu is below gConf.numcpus, the
 * per-cpu user hit log.  A NULL p is logged and ignored.
 * Note that hits can come in _after_ an exit.
 */
void profile_user(process_t *p, unsigned long pc, unsigned int cpu)
{
    unsigned long *hits;

    mINFORM("In profile_user(%p, 0x%lx, %u) pid=%d", p, pc, cpu,
            p ? p->pr_myPid : 0);

    if (!p) {
        mINFORM(" passed null p, ignoring");
        return;
    }

    /* the slot handed back by DTI is used without a NULL check */
    hits = (unsigned long*) DTI(p->pr_profile, pc);

    /* a zero counter means this address has not been hit before */
    if (*hits == 0L) {
        p->pr_profUniq++;
    }
    (*hits)++;
    p->pr_profTot++;

    /* per-cpu bookkeeping only for cpu numbers inside the configured range */
    if (cpu >= gConf.numcpus) {
        return;
    }
    if (p->pr_ucpu_log[cpu] == 0) {
        p->pr_ucpus_used++;
    }
    p->pr_ucpu_log[cpu]++;
} /* profile_user() */

/*
 * void profile_sys(process_t *p, unsigned long pc, unsigned int cpu);
 *
 * Account one kernel space sample taken at address pc to process p.
 * Updates the per-address counter in p->pr_sysProfile, the unique/total
 * system counters of the process and, when cpu is below gConf.numcpus,
 * the per-cpu kernel hit log.  A NULL p is logged and ignored.
 * Note that hits can come in _after_ an exit.
 */
void profile_sys(process_t *p, unsigned long pc, unsigned int cpu)
{
    unsigned long *hits;

    mINFORM("In profile_sys(%p, 0x%lx, %u) pid=%d", p, pc, cpu,
            p ? p->pr_myPid : 0);

    if (!p) {
        mINFORM(" passed null p, ignoring");
        return;
    }

    /* the slot handed back by DTI is used without a NULL check */
    hits = (unsigned long*) DTI(p->pr_sysProfile, pc);

    /* a zero counter means this address has not been hit before */
    if (*hits == 0L) {
        p->pr_sysProfUniq++;
    }
    (*hits)++;
    p->pr_sysProfTot++;

    /* per-cpu bookkeeping only for cpu numbers inside the configured range */
    if (cpu >= gConf.numcpus) {
        return;
    }
    if (p->pr_kcpu_log[cpu] == 0) {
        p->pr_kcpus_used++;
    }
    p->pr_kcpu_log[cpu]++;
} /* profile_sys() */

/*
 * void profile_kernel(process_t *p, unsigned long pc, unsigned int cpu);
 *
 * Add one hit at kernel address pc to the global kernel profile gK.k_prof.
 * The hit goes into exactly one of three pools, chosen from p:
 *   - p is a kernel thread (p->pr_isKthread)  -> kthread pool
 *   - p is any other process                  -> user proc pool
 *   - p is NULL                               -> interrupt pool
 * and is always added to the overall kernel pool as well.  For each pool
 * the per-cpu log is updated only when cpu is below gConf.numcpus.
 *
 * Addresses below gKernelVmOffset are reported with mBUG and dropped.
 * Nothing is recorded when gConf.flags.do_kernel is off.
 */
void profile_kernel(process_t *p, unsigned long pc, unsigned int cpu)
{
    unsigned long *slot;

    mINFORM("In profile_kernel(%p, 0x%lx, %u) pid=%d", p, pc, cpu,
            p ? p->pr_myPid : 0);

    if (pc < gKernelVmOffset) {
        /* pc is an unsigned long, so it must be printed with %lx, not %p */
        mBUG("profile_kernel passed non-kernel address 0x%lx", pc);
        return;
    }

    if (!gConf.flags.do_kernel) return;

    if (p && p->pr_isKthread) {                  /* add to kernel thread pool */
        mINFORM(" adding hits for kernel threads");
        slot = (unsigned long*) DTI(gK.k_prof.kp_kthreadProfile, pc);
        if (*slot==0L) {
            gK.k_prof.kp_kthreadProfUniq++;
        }
        *slot+=1;
        gK.k_prof.kp_kthreadProfTot++;
        if (cpu < gConf.numcpus) {
            if (gK.k_prof.kp_kthread_cpu_log[cpu] == 0)
                gK.k_prof.kp_kthread_cpus_used++;
            gK.k_prof.kp_kthread_cpu_log[cpu]++;
        }
    }
    else if (p) {                                    /* add to user proc pool */
        mINFORM(" adding hits for user procs");
        slot = (unsigned long*) DTI(gK.k_prof.kp_usrProfile, pc);
        if (*slot==0L) {
            gK.k_prof.kp_usrProfUniq++;
        }
        *slot+=1;
        gK.k_prof.kp_usrProfTot++;
        if (cpu < gConf.numcpus) {
            if (gK.k_prof.kp_usr_cpu_log[cpu] == 0)
                gK.k_prof.kp_usr_cpus_used++;
            gK.k_prof.kp_usr_cpu_log[cpu]++;
        }
    }
    else {                                           /* add to interrupt pool */
        mINFORM(" adding hits for interrupt context");
        slot = (unsigned long*) DTI(gK.k_prof.kp_intrProfile, pc);
        if (*slot==0L) {
            gK.k_prof.kp_intrProfUniq++;
        }
        *slot+=1;
        gK.k_prof.kp_intrProfTot++;
        if (cpu < gConf.numcpus) {
            if (gK.k_prof.kp_intr_cpu_log[cpu] == 0)
                gK.k_prof.kp_intr_cpus_used++;
            gK.k_prof.kp_intr_cpu_log[cpu]++;
        }
    }

    /* and add all to global pool */
    mINFORM(" adding hits for global profile");
    slot = (unsigned long*) DTI(gK.k_prof.kp_profile, pc);
    if (*slot==0L) {
        gK.k_prof.kp_profUniq++;
    }
    *slot+=1;
    gK.k_prof.kp_profTot++;
    if (cpu < gConf.numcpus) {
        if (gK.k_prof.kp_tot_cpu_log[cpu] == 0)
            gK.k_prof.kp_tot_cpus_used++;
        gK.k_prof.kp_tot_cpu_log[cpu]++;
    }

} /* profile_kernel() */


/*
 * int add_map_to_proc(process_t *p, char *buf)
 *
 * Add a region map defined in the line passed in to the proc struct
 * also passed in.  The format for the map line is as for 2.4.x in
 * /proc/pid/maps file:
 *
 *     start-end perms offset dev inode [path]
 *
 * Only mappings whose perms field contains 'x' are kept.  buf is
 * modified in place (a trailing newline is removed).
 *
 * Returns 0 when a region was appended to p's region list, 1 when the
 * line was dropped (not executable, or it could not be parsed).
 * Allocation failures end the program through prospect_exit(1).
 *
 * Note that this function is also used to read in regions saved to disk
 * in the form of a bfile, hence it's not static.
 */
int
add_map_to_proc(process_t *p, char *buf)
{
    char perms[8];
    char *path, *newline;
    unsigned long start, end, offset;
    size_t len;
    int nconv;
    region_t *r;

    /* chomp; the length check keeps an empty line from indexing buf[-1] */
    len = strlen(buf);
    if (len > 0 && buf[len-1] == '\n') buf[len-1] ='\0';
    mINFORM("   adding: %s", buf);

    /*
     * Parse everything into locals before allocating, so that a rejected
     * line cannot leak a region.  The permission field is parsed as its
     * own token: searching the whole line for an 'x' would also match an
     * 'x' inside the path of a non-executable mapping.
     */
    nconv = sscanf(buf, "%lx-%lx %7s %lx", &start, &end, perms, &offset);

    if (nconv < 2) {
        if (gConf.bug_level) {
            mINFORM("   bad conversion for range for map");
        }
        return 1;
    }

    /* filter out non-executable maps */
    if (nconv < 3 || strchr(perms, 'x') == NULL) {
        if (gConf.bug_level) {
            mINFORM("   can't find executable mark, drop.");
        }
        return 1;
    }

    if (nconv < 4) {
        if (gConf.bug_level) {
            mINFORM("   bad conversion for offset for map");
        }
        return 1;
    }

    /* create new region */
    r = CALLOC(sizeof(region_t), 1);
    if (!r) {
        mBUG("Calloc failed, line %d, file %s", __LINE__, __FILE__);
        prospect_exit(1);
    }

    /*
     * rd_start/rd_end are declared elsewhere; they are filled in through
     * an unsigned long view, exactly as the sscanf() call here used to do.
     */
    *(unsigned long*)&r->rd_start = start;
    *(unsigned long*)&r->rd_end = end;
    r->rd_length = (unsigned long) (r->rd_end - r->rd_start);
    r->rd_offset = offset;

    /* parse path, no paths maps are self-modifying executables */
    path = strchr(buf, '/');
    if (path) {
        r->rd_path = strdup(path);
        if (!r->rd_path) {
            mBUG("strdup failed, line %d, file %s", __LINE__, __FILE__);
            prospect_exit(1);
        }
        /* chomp */
        if ((newline=strchr(r->rd_path, '\n')) != NULL) *newline = '\0';
    }
    else {
        r->rd_path = "<Executable_Memory_Region>";
    }

    /* set the pid */
    r->rd_pid = p->pr_myPid;

    /*
     * link into vas: the list is circular through the head, so the new
     * region goes after the current last one and points back at the head
     */
    if (p->pr_vasHead->vh_fwd == NULL) {          /* first region */
        r->rd_bck = (void*) (p->pr_vasHead);
        p->pr_vasHead->vh_fwd = r;
    }
    else {
        region_t *tmp;
        tmp = p->pr_vasHead->vh_fwd;
        while(tmp->rd_fwd != (void*) p->pr_vasHead) tmp = tmp->rd_fwd;
        tmp->rd_fwd = r;
        r->rd_bck = tmp;
    }
    r->rd_fwd = (void*)p->pr_vasHead;
    p->pr_vasHead->vh_bck = r;

    p->pr_vasHead->vh_entries++;

    mINFORM("    start=0x%lx length=%ld offset=0x%lx entry=%d path=%s",
            r->rd_start, r->rd_length, r->rd_offset,
            p->pr_vasHead->vh_entries, r->rd_path);

    return 0;
} /* add_map_to_proc() */

/* -------------------------  Static functions follow ----------------------- */

/*
 * static int glean_proc(void)
 *
 * Get all there currently is in /proc and
 * fill in our structs.
 *
 * Every directory entry whose name starts with a number is taken to be a
 * pid and handed to add_proc(); when a binary trace is being written the
 * pid is also passed to bfile_write_proc().  A failure to add a single
 * process is only reported, the scan goes on.
 *
 * Returns 0 on success, 1 if /proc could not be opened.
 */
static int
glean_proc(void)
{
    DIR *proc;
    struct dirent *proc_dir;
    unsigned int scanned;
    pid_t pid;
    int saved_errno;

    mINFORM("In glean_proc()");

    /* open the /proc directory */
    if (!(proc=opendir("/proc"))) {
        /* keep errno: the trace call below may change it before perr runs */
        saved_errno = errno;
        mINFORM(" opendir() failed, errno(%d): %s", saved_errno,
                strerror(saved_errno));
        errno = saved_errno;
        perr("Serious Error: Could not open /proc");
        return 1;
    }

    /* look for numeric entries */
    while ((proc_dir=readdir(proc)) != NULL) {
        /* %u needs an unsigned int to write to, not a pid_t */
        if (sscanf(proc_dir->d_name, "%u", &scanned) != 1)
            continue;
        pid = (pid_t) scanned;

        if (gConf.bug_level) {
            mINFORM("Adding process %d", (int) pid);
        }
        if (add_proc(pid)) {
            mINFORM("Problem adding process %d", (int) pid);
            if (gConf.bug_level)
                ferr("problem above with adding process %d", (int) pid);
        }
        if (mTRACEOUT) bfile_write_proc(pid);
    }

    mINFORM(" added %d processes already running", gProclist.pl_numProcs);
    closedir(proc);
    return 0;
} /* glean_proc() */

/*
 * static int add_proc(pid_t pid)
 *
 * Add a pid found in /proc to our structs, also
 * read in the pid's memory maps and any other
 * pertinent information.
 */
static int
add_proc(pid_t pid)
{
    FILE *strm;
    process_t *p;
    char buf[MAXPATHLEN], fname[64], *ctmp;

    /* add process to global list */
    p = alloc_proc(pid); 
    p->pr_birthBy = cBIRTH_preExist;
    p->pr_exec_times = 1;
    putproc_by_pid(pid, p);

    /* read /proc/#/maps for the vas, but not for kthreads */
    sprintf(fname,"/proc/%u/maps", pid);
    if ((strm=fopen(fname, "r")) == NULL) {
        mINFORM("  error opening %s, errno(%d):%s", fname, errno, 
                strerror(errno));
        if (gConf.bug_level)
            ferr(" error opening %s, errno(%d):%s", fname, errno, 
                 strerror(errno));
        return 1;
    }
    /* add the maps */
    do {
        ctmp = fgets(buf, MAXPATHLEN-1, strm);
        if (ctmp) add_map_to_proc(p, buf);
    } while (ctmp);
    /* no maps whatsoever? */
    if (p->pr_vasHead->vh_entries == 0) {
        if (p->pr_path) {
            mINFORM(" problem reading maps - none seem to exist");
            if (gConf.bug_level)
                ferr(" problem reading maps - none seem to exist");
            return 1;
        }
        else {
            mINFORM(" no maps, but this is a kernel thread, so ok");
            p->pr_isKthread = TRUE;
            gProclist.pl_numKthreads++;
        }
    }
    else {
        mINFORM(" added %d maps to proc", p->pr_vasHead->vh_entries);
    }

    if (gConf.bug_level>1 && !p->pr_isKthread) {
        region_t *r;
        int i;

        mINFORM("  region list:");
        mINFORM("    head: 0x%lx   0x%lx   0x%lx", 
                (unsigned long) p->pr_vasHead,
                (unsigned long) p->pr_vasHead->vh_fwd,
                (unsigned long) p->pr_vasHead->vh_bck);
        r = p->pr_vasHead->vh_fwd;
        i = 0;
        do {
            mINFORM("  %6d: 0x%lx   0x%lx   0x%lx", 
                    i, (unsigned long) r, (unsigned long) r->rd_fwd, 
                    (unsigned long) r->rd_bck); 
            r = r->rd_fwd;
            i++;
        } while (r != (void*) p->pr_vasHead);
    }

    return 0;
} /* add_proc() */

/*
 * static void restring_procs(void)
 *
 * Walk every process stored in gProclist.pl_procs and, for each one that
 * has no parent pointer yet, look its parent pid up in the same tree.
 * When the parent is found the child's pr_parent is set to it; children
 * whose parent is not in the tree are left untouched.
 */
static void
restring_procs(void)
{
    unsigned long key;
    void **entry, **parent_slot;
    process_t *child;

    mINFORM("In restring_procs()");

    /* for all procs in tree */
    key = 0L;
    entry = DTF(gProclist.pl_procs, key);
    while (entry != NULL) {
        child = (process_t*) *entry;

        /* only procs still missing a parent need a lookup */
        if (!child->pr_parent) {
            parent_slot = DTG(gProclist.pl_procs, child->pr_myParentPid);
            if (parent_slot) {
                child->pr_parent = *parent_slot;
                mINFORM(" found parent, joins child:%u to parent:%u",
                        child->pr_myPid, child->pr_myParentPid);
            }
        }

        entry = DTN(gProclist.pl_procs, key);
    }

} /* restring_procs() */

/*
 * static int exec_command(void)
 *
 * It's time to exec our benchmark.
 *
 * Forks; the child resets its user ids to the real uid and execs
 * gConf.command, the parent records the child pid in gConf.my_child.pid.
 * When gConf.inherit_pri is set, realtime priority is switched off
 * around the fork and switched back on in the parent afterwards.
 *
 * Returns 0 (also when replaying a trace, where nothing is started).
 * A failed fork() ends the program through prospect_exit(1).  A child
 * that cannot drop its ids or cannot exec exits with status 127.
 */
static int
exec_command(void)
{
    pid_t child_pid;
    uid_t ruid=getuid();

    mINFORM("In exec_command()");

    if (mTRACEIN) return 0;

    if (gConf.strm_info) {
        int i;
        inform_trace(__LINE__, __FILE__, " gConf.command:");
        /*
         * __FILE__ was missing here, which pushed "  %s" into the file
         * name slot and made the user's command word the format string.
         */
        for (i=0; gConf.command[i] != NULL; i++) {
            inform_trace(__LINE__, __FILE__, "  %s", gConf.command[i]);
        }
    }

    /* don't pollute child's priority */
    if (gConf.inherit_pri)
        make_realtime(REALTIME_OFF);

    /* do the fork */
    if ((child_pid = fork()) == -1) {
        perr("fork() failed");
        prospect_exit(1);
    }

    /* set before times */
    gBeforepoint = times(&gBeforetimes);

    /* for child */
    if (child_pid == 0) {
        /*
         * reset to original owner; never exec the command if this fails,
         * it would run with whatever ids prospect itself has
         */
        if (setresuid(ruid, ruid, ruid) != 0) {
            perr("setresuid() failed, not running \"%s\"", gConf.command[0]);
            _exit(127);
        }
        /* exec the command */
        execvp(gConf.command[0], gConf.command);
        /* oh-oh: only reached when the exec failed, so report failure */
        perr("execvp of \"%s\" failed", gConf.command[0]);
        _exit(127);
    }

    /* reset the realtime */
    if (gConf.inherit_pri)
        make_realtime(gConf.rt_pri);

    /* set my child */
    gConf.my_child.pid = child_pid;

    return 0;
} /* exec_command() */

/*
 * static void do_sampling(void)
 *
 * Main sample reading and records
 * processing loop.
 *
 * Calls op_read_buffer() repeatedly until it reports non-zero or until
 * waitpid() has reaped the child in gConf.my_child.pid.  Unless a trace
 * is being replayed, SIGINT and SIGQUIT are ignored for the duration so
 * that they hit the child only, and afterwards the child's fate
 * (signalled / stopped / neither) is stored in gConf.my_child and, when
 * a binary trace is written, saved with bfile_write_child().
 */
static void
do_sampling(void)
{
    /*
     * Start from a defined status: if the loop below ends before
     * waitpid() has stored one, the W* macros must not read garbage.
     * A status of 0 is reported as "not signalled".
     */
    int ret = 0;
    struct sigaction act;

    mINFORM("In do_sampling()");

    /* sa_mask must be defined for every sigaction() call made with act */
    sigemptyset(&act.sa_mask);
    act.sa_flags = 0;

    /* Let quit and interrupt signals kill child but not self */
    if (!mTRACEIN) {
        act.sa_handler = SIG_IGN;
        sigaction(SIGINT, &act, NULL);
        sigaction(SIGQUIT, &act, NULL);
    }

    /* if writing ascii trace out, take care of init */
    if (mASC_TRACEOUT) init_asctrace();

    /* read and process buffer while child still alive */
    do {
        if (op_read_buffer())
            break;
    } while (waitpid(gConf.my_child.pid, &ret, WNOHANG)!=gConf.my_child.pid);

    /* finish clocks */
    gAfterpoint = times(&gAftertimes);

    if (!mTRACEIN) {
        /* Don't ignore signals anymore, child is done */
        act.sa_handler = SIG_DFL;
        sigaction(SIGINT, &act, NULL);
        sigaction(SIGQUIT, &act, NULL);

        /* Informative output if child killed */
        if (WIFSIGNALED(ret)) {
            ferr("child terminated by signal %d\n", WTERMSIG(ret));
            gConf.my_child.signalled = TRUE;
            gConf.my_child.terminated = TRUE;
            gConf.my_child.ts_signal = WTERMSIG(ret);
        }
        else if (WIFSTOPPED(ret)) {
            ferr("child stopped by signal %d\n", WSTOPSIG(ret));
            gConf.my_child.signalled = TRUE;
            gConf.my_child.terminated = FALSE;
            gConf.my_child.ts_signal = WSTOPSIG(ret);
        }
        else {
            gConf.my_child.signalled = FALSE;
        }

        if (mTRACEOUT) bfile_write_child();
    }

} /* do_sampling() */

/*
 * static void init_asctrace(void)
 *
 * Initialize the ascii trace out with some stuff: the prospect
 * revision, the current date, the command line, the uname fields and
 * notes on whether a binary trace is being read and whether kernel
 * symbols are available.  Does nothing unless mASC_TRACEOUT is set.
 */
static void
init_asctrace(void)
{
    time_t timer;
    char *str;
    mINFORM("In init_asctrace()");

    if (!mASC_TRACEOUT) return;

    time(&timer);
    str = ctime(&timer);

    mPTREC("Prospect %s ASCII Trace File\n", gConf.prospect_rev);
    /*
     * ctime() text already ends in a newline; it can return NULL, which
     * must not be handed to %s
     */
    mPTREC("Generated on: %s", str ? str : "(unknown)\n");
    mPTREC("Command line: %s\n", gConf.command_line);
    mPTREC("Uname: %s %s %s %s %s\n",
            gConf.my_utsname.sysname,
            gConf.my_utsname.nodename,
            gConf.my_utsname.release,
            gConf.my_utsname.version,
            gConf.my_utsname.machine);
    if (mTRACEIN) {
        mPTREC("Reading binary trace file.\n");
    }
    if (!gConf.flags.do_kernel)
        mPTREC("Kernel symbols not available (no System.map file?).\n");
    mPTREC("----------------------------\n");

} /* init_asctrace() */

