/*
 * detailed_ita_smpl.c - detailed sampling output format for Itanium PMU
 *
 * Copyright (C) 2001-2003 Hewlett-Packard Co
 * Contributed by Stephane Eranian <eranian@hpl.hp.com>
 *
 * This file is part of pfmon, a sample tool to measure performance 
 * of applications on Linux/ia64.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
 * 02111-1307 USA
 */

#include "pfmon.h"
#include "pfmon_itanium.h"

#include "pfmon_itanium.h"
#include <perfmon/perfmon_default_smpl.h>

/* name under which this sampling module is registered and reported in warnings */
#define SMPL_MOD_NAME		"detailed-itanium"
/*
 * NOTE: evaluates to non-zero when the major number of version h DIFFERS from
 * the major number of PFM_DEFAULT_SMPL_VERSION, i.e. "true" means mismatch
 */
#define CHECK_VERSION(h)	(PFM_VERSION_MAJOR((h)) != PFM_VERSION_MAJOR(PFM_DEFAULT_SMPL_VERSION))

/* number of sampling buffer entries used when --smpl-entries is not given */
#define DETAILED_ITA_DFL_SMPL_ENTRIES	2048UL

/*
 * options specific to this sampling format
 */
typedef struct {
	unsigned long smpl_entries;	/* number of sampling buffer entries (--smpl-entries) */
} detailed_ita_options_t;

/* current option values; smpl_entries gets its default in detailed_ita_initialize_module() */
static detailed_ita_options_t detailed_ita_options;

/*
 * forward declaration: the module descriptor is initialized at the end of
 * this file but detailed_ita_initialize_ctx_arg() reads its uuid before that
 */
pfmon_smpl_module_t detailed_itanium_smpl_module;

/*
 * show_ita_btb_reg - print one branch trace buffer register in clear text
 *
 * fp       : output stream
 * hash_desc: symbol hash handed to pfmon_print_address()
 * j        : PMD number used in the printed label
 * reg      : value of the register
 *
 * An entry with neither the b nor the mp bit set is reported as not valid
 * and only its summary line is printed. Otherwise the b bit selects between
 * a "Source Address" entry (b set) and a "Target Address" entry (b clear).
 *
 * Returns the value of the last fprintf()/fputc() issued.
 */
static int
show_ita_btb_reg(FILE *fp, void *hash_desc, unsigned long j, pfm_ita_pmd_reg_t reg)
{
	uintptr_t src;
	int in_use;
	int status;

	in_use = (reg.pmd8_15_ita_reg.btb_b != 0 || reg.pmd8_15_ita_reg.btb_mp != 0) ? 1 : 0;

	status = fprintf(fp, "\tPMD%-2lu: 0x%016lx b=%d mp=%d valid=%c\n",
			 j,
			 reg.pmd_val,
			 reg.pmd8_15_ita_reg.btb_b,
			 reg.pmd8_15_ita_reg.btb_mp,
			 in_use ? 'Y' : 'N');

	/* nothing more to say about an unused entry */
	if (in_use == 0)
		return status;

	if (reg.pmd8_15_ita_reg.btb_b == 0) {
		/* target entry: only the bundle address is shown */
		fprintf(fp, "\t       Target Address: ");
		pfmon_print_address(fp, hash_desc, (reg.pmd8_15_ita_reg.btb_addr<<4));
		fputc('\n', fp);
		return fputc('\n', fp);
	}

	/*
	 * source entry: the slot number is merged into the low bits of the
	 * address only when it is below 3; the same test drives the Taken flag
	 */
	src = reg.pmd8_15_ita_reg.btb_addr<<4;
	if (reg.pmd8_15_ita_reg.btb_slot < 3)
		src |= reg.pmd8_15_ita_reg.btb_slot;

	fprintf(fp, "\t       Source Address: ");
	pfmon_print_address(fp, hash_desc, src);

	return fprintf(fp, "\n\t       Taken=%c Prediction: %s\n\n",
		       reg.pmd8_15_ita_reg.btb_slot < 3 ? 'Y' : 'N',
		       reg.pmd8_15_ita_reg.btb_mp ? "Failure" : "Success");
}

/*
 * show_ita_btb_trace - print the branch trace buffer entries selected by PMD16
 *
 * fp       : output stream
 * hash_desc: symbol hash handed to pfmon_print_address()
 * reg      : value of PMD16 (btbi_bbi index and btbi_full flag)
 * btb_regs : the 8 recorded BTB registers, slot n holding PMD(n+8)
 *
 * The walk covers slots [start, bbi), wrapping modulo 8:
 *	- full set  : start = bbi, i.e. all 8 slots beginning at bbi
 *	- full clear: start = 0,   i.e. slots 0 .. bbi-1
 *
 * With full clear and bbi == 0 that range is empty, so nothing is printed
 * (a plain do/while would have dumped all 8 slots in that case).
 * NOTE(review): this reads bbi as the next slot to be written, which is what
 * the start/last computation below implies - confirm against the PMU manual.
 *
 * Returns 0 when the trace is empty, otherwise the value returned by the last
 * show_ita_btb_reg() call. The walk stops at the first negative (failed) result
 * so that a write error is not hidden by a later successful write.
 */
static int
show_ita_btb_trace(FILE *fp, void *hash_desc, pfm_ita_pmd_reg_t reg, pfm_ita_pmd_reg_t *btb_regs)
{
	unsigned long i, last; 
	int ret = 0;

	i    = reg.pmd16_ita_reg.btbi_full ? reg.pmd16_ita_reg.btbi_bbi : 0;
	last = reg.pmd16_ita_reg.btbi_bbi;

	DPRINT(("btb_trace: i=%lu last=%lu bbi=%d full=%d\n", 
			i,
			last, 
			reg.pmd16_ita_reg.btbi_bbi,
			reg.pmd16_ita_reg.btbi_full));

	/* buffer never wrapped and index still at 0: no entry to show */
	if (reg.pmd16_ita_reg.btbi_full == 0 && last == 0) return 0;

	do {
		ret = show_ita_btb_reg(fp, hash_desc, i+8, btb_regs[i]);
		if (ret < 0) break;
		i = (i+1) % 8;
	} while (i != last);

	return ret;
}

/*
 * print_ita_reg - print one sampled PMD in clear text
 *
 * csmpl   : sampling descriptor; provides the output stream (smpl_fp) and the
 *           symbol hash (sym_hash) handed to pfmon_print_address()
 * rnum    : index of the PMD
 * rval    : raw value of the PMD
 * btb_regs: storage for PMD8-PMD15; these are recorded in slot rnum-8 instead
 *           of being printed, and are dumped once PMD16 is seen
 *
 * Returns 0 when nothing had to be printed, otherwise the value of an output
 * call made for the register: the last one, except for PMD17 where it is the
 * result of the first fprintf().
 */
static int
print_ita_reg(pfmon_smpl_desc_t *csmpl, int rnum, unsigned long rval, pfm_ita_pmd_reg_t *btb_regs)
{
	static const char *tlb_levels[]={"N/A", "L2DTLB", "VHPT", "SW"};
	static const char *tlb_hdls[]={"VHPT", "SW"};

	pfmon_ita_options_t *ita_opt = (pfmon_ita_options_t *)options.model_options;
	FILE *out = csmpl->smpl_fp;
	void *syms = csmpl->sym_hash;
	pfm_ita_pmd_reg_t pmd;
	int status;

	pmd.pmd_val = rval;

	switch (rnum) {
	case 0:
		fprintf(out, "\tPMD0 : 0x%016lx, valid=%c, cache line ",
			pmd.pmd_val,
			pmd.pmd0_ita_reg.iear_v ? 'Y': 'N');

		/* cache line address */
		pfmon_print_address(out, syms, (pmd.pmd0_ita_reg.iear_icla<<5));

		if (ita_opt->opt_use_iear_tlb)
			return fprintf(out, ", TLB %s\n", tlb_hdls[pmd.pmd0_ita_reg.iear_tlb]);

		return fputc('\n', out);

	case 1:
		/* PMD1 is only printed (as a latency) when the iear tlb option is off */
		if (ita_opt->opt_use_iear_tlb)
			return 0;

		return fprintf(out, "\tPMD1 : 0x%016lx, latency %d\n",
			       pmd.pmd_val,
			       pmd.pmd1_ita_reg.iear_lat);

	case 2:
		fprintf(out, "\tPMD2 : 0x%016lx, address ", pmd.pmd_val);
		pfmon_print_address(out, syms, pmd.pmd_val);
		return fputc('\n', out);

	case 3:
		fprintf(out, "\tPMD3 : 0x%016lx ", pmd.pmd_val);

		if (ita_opt->opt_use_dear_tlb)
			return fprintf(out, ", TLB %s\n", tlb_levels[pmd.pmd3_ita_reg.dear_level]);

		return fprintf(out, ", latency %d\n", pmd.pmd3_ita_reg.dear_latency);

	case 16:
		/*
		 * PMD16 carries the BTB index: use it to dump the BTB
		 * registers recorded so far
		 */
		return show_ita_btb_trace(out, syms, pmd, btb_regs);

	case 17:
		status = fprintf(out, "\tPMD17: 0x%016lx, valid %c, address ",
				 pmd.pmd_val,
				 pmd.pmd17_ita_reg.dear_vl ? 'Y': 'N');

		pfmon_print_address(out, syms, ((pmd.pmd17_ita_reg.dear_iaddr << 4) | pmd.pmd17_ita_reg.dear_slot));

		fputc('\n', out);
		return status;

	default:
		break;
	}

	/*
	 * PMD8-PMD15 are BTB registers: record them for later
	 */
	if (rnum > 7 && rnum < 16) {
		btb_regs[rnum-8] = pmd;
		return 0;
	}

	/* any other register is printed as a raw value */
	return fprintf(out, "\tPMD%-2d: 0x%016lx\n", rnum, pmd.pmd_val);
}


/*
 * detailed_ita_process_samples - print all entries of the sampling buffer
 *
 * csmpl: sampling descriptor (buffer header, output stream, symbol hash,
 *        entry counters)
 *
 * Buffer layout as walked by this routine: the entries start right after the
 * buffer header; each entry is an entry header immediately followed by the
 * values of the sampled PMDs, and the next entry starts right after the last
 * of those values. Which PMDs are present in an entry is given by the bitmask
 * options.rev_smpl_pmds[] indexed by the overflowed PMD of that entry.
 *
 * On success the running entry counter (aggregated or per-descriptor) is
 * advanced by the number of entries processed and last_count is updated.
 *
 * Returns 0 on success, -1 when writing to the sampling file failed (the
 * counters are left untouched in that case).
 */
static int
detailed_ita_process_samples(pfmon_smpl_desc_t *csmpl)
{
	pfm_ita_pmd_reg_t btb_regs[PMU_ITA_NUM_BTB];
	pfm_default_smpl_hdr_t *hdr;
	pfm_default_smpl_entry_t *ent;
	FILE *fp = csmpl->smpl_fp;
	void *hash_desc = csmpl->sym_hash;
	unsigned long msk, entry, i;
	uint64_t count;
	unsigned long last_smpl_pmds = 0UL;
	unsigned int ovfl_pmd, last_ovfl_pmd = PFMON_MAX_PMDS; /* undefined PMD */
	pfm_ita_pmd_reg_t *reg;
	size_t k;
	int j, ret;

	hdr	   = csmpl->smpl_hdr;
	ent	   = (pfm_default_smpl_entry_t *)(hdr+1);
	entry	   = options.opt_aggr ? *csmpl->aggr_count : csmpl->entry_count;
	count      = hdr->hdr_count;

	DPRINT(("hdr_count=%lu hdr=%p\n", count, hdr));

	for(i=0; i < count; i++) {
		ret = fprintf(fp, 
			"entry %lu PID:%d CPU:%d STAMP:0x%lx IIP:",
			entry,
			ent->pid,
			ent->cpu,
			ent->tstamp);
		if (ret < 0) goto error;

		pfmon_print_address(fp, hash_desc, ent->ip);

		ovfl_pmd = ent->ovfl_pmd;

		ret = fprintf(fp, "\n\tOVFL: %d LAST_VAL: %lu\n", ent->ovfl_pmd, -ent->last_reset_val);
		if (ret < 0) goto error;

		/* the sampled PMD values follow the entry header */
		reg = (pfm_ita_pmd_reg_t*)(ent+1);

		/* the mask lookup is skipped when the overflowed PMD is the same as for the previous entry */
		msk = ovfl_pmd == last_ovfl_pmd ? last_smpl_pmds : options.rev_smpl_pmds[ovfl_pmd];
		last_smpl_pmds = msk;

		/*
		 * start every entry with cleared BTB registers: the trace printed
		 * for PMD16 must never show indeterminate stack content or values
		 * left over from a previous entry
		 */
		for (k = 0; k < sizeof(btb_regs)/sizeof(btb_regs[0]); k++)
			btb_regs[k].pmd_val = 0;

		/* bit j of the mask set means PMDj is the next value in the entry */
		for(j=0; msk; msk >>=1, j++) {	
			if ((msk & 0x1) == 0) continue;
			ret = print_ita_reg(csmpl, j, reg->pmd_val, btb_regs);
			/* output error detection: stop at the first failed write */
			if (ret < 0) goto error;
			reg++;
		}

		last_ovfl_pmd = ovfl_pmd;

		/* the next entry starts right after the last PMD value */
		ent = (pfm_default_smpl_entry_t *)reg;	
		entry++;
	}
	/*
	 * when aggregation is used, we are guaranteed sequential access to
	 * this routine by a higher level lock
	 */
	if (options.opt_aggr) {
		*csmpl->aggr_count += count;
	} else {
		csmpl->entry_count += count;
	}
	csmpl->last_count = count;

	return 0;
error:
	warning("cannot write to sampling file: %s\n", strerror(errno));
	return -1;
}

/*
 * detailed_ita_check_version - verify the version of the sampling buffer format
 *
 * Only the major number of the version found in the buffer header is compared
 * with the major number of PFM_DEFAULT_SMPL_VERSION.
 *
 * Returns 0 when the major numbers match, -1 (after a warning) otherwise.
 */
static int
detailed_ita_check_version(pfmon_smpl_desc_t *csmpl)
{
	pfm_default_smpl_hdr_t *smpl_hdr = csmpl->smpl_hdr;

	/* CHECK_VERSION() is true on mismatch */
	if (!CHECK_VERSION(smpl_hdr->hdr_version))
		return 0;

	warning("format %s expect format v%u.x not v%u.%u %u\n", 
		SMPL_MOD_NAME,
		PFM_VERSION_MAJOR(PFM_DEFAULT_SMPL_VERSION),
		PFM_VERSION_MAJOR(smpl_hdr->hdr_version),
		PFM_VERSION_MINOR(smpl_hdr->hdr_version),
		smpl_hdr->hdr_version);

	return -1;
}

/*
 * detailed_ita_check_new_samples - tell whether the buffer holds unseen samples
 *
 * The buffer is considered already processed when all of the following hold:
 *	- the overflow count of the header is not above the recorded one
 *	- the recorded overflow count is not ~0UL
 *	- the entry count of the header equals the recorded last_count
 *
 * Returns -1 when the samples must be skipped, 0 otherwise; in the latter
 * case the overflow count of the header is recorded in the descriptor.
 */
static int
detailed_ita_check_new_samples(pfmon_smpl_desc_t *csmpl)
{
	pfm_default_smpl_hdr_t *smpl_hdr = csmpl->smpl_hdr;
	uint64_t prev_ovfl = csmpl->last_ovfl;

	if (smpl_hdr->hdr_overflows <= prev_ovfl) {
		if (prev_ovfl != ~0UL) {
			if (smpl_hdr->hdr_count == csmpl->last_count) {
				DPRINT(("skipping identical set of samples %lu <= %lu\n", smpl_hdr->hdr_overflows, prev_ovfl)); 
				return -1;
			}
		}
	}

	/*
	 * XXX: last_ovfl is a format specific field living in a generic structure
	 */
	csmpl->last_ovfl = smpl_hdr->hdr_overflows;

	return 0;
}

/*
 * command line options of this sampling format
 *
 * option code ranges:
 * 	000-255   reserved for generic options
 * 	400-499   reserved for PMU specific options
 * 	500-599   reserved for format specific options
 */
static struct option detailed_ita_cmd_options[]={
	{ .name = "smpl-entries", .has_arg = required_argument, .flag = NULL, .val = 500 },
	{ .name = NULL,           .has_arg = no_argument,       .flag = NULL, .val = 0   }	/* end marker */
};

/*
 * detailed_ita_show_options - print the help text for the options of this format
 */
static void
detailed_ita_show_options(void)
{
	printf("\t--smpl-entries=val\t\t"
	       "set number of entries for sampling buffer (default %lu)\n",
	       DETAILED_ITA_DFL_SMPL_ENTRIES);
}

/*
 * detailed_ita_parse_options - handle one command line option of this format
 *
 * code  : option code (the val field of detailed_ita_cmd_options[])
 * optarg: argument of the option
 * evt   : unused by this format
 *
 * Code 500 (--smpl-entries) accepts a number in any base understood by
 * strtoul() with base 0. The argument is rejected when it is empty, when it
 * has trailing characters or when it does not fit in an unsigned long.
 * A second --smpl-entries is detected by comparing the current value with the
 * default, hence it goes unnoticed if the first one specified the default.
 *
 * return value:
 * 	 0 means we understood the option
 * 	-1 unknown option
 */
static int
detailed_ita_parse_options(int code, char *optarg, pfmon_lib_param_t *evt)
{
	char *endptr = NULL;
	unsigned long val;

	switch(code) {
		case  500:
			if (detailed_ita_options.smpl_entries != DETAILED_ITA_DFL_SMPL_ENTRIES) 
				fatal_error("smpl-entries already defined\n");

			/* strtoul() reports overflow through errno only */
			errno = 0;
			val   = strtoul(optarg, &endptr, 0);

			/*
			 * endptr == optarg: no digit was consumed (e.g. empty string),
			 * which strtoul() would otherwise silently report as 0
			 */
			if (endptr == optarg || *endptr != '\0' || errno == ERANGE) 
				fatal_error("invalid number of entries: %s\n", optarg);

			detailed_ita_options.smpl_entries = val;
			break;
		default:
			return -1;
	}
	return 0;
}


/*
 * detailed_ita_get_fmt_arg_size - size in bytes of the context argument
 * structure used by this format (pfm_default_smpl_ctx_arg_t)
 */
static size_t
detailed_ita_get_fmt_arg_size(void)
{
	const size_t arg_size = sizeof(pfm_default_smpl_ctx_arg_t);

	return arg_size;
}

/* upper bound on the number of PMDs per entry used to size the fudge factor */
#define MAX_PMD_COUNT		64
/* size of an entry carrying MAX_PMD_COUNT PMD values */
#define FUDGE_FACTOR		(sizeof(pfm_default_smpl_entry_t)+(sizeof(unsigned long)*MAX_PMD_COUNT))
/* size of an entry carrying npmd PMD values */
#define ENTRY_SIZE(npmd)	(sizeof(pfm_default_smpl_entry_t)+((npmd)*sizeof(unsigned long)))
/*
 * detailed_ita_initialize_ctx_arg - fill the format specific context argument
 *
 * arg            : context argument, used as a pfm_default_smpl_ctx_arg_t
 * max_pmds_sample: largest number of PMDs recorded in one entry
 *
 * Sets the size of the sampling buffer and the uuid of the sampling format.
 * Always returns 0.
 */
static int
detailed_ita_initialize_ctx_arg(pfmon_ctx_arg_t *arg, unsigned int max_pmds_sample)
{
	pfm_default_smpl_ctx_arg_t *fmt_arg = (pfm_default_smpl_ctx_arg_t *)arg;
	unsigned long per_entry = ENTRY_SIZE(max_pmds_sample);
	unsigned long total;

	/*
	 * The kernel module considers the buffer full as soon as less than
	 * PFM_DEFAULT_MAX_ENTRY_SIZE bytes are left in it, and any entry size
	 * is <= PFM_DEFAULT_MAX_ENTRY_SIZE. Without extra room we would thus
	 * always record fewer entries than requested by the user (or the
	 * default). The fudge factor enlarges the buffer slightly so that
	 * exactly the requested number of entries fits.
	 */
	total  = sizeof(pfm_default_smpl_hdr_t);
	total += FUDGE_FACTOR - per_entry;
	total += detailed_ita_options.smpl_entries * per_entry;

	fmt_arg->buf_arg.buf_size = total;

	/*
	 * tell which format we are using by copying its uuid
	 */
	memcpy(fmt_arg->ctx_arg.ctx_smpl_buf_id, detailed_itanium_smpl_module.uuid, sizeof(pfm_uuid_t));

	vbprintf("min buffer entries=%lu buffer_size=%lu max PMD/entry=%u\n", 
		 detailed_ita_options.smpl_entries, 
		 fmt_arg->buf_arg.buf_size, 
		 max_pmds_sample);

	DPRINT(("max_pmds_sample=%u buf_size=%lu fudge=%lu buffer_header=%lu entry_header=%lu (max)entry_size=%lu\n", 
		max_pmds_sample, 
		fmt_arg->buf_arg.buf_size, 
		FUDGE_FACTOR, 
		sizeof(pfm_default_smpl_hdr_t), 
		sizeof(pfm_default_smpl_entry_t), 
		per_entry));

	return 0;
}

/*
 * detailed_ita_initialize_module - module initialization
 *
 * Resets smpl_entries to its default and registers the command line options
 * of this format. Returns whatever pfmon_register_smpl_mod_options() returns.
 */
static int
detailed_ita_initialize_module(void)
{
	const size_t table_size = sizeof(detailed_ita_cmd_options);

	detailed_ita_options.smpl_entries = DETAILED_ITA_DFL_SMPL_ENTRIES;

	return pfmon_register_smpl_mod_options(detailed_ita_cmd_options, table_size);
}

/*
 * detailed_ita_print_header - write the format specific line of the sampling
 * file header (the number of sampling buffer entries in use)
 *
 * The result of the write is not checked: always returns 0.
 */
static int
detailed_ita_print_header(pfmon_smpl_desc_t *smpl)
{
	FILE *out = smpl->smpl_fp;
	unsigned long nentries = detailed_ita_options.smpl_entries;

	fprintf(out, "# min sampling buffer entries: %lu\n", nentries);

	return 0;
}

/*
 * descriptor of the detailed-itanium sampling module
 */
pfmon_smpl_module_t detailed_itanium_smpl_module ={
	/* identification */
	.name		    = SMPL_MOD_NAME,
	.description	    = "itanium clear text sampling",
	.pmu_mask	    = PFMON_PMU_MASK(PFMLIB_ITANIUM_PMU),
	.uuid		    = PFM_DEFAULT_SMPL_UUID,

	/* module setup and command line options */
	.initialize_module  = detailed_ita_initialize_module,
	.show_options       = detailed_ita_show_options,
	.parse_options      = detailed_ita_parse_options,

	/* context creation */
	.get_fmt_arg_size   = detailed_ita_get_fmt_arg_size,
	.initialize_ctx_arg = detailed_ita_initialize_ctx_arg,

	/* sampling buffer processing */
	.check_version	    = detailed_ita_check_version,
	.check_new_samples  = detailed_ita_check_new_samples,
	.print_header       = detailed_ita_print_header,
	.process_samples    = detailed_ita_process_samples,
};
