/*
 *  plex86: run multiple x86 operating systems concurrently
 *  Copyright (C) 1999-2001  Kevin P. Lawton
 *
 *  dt.h: Dynamic Translation header file
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 */

#ifndef __DT_H
#define __DT_H


/* ===== DT CONFIGURATION OPTIONS ===== */

/*
 * DT_DEBUG: non-zero enables the debug dump and turns the
 * InstrG2THit()/InstrG2TMiss() macros in this header into real counters.
 */
#define DT_DEBUG 0  /* Print debug dump */

/* DT_ON: the alternative setting is kept commented out for quick toggling. */
#define DT_ON 1 /* Use DT engine to run guest code */
//#define DT_ON 0       /* Run guest code native for speed comparison */

/* This is how many loop iterations the major body of code takes */
//#define DT_MacroLoops 2000000
#define DT_MacroLoops    400000

/* DT_Workload selects what each guest code section does. */
//#define DT_Workload 0   /* NOP */
#define DT_Workload 1 /* Cascading add loop (DT_MicroLoops) */

/*
 *  This is how many tight-loop iterations each code section takes.
 *  Must be between 1 and 255.
 */

//#define DT_MicroLoops  100
//#define DT_MicroLoops   10
#define DT_MicroLoops  5

/*
 *  NOTE(review): presumably 1 = run the r3h handlers on the separate
 *  r3h_stack declared in this header -- confirm against the handler code.
 */
#define DT_UseR3hStack 1

/*
 *  1=Use backpatch scheme for static out-of-page branches, 0=Always
 *  use lookup function.
 */

#define DT_UseBackpatch 1

/*
 *  Simulate guest context switches @ N usec intervals.  Since the
 *  system timer is used, the lower bound of this is determined by
 *  the setitimer() resolution.
 */

//#define DT_GuestTimeslice 10000
#define DT_GuestTimeslice 500000

/*
 *  Selects the DT_G2THashSelect() variant defined in this header:
 *  0 = shift the address right by 5 before masking, anything else =
 *  mask the low bits directly.
 */
#define DT_G2THashMethod 1

/*
 *  Set to 1 only when testing the sparse table logic.  The macro must
 *  stay defined: it is tested by value with #if, not with #ifdef.
 */
#define TestSparseTables 0

/* Do some extra Sparse Table related sanity checks. */
#define STExtraSanityChecks 1

/*
 * G2T hash instrumentation hooks.  With DT_DEBUG non-zero they increment
 * the instrG2THit[] / instrG2TMiss counters declared in this header.
 * Otherwise they expand to an empty GNU statement expression, so the
 * argument of InstrG2THit() is not evaluated at all in non-debug builds;
 * do not pass an expression with side effects.
 */
#if DT_DEBUG
#define InstrG2THit(i) ({instrG2THit[i]++;})
#define InstrG2TMiss() ({instrG2TMiss++;})
#else
#define InstrG2THit(i) ({})
#define InstrG2TMiss() ({})
#endif

/*
 * Cache line size in bytes.  The hash blocks described in this header are
 * sized to 32 bytes so that one block fits one line.
 * NOTE(review): presumably used to align the hash tables where they are
 * defined -- confirm at the definitions.
 */
#define CacheLineAlignment 32

/* ===== END OF DT CONFIGURATION OPTIONS ===== */


/*
 * Fixed-width integer types used throughout the DT code.
 *
 * The 32-bit types are based on 'int' rather than 'long': 'long' is 64
 * bits wide on LP64 hosts, which would silently double the size of every
 * Bit32u field, break the 32-bit bitfield/raw unions and invalidate the
 * table size calculations in this header.  On the original ILP32 target
 * 'int' and 'long' have the same size, so object layout there is unchanged.
 */
typedef unsigned int   Bit32u;
typedef unsigned short Bit16u;
typedef unsigned char  Bit8u;
typedef signed int     Bit32s;
typedef signed short   Bit16s;
typedef signed char    Bit8s;

/*
 * Compile-time width checks: a negative array size stops the build on any
 * platform where the assumptions above do not hold.
 */
typedef char dtCheckBit32uIs4Bytes[(sizeof(Bit32u) == 4) ? 1 : -1];
typedef char dtCheckBit16uIs2Bytes[(sizeof(Bit16u) == 2) ? 1 : -1];

/*
 * Saved register image: flags word first, then the eight general
 * registers, then the ES and DS selectors.  Every slot is one
 * 'unsigned int' and the struct is packed, so the layout has no padding.
 *
 * NOTE(review): the edi..eax order matches what an x86 PUSHA leaves in
 * memory (lowest address first), and dummy_esp looks like the slot that
 * instruction fills with a stack pointer value nobody restores --
 * confirm against the assembly that builds/consumes this frame.
 */
typedef struct {
  unsigned int eflags;     /* flags register */

  unsigned int edi;        /* general registers */
  unsigned int esi;
  unsigned int ebp;
  unsigned int dummy_esp;  /* placeholder slot, see note above */
  unsigned int ebx;
  unsigned int edx;
  unsigned int ecx;
  unsigned int eax;

  unsigned int es;         /* data segment selectors */
  unsigned int ds;
  } __attribute__ ((packed)) gc_t;



/*
 * Code labels defined outside this header.
 * NOTE(review): the __seqNNNN names look like the entry points of the
 * guest test code sequences, and __exit_ok/__exit_bad like its
 * success/failure exits -- confirm against the source that defines them.
 */
extern void __seq0000(void);
extern void __seq0001(void);
extern void __seq0002(void);
extern void __seq0003(void);
extern void __seq0004(void);
extern void __exit_ok(void);
extern void __exit_bad(void);


/*
 * Area for storing handler values.  The r3h_ prefix is shared with the
 * __r3h_* handler labels and the r3h stack declared in this header.
 * NOTE(review): the individual meanings below are read off the names
 * only -- confirm against the handler assembly.
 */
extern Bit32u r3h_DS;          /* presumably the handler's data segment selector */
extern Bit32u r3h_ESP;         /* presumably the handler's saved stack pointer */
extern Bit32u r3h_ESP_empty;   /* presumably the stack pointer of an empty handler stack */
extern Bit32u r3h_target_EIP;  /* presumably the branch target passed to a handler */
extern volatile Bit32u globalID; /* volatile: may change outside normal program flow */


/*
 * Area for storing guest values: the guest's stack segment, stack
 * pointer and instruction pointer.
 */
extern Bit32u guest_SS;
extern Bit32u guest_ESP;
extern Bit32u guest_EIP;


/*
 * The arrays below are declared with incomplete types, so sizeof cannot
 * be applied to them through this header.
 * NOTE(review): R3H_STACK_SIZE and GUEST_STACK_SIZE are presumably the
 * byte sizes of r3h_stack and guest_stack -- confirm at the definitions.
 */
#define R3H_STACK_SIZE 65536
extern unsigned char r3h_stack[];


#define GUEST_STACK_SIZE 65536
extern unsigned char guest_page0[];
extern unsigned char guest_page1[];
extern Bit8u guest_stack[];
/* NOTE(review): gp0/gp1 presumably point into guest_page0/guest_page1 -- confirm. */
extern unsigned char *gp0, *gp1;


/*
 * NOTE(review): hack_guest_code presumably prepares/patches the guest test
 * code before it is run, and __execute_guest_code_native presumably runs
 * it without the DT engine (the DT_ON == 0 comparison mode) -- neither is
 * visible from this header; confirm at the definitions.
 */
void hack_guest_code(void);
void __execute_guest_code_native(void);


/*
 * Symbols declared as byte arrays rather than functions, so C code can
 * take their addresses as plain data pointers.
 * NOTE(review): presumably these are the entry points of the r3h handler
 * routines for dynamic branches, static branches, priming and returns --
 * confirm against the handler assembly.
 */
extern Bit8u __r3h_branch_dynamic[];
extern Bit8u __r3h_branch_static[];
extern Bit8u __r3h_prime[];
extern Bit8u __r3h_ret[];

/*
 * Counters incremented by the InstrG2THit(i) / InstrG2TMiss() macros when
 * DT_DEBUG is non-zero; instrG2THit is indexed by that macro's argument.
 */
extern unsigned instrG2THit[];
extern unsigned instrG2TMiss;


/*
 *  The guest Linear to Meta index Hash table.  We need an efficient
 *  hash table to store translations from guest linear page
 *  addresses (upper 20 bits) to the DT meta page for that code page.
 *  This is quite similar to the i-TLB used in the CPU, except that
 *  rather than translate to physical addresses, we translate to
 *  the index of the DT meta page.
 *
 *  In the DT meta page, is a lookup table and other data specific
 *  to that particular code page, which can be accessed to find the
 *  address of a specific translated instruction if it exists.  Thus
 *  when we encounter new guest instruction addresses which are not
 *  in the G2T table, we have an efficient way to lookup the meta
 *  info for that code page.
 *
 *  This translation only concerns the upper 20bits, as the lower
 *  12bits are the page offset.  Bits 15..12 (4bits) from the linear
 *  address are used to select the hash block used.  Bits 31..16 (16bits)
 *  are stored in one of the translation pairs along with the
 *  corresponding meta index (which is also 16bits).  Since both
 *  quantities of each pair are 16bits, 8 pairs were chosen as the size of
 *  the hash block, because this fits neatly into 1 cache line on the
 *  Pentium+.  Thus DO NOT CHANGE the dimensions of this structure without
 *  considering the data size issues.
 *
 *    bits 31..16: stored as tag
 *    bits 15..12: selects hash block [0..15]
 *    bits 11..00: (page offset not used)
 */

/* Table shape: 16 hash blocks (rows), each holding 8 tag/index pairs. */
#define DT_L2MHashHeight 16
#define DT_L2MHashWidth   8

/* Row selector: the low 4 bits of the linear page address. */
#define DT_LPAToMIHash(pageAddr) ((pageAddr) & 0xf)

/* Tag: the 16 bits directly above the row selector bits. */
#define DT_LPAToMITag(pageAddr) (((pageAddr) >> 4) & 0xffff)

/* The following index value signifies the entry is available and does
 * not point to any construct.  It is the largest value a 16-bit index
 * can hold, so it fits the Bit16u meta index field of the L2M hash pairs.
 */
#define MetaIndexNone 0xffff

/*
 * The L2M hash table type: DT_L2MHashHeight rows of DT_L2MHashWidth
 * packed {tag, metai} pairs.  Each pair is two 16-bit fields (4 bytes),
 * so one row of 8 pairs is 32 bytes.
 */
typedef struct {
  Bit16u tag;    /* compared against DT_LPAToMITag() of the page address */
  Bit16u metai;  /* meta index for that page (see MetaIndexNone) */
  } __attribute__ ((packed)) dtL2MHash_t[DT_L2MHashHeight][DT_L2MHashWidth];

/* 16*8*4 = 512bytes */

/* The single table instance; defined outside this header. */
extern dtL2MHash_t dtL2MHash;


/*
 *  The guest Linear to Translated address Hash table.  Once instructions
 *  have been translated and stored in the DT buffer, the address pairing
 *  (guest and translated instruction addresses) can be stored in this
 *  hash table.  For branch handling, this makes an efficient way to
 *  determine the associated translation buffer address for a given
 *  branch target address.  No extra protection checks are necessary
 *  before the branch is executed.  To allow for this, the following
 *  actions must occur:
 *
 *    - Buffer is completely invalidated for user<-->supervisor transitions
 *    - Buffer is completely invalidated for CS segment reloads
 */

/*
 * G2T table shape: DT_G2THashHeight rows of DT_G2THashWidth pairs.  Each
 * pair is two 32-bit offsets (8 bytes), so a row of 4 pairs is 32 bytes.
 * The mask in DT_G2THashSelect() (0x1fff) covers exactly 8192 rows; keep
 * the two in step.
 */
#define DT_G2THashWidth  4  /* Fits in 1 Pentium+ cache line */
#define DT_G2THashHeight 8192   /* Need to tune this value */

/*
 *  Row selector for the G2T hash table.  The result is always in the
 *  range 0..0x1fff.  DT_G2THashMethod picks the variant: 0 discards the
 *  low 5 address bits before masking, any other value masks the low
 *  bits directly.
 *
 *  NOTE: This hash select function needs to be coordinated with the
 *  hand coded assembly & generated tcode.
 */

#if DT_G2THashMethod != 0
#define DT_G2THashSelect(gAddr) ( (gAddr) & 0x00001fff )    /* Need to tune this */
#else
#define DT_G2THashSelect(gAddr) ( ((gAddr)>>5) & 0x00001fff )   /* Need to tune this */
#endif

/* 8192*4*8 = 256k */

/*
 * All-ones 32-bit offset value.
 * NOTE(review): presumably stored in tOff to mark a pair as unused --
 * confirm in dtInitG2THashTable().
 */
#define TcodeOffsetNone 0xffffffff

/*
 * The G2T hash table type: DT_G2THashHeight rows of DT_G2THashWidth packed
 * {gOff, tOff} pairs.
 */
typedef struct {
  Bit32u gOff;  /* guest instruction offset */
  Bit32u tOff;  /* offset of the corresponding translated code */
  } __attribute__ ((packed)) dtG2THash_t[DT_G2THashHeight][DT_G2THashWidth];

/* The single table instance; defined outside this header. */
extern dtG2THash_t dtG2THash;


/*
 * A base/limit pair.  The global CS declared in this header has this type.
 * NOTE(review): presumably a cached segment descriptor -- confirm units
 * and whether limit is inclusive where it is used.
 */
typedef struct {
  Bit32u base;
  Bit32u limit;
  } descriptor_t;




/* ============================
 * Sparse table lookup features
 * ============================
 */

/* A sparse table is used to efficiently maintain associations
 * between guest instruction addresses and corresponding translated
 * code sequence addresses, both in forward and reverse directions.
 *
 * L0: bits 11..8 (4bits)
 * L1: bits  7..5 (3bits)
 * L2: bits  4..0 (5bits)
 */


/* Level 2 (L2) of the sparse lookup is actually a linked
 * list, where each node (cluster) contains a fixed number of
 * entries: STForwardL2N for the forward table, STReverseL2N for
 * the reverse table.  Using a full (array) frame to cover this
 * part of the address space would consume a lot of space because
 * a lot of addresses will not contain the start of scanned
 * instructions.  The entry count can be 1 or more.  Storing more
 * in each cluster increases the search efficiency, but may consume
 * more space when elements are not used.  Values of
 * 1, 2, or 3 may make sense.
 */

/*
 * Forward (guest instruction address -> tcode) sparse table dimensions.
 * L0N and L1N are the frame sizes for the 4-bit and 3-bit address slices
 * and are fixed by that split; L2N is the number of entries per L2
 * cluster.
 */
#define STForwardL0N  16   /* Dont change */
#define STForwardL1N   8   /* Dont change */
#define STForwardL2N   3   /* (configurable) */

/*
 * One node of an L2 linked list.  Each element is a 32-bit word that can
 * be read either field by field or as a whole through 'raw'; the three
 * bitfields add up to exactly 32 bits (5 + 7 + 20).
 */
typedef struct stForwardL2Cluster_tag {
  union {
    struct {
      Bit32u addr4_0:5; /* For address match of bits 4..0 */
      Bit32u attributes:7;
      Bit32u tcodeOffset:20;
      } __attribute__ ((packed)) fields;
    Bit32u raw; /* access to all bits at once */
    } __attribute__ ((packed)) element[STForwardL2N];
  struct stForwardL2Cluster_tag *next; /* next cluster in the list */
  } __attribute__ ((packed)) stForwardL2Cluster_t;


/* Levels 0 and 1 (L0 and L1) are simple arrays, one element for
 * each combination of the address bits for the corresponding
 * address range.  Elements of L0 frames are pointers to the frames
 * of the next level (L1).  Elements of the L1 frames point to an
 * L2 linked list structure.
 *
 * The array sizes come from the STForward*N constants so that the
 * frame types cannot drift away from the documented dimensions.
 */

typedef stForwardL2Cluster_t *stForwardL1Frame_t[STForwardL1N];
typedef stForwardL1Frame_t   *stForwardL0Frame_t[STForwardL0N];


/*
 * Reverse (tcode address -> guest instruction address) sparse table
 * dimensions.  A tcode chunk spans 8 address bits: the top level frame
 * has STReverseL1N entries and the L2 clusters match the remaining
 * 6 bits (addr5_0).  L2N is the number of entries per L2 cluster.
 */
#define STReverseL1N   4   /* Dont change */
#define STReverseL2N   3   /* (configurable) */

/*
 * One node of a reverse-lookup L2 linked list.  Each element is a 32-bit
 * word that can be read either field by field or as a whole through
 * 'raw'; the bitfields add up to exactly 32 bits (6 + 8 + 12 + 6).
 */
typedef struct stReverseL2Cluster_tag {
  union {
    struct {
      Bit32u addr5_0:6; /* For address match of bits 5..0 */
      Bit32u tcodeLen:8; /* length of tcode sequence */
      Bit32u pageOffset:12; /* iaddr page offset (bits 11..0) */
      Bit32u notUsed:6;
      } __attribute__ ((packed)) fields;
    Bit32u raw; /* access to all bits at once */
    } __attribute__ ((packed)) element[STReverseL2N];
  struct stReverseL2Cluster_tag *next; /* next cluster in the list */
  } __attribute__ ((packed)) stReverseL2Cluster_t;


/* ====================
 * Tcode chunk features
 * ====================
 */

#define TCodeChunkN    256 /* (configurable) Must be multiple of 8 */
#define TCodeChunkSize 256 /* Dont change this! */

/*
 * A tcode chunk is TCodeChunkSize raw bytes whose first bytes are
 * overlaid by a bookkeeping header (hence the union).
 */
typedef union tcodeChunk_tag {
  struct {
    /* For reverse lookup (tcode address -> instruction address), a
     * lookup table is used, which is similar to the forward lookup
     * mechanisms.  Each tcode chunk is 256 bytes (8 bits of address).
     * This address space is analogous to the L1 & L2 components of
     * forward lookup.  So for simplicity and consistency, we start
     * with L1.  The top level frame is embedded in the chunk.
     *
     * Sized by STReverseL1N rather than a literal so the frame cannot
     * drift away from the documented dimension.
     */
    stReverseL2Cluster_t *t2iL1[STReverseL1N];

    /* Pointer to next chunk used by associated guest code page.  The
     * meta info for the code page points to the first chunk, and
     * from there subsequent chunks which are allocated as needed
     * are chained together by this pointer.
     */
    union tcodeChunk_tag *next;

    /* The index of the next available data offset in this chunk.
     * NOTE(review): presumably space is handed out from both ends of
     * the chunk (see the AtHead/AtTail request flags), with 'head' and
     * 'tail' tracking the two fronts -- confirm in allocTcodeSpace().
     */
    Bit16u head;
    Bit16u tail;

    /* The index into the meta array so we can find which associated
     * code page owns this tcode chunk.
     */
    Bit32u ownerMetaIndex;
    } __attribute__ ((packed)) header;
  Bit8u raw[TCodeChunkSize];
  } __attribute__ ((packed)) tcodeChunk_t;

/*
 * The pool of tcode chunks and its usage array.  The usage array holds
 * TCodeChunkN bits rounded up to whole bytes.
 * NOTE(review): presumably one in-use bit per chunk -- confirm the bit
 * order in the allocator.
 */
extern tcodeChunk_t tcodeChunk[TCodeChunkN];
extern Bit8u        tcodeChunkUsage[(TCodeChunkN+7) / 8];

/*
 * NOTE(review): presumably takes a free chunk from the pool for the code
 * page identified by metaIndex; behavior when the pool is exhausted is
 * not visible from this header -- confirm at the definition.
 */
extern tcodeChunk_t *allocTcodeChunk(unsigned metaIndex);

/*
 * Request flags.  Bit 0 chooses DoZero/DontZero and bit 1 chooses
 * AtHead/AtTail; the "Dont"/"AtTail" names are the zero values of their
 * bit, provided for readable call sites.
 * NOTE(review): presumably OR-ed together and passed as the 'requests'
 * argument of allocTcodeSpace() -- confirm at the definition.
 */
#define DoZero    (1<<0)
#define DontZero  (0<<0)
#define AtHead    (1<<1)
#define AtTail    (0<<1)
/*
 * NOTE(review): presumably reserves 'size' bytes of tcode space for the
 * code page 'metaIndex' and reports the chunk used through '*chunk';
 * failure behavior is not visible from this header.
 */
void *allocTcodeSpace(unsigned metaIndex, unsigned size, unsigned requests,
                      tcodeChunk_t **chunk);

/* =======================
 * Page Meta Info features
 * =======================
 */

#define DTPageMetaTableN 8  /* (configurable) Should be multiple of 8. */

/*
 * Per code page meta information: the root of the page's forward sparse
 * lookup, the page's linear address, and its chain of tcode chunks.
 */
typedef struct {
  stForwardL0Frame_t i2tL0; /* Level0 frame of iaddr->tcode sparse lookup */

  Bit32u lpa;   /* Linear Page Address */
  /* +++ other constraints need to be added here */

  tcodeChunk_t *tcodeChunkHead; /* ptr to 1st tcode chunk in list */
  tcodeChunk_t *tcodeChunkCurrent; /* ptr to current tcode chunk in list */
  } dtPageMeta_t;


/*
 * The meta table and its usage array.  The usage array holds
 * DTPageMetaTableN bits rounded up to whole bytes.
 * NOTE(review): presumably one in-use bit per table entry -- confirm.
 */
extern dtPageMeta_t dtPageMetaTable[DTPageMetaTableN];
extern Bit8u        dtPageMetaTableUsage[(DTPageMetaTableN+7) / 8];



/* --------------------------------------------------------- */
/*
 * Request codes.
 * NOTE(review): presumably the values accepted as the 'req' argument of
 * r3hToMonRequest() and stored in r3h_request (G2T translation lookup,
 * panic, terminate) -- confirm at the definition.
 */
#define R3HToMonRequestG2T   10
#define R3HToMonRequestPanic 11
#define R3HToMonRequestTerminate 12

/* Takes a request code and one 32-bit data word; returns a 32-bit result. */
Bit32u r3hToMonRequest(unsigned req, Bit32u data);

/*
 * NOTE(review): presumably the two context-switch entry points between
 * the monitor and the r3h handler environment -- confirm in the assembly.
 */
extern void __mon2r3h(void);
extern void __r3h2mon(void);

extern Bit32u mon_ESP;

extern unsigned r3h_request;
extern Bit32u r3h_data;
/*
 * NOTE(review): presumably displacements used when patching IDs and
 * jumps into generated code -- confirm where they are set.
 */
extern Bit32u idPatchDispl;
extern Bit32u jmpPatchDispl;


/* Variable declarations */

/*
 * NOTE(review): presumably the guest's code segment (base/limit) and its
 * current privilege level -- confirm where they are defined.
 */
extern descriptor_t CS;
extern unsigned CPL;


/* Function declarations */

/*
 * Translation entry points.  Only the signatures are visible here; the
 * notes are read off the names and parameter types.
 * NOTE(review): confirm the exact contracts (especially the "not found"
 * return values) at the definitions.
 */
Bit32u dtTranslateG2T(Bit32u gOff);   /* guest offset in, 32-bit result out */
Bit32u dtMetaLookupTcode(unsigned metaIndex, Bit32u gla);
Bit8u *dtAddTcode(unsigned metaIndex, Bit8u *tcode, unsigned tcodeLen,
                  Bit32u pOff);

Bit32u dtTranslateSequence(unsigned metaIndex, Bit32u gOff, Bit32u gla);

/* Debug helpers; debug_signal has the shape of a signal handler. */
void debug_dump(void);
void debug_signal(int signo);

/* Initialization routines for the DT engine and its two hash tables. */
void dtInitialize(void);
void dtInitLPAToMIHashTable(void);
void dtInitG2THashTable(void);

/* Print/statistics helpers for the forward and reverse sparse tables. */
void printSTForwardL0(unsigned metaIndex);
void printSTForwardL1(stForwardL1Frame_t *l1Frame);
void printSTForwardL2(stForwardL2Cluster_t *l2Cluster);
void printSTReverseL1(unsigned metaIndex, tcodeChunk_t *head);
void printSTReverseL2(stReverseL2Cluster_t *l2Cluster);
void stTcodeStats(unsigned metaIndex, tcodeChunk_t *head);
void stTcodeStatsL2(stReverseL2Cluster_t *l2Cluster);

/* Only declared when the TestSparseTables configuration option is non-zero. */
#if TestSparseTables
void testSparseTables(void);
#endif

#endif /* __DT_H */
