/*
 *  plex86: run multiple x86 operating systems concurrently
 *  Copyright (C) 1999-2001  Kevin P. Lawton
 *
 *  dt.c: Manages translated code sequence buffer and structures
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 */


#include <stdio.h>
#include <string.h>
#include <sys/time.h>
#include <signal.h>
#include <stdlib.h>

#include "dt.h"

/*
 * Synchronization of global ID patched into branch tcode
 *   (currently in r3h_branch).
 * Expand L2M --> LPAToMI
 * Hash table rows end at the first available entry.  If an entry
 *   is marked empty, shift all rightward entries to the left.
 * Segment expectations?  GS virtualized?  CS/SS?
 * Backpatching conditionally in C function, depending on
 *   which asm shim calls it.
 * Must increment globalID every context switch (anything that could
 *   make us recompare the criteria in the meta entries).
 */


/* LPA-to-meta-index hash table, indexed [row][column].  Each entry holds
 * a tag and a meta index (metai); MetaIndexNone marks an unused entry.
 */
dtL2MHash_t  dtL2MHash __attribute__ ((aligned(CacheLineAlignment)));

/* Guest-offset-to-tcode hash table, indexed [row][column].  Each entry
 * holds a guest offset (gOff) and a tcode offset (tOff); TcodeOffsetNone
 * marks an unused entry.
 */
dtG2THash_t  dtG2THash __attribute__ ((aligned(CacheLineAlignment)));

/* Page meta table: DT info about an associated guest code page. */
dtPageMeta_t dtPageMetaTable[DTPageMetaTableN]
               __attribute__ ((aligned(CacheLineAlignment)));

/* Allocation bitmap for dtPageMetaTable[]: one bit per entry, a set bit
 * means the entry is in use.  Aligned like the other tables (the
 * alignment argument was previously left empty).
 */
Bit8u        dtPageMetaTableUsage[(DTPageMetaTableN+7) / 8]
               __attribute__ ((aligned(CacheLineAlignment)));

/* Pool of tcode chunks.  Each chunk is aligned on TCodeChunkSize. */
tcodeChunk_t tcodeChunk[TCodeChunkN] __attribute__ ((aligned(TCodeChunkSize)));

/* Allocation bitmap for tcodeChunk[]: one bit per chunk, a set bit means
 * the chunk is in use.
 */
Bit8u        tcodeChunkUsage[(TCodeChunkN+7) / 8]
               __attribute__ ((aligned(CacheLineAlignment)));

#if DT_DEBUG
/* Debug-only instrumentation counters, compiled in when DT_DEBUG is set.
 * NOTE(review): nothing in this part of the file updates them.  Presumably
 * instrG2THit[] counts G2T hash hits per row column and instrG2TMiss
 * counts lookups that missed -- confirm against the code that uses them.
 */
unsigned instrG2THit[DT_G2THashWidth];
unsigned instrG2TMiss;
#endif


  void
dtInitialize(void)
{
  /* Bring every DT table in this file to its initial, empty state.
   * Takes no arguments and returns nothing.
   */

  /* LPA-to-meta-index hash: clear it, then flag each row as empty. */
  memset(dtL2MHash, 0, sizeof dtL2MHash);
  dtInitLPAToMIHashTable();

  /* Guest-to-tcode hash: clear it, then flag each row as empty. */
  memset(dtG2THash, 0, sizeof dtG2THash);
  dtInitG2THashTable();

  /* Page meta table (DT info about an associated guest code page)
   * together with its allocation bitmap.
   */
  memset(dtPageMetaTableUsage, 0, sizeof dtPageMetaTableUsage);
  memset(dtPageMetaTable,      0, sizeof dtPageMetaTable);

  /* Tcode chunk pool together with its allocation bitmap. */
  memset(tcodeChunkUsage, 0, sizeof tcodeChunkUsage);
  memset(tcodeChunk,      0, sizeof tcodeChunk);

#if TestSparseTables
  /* Sparse table self-test build: run the test and terminate. */
  testSparseTables();
  exit(0);
#endif
}

  void
dtInitLPAToMIHashTable(void)
{
  /* Flag every row of the L2M hash table as empty by storing
   * MetaIndexNone in the meta index of its first entry.  The other
   * entries of a row are only marked later, when the row is first
   * looked up.  +++ Since this table is fairly small, marking all
   * entries here (e.g. on context switches) might be more efficient
   * than marking per-row dynamically.
   */
  unsigned row = 0;

  while (row < DT_L2MHashHeight) {
    dtL2MHash[row][0].metai = MetaIndexNone;
    row++;
    }
}

  void
dtInitG2THashTable(void)
{
  /* Flag every row of the G2T hash table as empty by storing
   * TcodeOffsetNone in the tcode offset of its first entry.  Only the
   * first entry is written here; the remainder of a row is marked
   * lazily, the first time the row is used.  This keeps a reset from
   * having to write the whole (fairly large) table.
   */
  unsigned row = 0;

  while (row < DT_G2THashHeight) {
    dtG2THash[row][0].tOff = TcodeOffsetNone;
    row++;
    }
}


  static void
dtSetG2TPair(unsigned hashRow, Bit32u gOff, Bit32u tOff)
{
  /* Store a {guest offset, tcode offset} pair in row 'hashRow' of the
   * G2T hash table.
   *
   *   hashRow: row of dtG2THash to store into.
   *   gOff:    guest offset (the lookup key).
   *   tOff:    tcode offset to associate with gOff.
   */

  unsigned slot;

  /* A row whose first entry is empty has not been set up yet; mark
   * every entry of the row empty before using it.
   */
  if (dtG2THash[hashRow][0].tOff == TcodeOffsetNone) {
    for (slot = 0; slot < DT_G2THashWidth; slot++)
      dtG2THash[hashRow][slot].tOff = TcodeOffsetNone;
    }

  /* Locate the first empty entry in the row. */
  slot = 0;
  while ( (slot < DT_G2THashWidth) &&
          (dtG2THash[hashRow][slot].tOff != TcodeOffsetNone) )
    slot++;

  /* Row is full; +++ for now, replace the 0th entry.  Other replacement
   * strategies (shifting the row, reordering for performance) could be
   * used here.
   */
  if (slot >= DT_G2THashWidth)
    slot = 0;

  dtG2THash[hashRow][slot].gOff = gOff;
  dtG2THash[hashRow][slot].tOff = tOff;
}


  static unsigned
dtTranslateLPAToMI(Bit32u lpa)
{
  /* Translate from a Linear Page Address to a Meta Index.
   *
   *   lpa: linear page address (the 32bit linear address >> 12).
   *
   * Returns the index into dtPageMetaTable[] of the entry for this page.
   * The L2M hash table is consulted first; on a miss the allocated meta
   * entries are searched, and if none matches a new entry is allocated.
   * In the latter two cases the pairing is cached in the hash table.
   * If no meta entry is free, a message is printed and the program
   * exits with status 1.
   */

  unsigned metaI, hashRow, hashCol, tag;
  unsigned bitmask;

  hashRow = DT_LPAToMIHash(lpa);
  tag = DT_LPAToMITag(lpa);

  if (dtL2MHash[hashRow][0].metai == MetaIndexNone) {
    /* First entry in Row was marked empty.  This is our signal to mark the
     * rest of the entries in the row as empty.
     */
    for (hashCol = 1; hashCol < DT_L2MHashWidth; hashCol++) {
      dtL2MHash[hashRow][hashCol].metai = MetaIndexNone;
      }
    hashCol = 0; /* Signal that 0th entry is available */
    }
  else {
    /* There are some entries in this row.  Look for a match. */
    for (hashCol = 0; hashCol < DT_L2MHashWidth; hashCol++) {
      /* If the tag matches and the meta index is valid, then return
       * the meta index.
       */
      if ( (dtL2MHash[hashRow][hashCol].tag == tag) &&
           (dtL2MHash[hashRow][hashCol].metai != MetaIndexNone) )
        return(dtL2MHash[hashRow][hashCol].metai);
      /* If we find a free entry, that's the end of the row.  Stop here.
       * The value of hashCol is used below when adding the new pair;
       * hashCol == DT_L2MHashWidth after the loop means the row is full.
       */
      if ( dtL2MHash[hashRow][hashCol].metai == MetaIndexNone )
        break;
      }
    }

  /* LPA not in LPAToMI hash table.  Search meta entries for a match
   * of this LPA and processor context.  If we find a match, add it to the
   * LPAToMI hash table.  Several contexts can have mappings for a given
   * LPA, so it's important that the processor context matches also.
   */

  /* +++ For now, perform a brute-force lookup in the Meta table.
   * Only entries marked in-use in the bitmap are compared: unallocated
   * entries are zero-filled, so their lpa field (0) would otherwise be
   * mistaken for a mapping of linear page 0.
   */
  for (metaI = 0; metaI < DTPageMetaTableN; metaI++) {
    bitmask = 1u << (metaI & 7);
    if ( !(dtPageMetaTableUsage[metaI >> 3] & bitmask) )
      continue;
    if (dtPageMetaTable[metaI].lpa == lpa) {
      /* +++ We need to match guest context here also! */
      /* Found the meta entry for this LPA; cache the pairing in the
       * hash table so the next lookup is efficient.
       */
      goto addToHashTable;
      }
    }

  /* LPA not in either hash table or meta table.  We need to create
   * a new meta table entry for this page.  The scan is by entry index
   * so that a table size which is not a multiple of 8 is fully covered.
   */
  for (metaI = 0; metaI < DTPageMetaTableN; metaI++) {
    if ( ((metaI & 7) == 0) &&
         (dtPageMetaTableUsage[metaI >> 3] == 0xff) ) {
      /* All 8 entries covered by this bitmap byte are in-use; skip to
       * the next byte (the loop increment supplies the 8th step).
       */
      metaI += 7;
      continue;
      }
    bitmask = 1u << (metaI & 7);
    if ( !(dtPageMetaTableUsage[metaI >> 3] & bitmask) ) {
      /* Found unused meta page.  Mark it in-use. */
      dtPageMetaTableUsage[metaI >> 3] |= bitmask;
      /* Zero allocated meta page. +++ Might be able to avoid this */
      memset(&dtPageMetaTable[metaI], 0, sizeof(dtPageMeta_t));
      dtPageMetaTable[metaI].lpa = lpa;
      goto addToHashTable;
      }
    }

  printf("dtTranslateLPAToMI: all meta pages in-use.\n");
  /* Need to add replacement strategy code here. */
  exit(1);

addToHashTable:
  if (hashCol < DT_L2MHashWidth) {
    /* Row is not full; just add at next available slot */
    dtL2MHash[hashRow][hashCol].tag = tag;
    dtL2MHash[hashRow][hashCol].metai = metaI;
    }
  else {
    /* Row is full; +++ For now, just bump 0th entry.  Could
     * shift all to right one, or use other replacement strategy.
     */
    dtL2MHash[hashRow][0].tag = tag;
    dtL2MHash[hashRow][0].metai = metaI;
    }
  return( metaI );
}


  void *
allocTcodeSpace(unsigned metaIndex, unsigned size, unsigned requests,
                tcodeChunk_t **chunk)
{
  /* Allocate 'size' bytes from the tcode chunks owned by a meta page.
   *
   *   metaIndex: index into dtPageMetaTable[] of the owning page.
   *   size:      number of bytes wanted.
   *   requests:  flag bits.  If AtHead is set the space is taken from
   *              the head (beginning) of the chunk's free area, otherwise
   *              from its tail (end).  If DoZero is set the returned
   *              space is zero-filled.
   *   chunk:     if non-null, receives a pointer to the chunk used.
   *
   * Returns a pointer to the allocated space.  If the current chunk has
   * insufficient room, a new chunk is allocated and chained after it.
   * If 'size' exceeds what an empty chunk can hold, a message is printed
   * and the program exits with status 1.
   */

  tcodeChunk_t *chunkCurrent;
  void         *dataPtr;

  /* Bounds sanity check on size; check against biggest possible space
   * which can be allocated.
   */
  if (size > (sizeof(tcodeChunk_t) - sizeof(tcodeChunk[0].header)) ) {
    printf("allocTcodeSpace: requested space of %u bytes too big\n", size);
    exit(1);
    }

  /* Find a tcode chunk with room for given data item. */
  chunkCurrent = dtPageMetaTable[metaIndex].tcodeChunkCurrent;
  if ( !chunkCurrent ) {
    /* No chunks allocated yet for this page; allocate one now,
     * and point both head and current pointers to it.
     */
    chunkCurrent =
      dtPageMetaTable[metaIndex].tcodeChunkHead =
        dtPageMetaTable[metaIndex].tcodeChunkCurrent =
          allocTcodeChunk(metaIndex);
    }
  else {
    unsigned room;

    /* header.head indexes the first free byte and header.tail the last
     * free byte, so the free area holds (tail - head) + 1 bytes.  When
     * the chunk is completely full, head is one past tail; handle that
     * explicitly so that no negative value ends up being compared
     * against the unsigned size.
     */
    if (chunkCurrent->header.head > chunkCurrent->header.tail)
      room = 0;
    else
      room = (unsigned) (chunkCurrent->header.tail -
                         chunkCurrent->header.head) + 1;

    if ( room < size ) {
      /* There is no room in this chunk for the tcode sequence.
       * Allocate another chunk and chain it with the current one.
       */
      chunkCurrent->header.next = allocTcodeChunk(metaIndex);
      chunkCurrent = chunkCurrent->header.next;
      dtPageMetaTable[metaIndex].tcodeChunkCurrent = chunkCurrent;
      }
    }

  /* Get pointer to storage area */
  if (requests & AtHead) {
    /* Request to place data at head - towards beginning of chunk */
    dataPtr = (void *) &chunkCurrent->raw[chunkCurrent->header.head];
    /* Advance availability index */
    chunkCurrent->header.head += size;
    }
  else {
    /* Request to place data at tail - towards end of chunk.  The data
     * occupies the last 'size' bytes of the free area.
     */
    dataPtr = (void *) &chunkCurrent->raw[(chunkCurrent->header.tail-size)+1];
    /* Advance availability index */
    chunkCurrent->header.tail -= size;
    }

  /* Return a pointer to the chunk used, if requested */
  if (chunk)
    *chunk = chunkCurrent;

  /* Zero structure if requested. */
  if (requests & DoZero)
    memset(dataPtr, 0, size);

  return( dataPtr );
}

  tcodeChunk_t *
allocTcodeChunk(unsigned metaIndex)
{
  /* Allocate a tcode chunk from the tcodeChunk[] pool.
   *
   *   metaIndex: index of the meta page which will own the chunk; it is
   *              recorded in the chunk header.
   *
   * Returns the address of the lowest-numbered unused chunk, zeroed and
   * with its header initialized.  If every chunk is in use, a message is
   * printed and the program exits with status 1.
   *
   * The pool is scanned by chunk index so that a TCodeChunkN which is
   * not a multiple of 8 is fully covered (the usage bitmap is sized
   * with (TCodeChunkN+7)/8 bytes).
   */

  unsigned chunk, bitmask;

  for (chunk = 0; chunk < TCodeChunkN; chunk++) {
    if ( ((chunk & 7) == 0) && (tcodeChunkUsage[chunk >> 3] == 0xff) ) {
      /* All 8 chunks covered by this bitmap byte are in-use; skip to
       * the next byte (the loop increment supplies the 8th step).
       */
      chunk += 7;
      continue;
      }
    bitmask = 1u << (chunk & 7);
    if ( tcodeChunkUsage[chunk >> 3] & bitmask )
      continue;

    /* Found unused chunk; mark it in-use. */
    tcodeChunkUsage[chunk >> 3] |= bitmask;
    /* Zero allocated chunk.  +++ We could zero only the header here */
    memset(&tcodeChunk[chunk], 0, sizeof(tcodeChunk_t));
    /* The header counts as used space: free space begins right after it */
    tcodeChunk[chunk].header.head = sizeof(tcodeChunk[0].header);
    /* The free-space tail starts at the last byte of the chunk */
    tcodeChunk[chunk].header.tail = sizeof(tcodeChunk[0]) - 1;
    /* Set owner of chunk to corresponding meta page */
    tcodeChunk[chunk].header.ownerMetaIndex = metaIndex;
    return( &tcodeChunk[chunk] ); /* Return addr of allocated chunk */
    }

  printf("allocTcodeChunk: all chunks in-use.\n");
  exit(1);
}



  Bit32u
dtTranslateG2T(Bit32u gOff)
{
  /* Translate a guest offset into the tcode address for it.
   *
   *   gOff: guest offset; CS.base is added to it to form the guest
   *         linear address.
   *
   * Returns the value cached in the G2T hash table if present.  Otherwise
   * the page's meta entry is consulted (translating the instruction
   * sequence when no tcode exists yet), the result is stored in the hash
   * table, and then returned.
   */

  unsigned row, col, metaI;
  Bit32u   linAddr, tcodeAddr;

  /* Look in the G2T hash table first; ideally the instruction has
   * been translated already and its tcode address is cached there.
   */
  row = DT_G2THashSelect(gOff);

  if (dtG2THash[row][0].tOff == TcodeOffsetNone) {
    /* An empty first entry means the row has not been set up yet:
     * mark every entry in the row as empty.  Nothing can match.
     */
    for (col = 0; col < DT_G2THashWidth; col++)
      dtG2THash[row][col].tOff = TcodeOffsetNone;
    }
  else {
    col = 0;
    while (col < DT_G2THashWidth) {
      /* An empty entry terminates the row. */
      if (dtG2THash[row][col].tOff == TcodeOffsetNone)
        break;
      /* Entry is valid; a matching guest offset is a hit.
       * +++ It might be smart to reorder this entry to the beginning
       * of the list for performance.
       */
      if (dtG2THash[row][col].gOff == gOff)
        return( dtG2THash[row][col].tOff );
      col++;
      }
    }

  /* Hash miss.  Find the meta index for the page containing the guest
   * linear address and look the address up in that page's tcode.
   */
  linAddr   = CS.base + gOff;
  metaI     = dtTranslateLPAToMI(linAddr >> 12);
  tcodeAddr = dtMetaLookupTcode(metaI, linAddr);

  /* A result of 0 means there is no tcode for this instruction yet;
   * it must be translated now.
   */
  if ( !tcodeAddr )
    tcodeAddr = dtTranslateSequence(metaI, gOff, linAddr);

  /* Cache the {guest offset, tcode address} pair in the hash table. */
  dtSetG2TPair(row, gOff, tcodeAddr);

  return( tcodeAddr );
}


  Bit32u
dtMetaLookupTcode(unsigned metaIndex, Bit32u gla)
{
  /* Find the tcode address recorded for a guest linear address within
   * one meta page.
   *
   *   metaIndex: index into dtPageMetaTable[] of the page to search.
   *   gla:       guest linear address; only its low 12 bits are used,
   *              split into L0/L1/L2 = 4/3/5 bits.
   *
   * Returns the tcode address (start of the tcodeChunk[] array plus the
   * stored tcode offset), or 0 if no entry matches.
   */

  const unsigned idxL0   = (gla >> 8) & 0xf; /* bits 11..8 */
  const unsigned idxL1   = (gla >> 5) & 0x7; /* bits  7..5 */
  const unsigned lowBits = gla & 0x01f;      /* bits  4..0 */
  stForwardL1Frame_t   *frame;
  stForwardL2Cluster_t *cluster;
  unsigned i;

  /* The L0 frame lives inside the page meta entry; each of its slots
   * points to an L1 frame, or is null if none was allocated.
   */
  frame = dtPageMetaTable[metaIndex].i2tL0[idxL0];
  if ( !frame )
    return 0; /* not found */

  /* Each L1 slot heads a linked list of L2 clusters.  Walk the list and
   * look for a used element whose low address bits match.
   */
  for (cluster = (*frame)[idxL1]; cluster; cluster = cluster->next) {
    for (i = 0; i < STForwardL2N; i++) {
      if ( !cluster->element[i].raw )
        continue; /* unused element */
      if ( cluster->element[i].fields.addr4_0 != lowBits )
        continue; /* some other address */
      /* Match: address is start of tcode buffer plus offset. */
      return( ((Bit32u) tcodeChunk) +
              cluster->element[i].fields.tcodeOffset );
      }
    }

  return 0; /* not found */
}


  Bit8u *
dtAddTcode(unsigned metaIndex, Bit8u *tcode, unsigned tcodeLen,
           Bit32u pOff)
{
  /* Store a translated code sequence for a guest instruction and record
   * both the forward (guest page offset -> tcode) and the reverse
   * (tcode address -> guest page offset) mappings.
   *
   *   metaIndex: index into dtPageMetaTable[] of the guest code page.
   *   tcode:     tcode bytes to copy into a tcode chunk.
   *   tcodeLen:  number of bytes at 'tcode'.
   *   pOff:      offset of the guest instruction within its page.
   *
   * Returns a pointer to the copy of the tcode inside the chunk.
   * All storage comes from allocTcodeSpace().
   */

  unsigned l0Bits, l1Bits, l2Bits;
  stForwardL1Frame_t   *l1Frame;
  stForwardL2Cluster_t *forwardL2Cluster;
  stReverseL2Cluster_t *reverseL2Cluster;
  unsigned clusterI;
  Bit8u *tcodePtr;
  Bit32u tcodeOffset;
  tcodeChunk_t *tcodeChunkUsed;

  /* ===============================================================
   * FORWARD mapping: put tcode sequence in a tcode chunk and create
   * a mapping from guest instruction address (page offset) to
   * tcode address.
   */
  l0Bits = (pOff >> 8) & 0xf; /* L0: 4 bits */
  l1Bits = (pOff >> 5) & 0x7; /* L1: 3 bits */
  l2Bits = pOff & 0x01f;      /* L2: 5 bits */

  /* The L0 frame is embedded in the PageMeta entry.  We can index it to
   * get a pointer to the L1 frame.
   */
  l1Frame = dtPageMetaTable[metaIndex].i2tL0[l0Bits];

  if (!l1Frame) {
    /* An L1 frame does not exist.  Allocate one now. */
    l1Frame =
      dtPageMetaTable[metaIndex].i2tL0[l0Bits] =
        allocTcodeSpace(metaIndex, sizeof(stForwardL1Frame_t),
                        DoZero | AtTail, 0);
    }

  /* Index the L1 frame to get a pointer to the L2 cluster. */
  forwardL2Cluster = (*l1Frame)[l1Bits];
  if ( !forwardL2Cluster ) {
    /* An L2 cluster has not yet been allocated.  Allocate now, and
     * use the 0th entry.
     */
    forwardL2Cluster =
      (*l1Frame)[l1Bits] =
        allocTcodeSpace(metaIndex, sizeof(stForwardL2Cluster_t),
                        DoZero | AtTail, 0);
    clusterI = 0; /* Use the 0th entry in the cluster */
    }
  else {
    /* An L2 cluster list is already allocated.  Walk it looking for an
     * empty slot.  On leaving the loop, forwardL2Cluster/clusterI
     * identify the slot to fill.
     */
    for (;;) {
      for (clusterI=0; clusterI<STForwardL2N; clusterI++) {
        if ( forwardL2Cluster->element[clusterI].raw == 0 ) {
          /* Found empty slot in cluster */
          break;
          }
#if STExtraSanityChecks
        if ( forwardL2Cluster->element[clusterI].fields.addr4_0 == l2Bits ) {
          printf("dtAddTcode: found duplicate forward address.\n");
          exit(1);
          }
#endif
        }
      if (clusterI < STForwardL2N)
        break; /* Empty slot found */

      /* This cluster is full.  If it is the last one in the list,
       * append a new zeroed cluster; the next pass of the loop then
       * finds its 0th entry empty.
       */
      if ( !forwardL2Cluster->next ) {
        forwardL2Cluster->next =
          allocTcodeSpace(metaIndex, sizeof(stForwardL2Cluster_t),
                          DoZero | AtTail, 0);
        }
      forwardL2Cluster = forwardL2Cluster->next;
      }
    }

  /* Allocate space for tcode, no need to zero because we are going
   * to memcpy() it to the allocated space.
   */
  tcodePtr = allocTcodeSpace(metaIndex, tcodeLen,
                             DontZero | AtHead, &tcodeChunkUsed);

  /* Copy tcode sequence to tcode buffer (chunk) area. */
  memcpy(tcodePtr, tcode, tcodeLen);

  /* The forward mapping stores the tcode location as an offset from the
   * start of the tcodeChunk[] array.
   */
  tcodeOffset = ((Bit32u) tcodePtr) - ((Bit32u) tcodeChunk);
#if STExtraSanityChecks
  if (tcodeOffset >= sizeof(tcodeChunk)) {
    printf("dtAddTcode: sanity check fails: tcodeOffset too big.\n");
    exit(1);
    }
#endif

  /* Fill in L2 values */
  forwardL2Cluster->element[clusterI].fields.addr4_0 = l2Bits;
  forwardL2Cluster->element[clusterI].fields.attributes = 0; /* +++ for now */
  forwardL2Cluster->element[clusterI].fields.tcodeOffset = tcodeOffset;



  /* ===============================================================
   * REVERSE mapping: create a mapping from tcode address to the
   * corresponding guest instruction address (page offset).
   * Note that sparse table to cover the FORWARD mapping (i2t)
   * spans 12-bits (4096-bytes).  It is broken up into
   * L0,L1,L2={4,3,5}-bits.  But each tcode chunk is only 8-bits (256-bytes).
   * An exception can easily yield the tcode chunk ID, and each chunk
   * lists the ownerMetaIndex in its header.  So the sparse table for
   * the tcode chunk only needs to span 8-bits, and is rooted at the
   * t2iL1[] array in the chunk header.  The code below splits those
   * 8 bits as L1,L2={2,6}-bits.
   * NOTE(review): using the low bits of the tcode address as the offset
   * within the chunk presumably relies on chunks being 256 bytes and
   * aligned on TCodeChunkSize -- confirm against dt.h.
   */

  /* Get L1 and L2 address bits */
  l1Bits = (((Bit32u) tcodePtr) >> 6) & 0x3; /* L1: 2 bits */
  l2Bits = ((Bit32u) tcodePtr) & 0x03f;      /* L2: 6 bits */

  /* Index the L1 frame to get a pointer to the L2 cluster. */
  reverseL2Cluster = tcodeChunkUsed->header.t2iL1[l1Bits];
  if ( !reverseL2Cluster ) {
    reverseL2Cluster =
      tcodeChunkUsed->header.t2iL1[l1Bits] =
        allocTcodeSpace(metaIndex, sizeof(stReverseL2Cluster_t),
                        DoZero | AtTail, 0);
    clusterI = 0; /* Use the 0th entry in the cluster. */
    }
  else {
    /* Walk the L2 cluster linked list looking for an empty slot.  On
     * leaving the loop, reverseL2Cluster/clusterI identify the slot.
     */
    for (;;) {
      for (clusterI=0; clusterI<STReverseL2N; clusterI++) {
        if ( reverseL2Cluster->element[clusterI].raw == 0 ) {
          /* Found empty slot in cluster */
          break;
          }
        }
      if (clusterI < STReverseL2N)
        break; /* Empty slot found */

      /* This cluster is full.  If it is the last one in the list,
       * append a new zeroed cluster; the next pass of the loop then
       * finds its 0th entry empty.
       */
      if ( !reverseL2Cluster->next ) {
        reverseL2Cluster->next =
          allocTcodeSpace(metaIndex, sizeof(stReverseL2Cluster_t),
                          DoZero | AtTail, 0);
        }
      reverseL2Cluster = reverseL2Cluster->next;
      }
    }

  /* Fill in the reverse mapping L2 values */
  reverseL2Cluster->element[clusterI].fields.addr5_0 = l2Bits;
  reverseL2Cluster->element[clusterI].fields.tcodeLen = tcodeLen;
  reverseL2Cluster->element[clusterI].fields.pageOffset = pOff;
  reverseL2Cluster->element[clusterI].fields.notUsed = 0;
  
  return( tcodePtr );
}
