// Copyright (C)  2000 Intel Corporation.  All rights reserved.
//
// $Header: /usr/development/orp/orp/arch/ia32/ia32_o3_jit/local_reg_alloc.cpp,v 1.3 2001/12/14 08:04:22 xhshi Exp $
//


#include "defines.h"
#include "ir.h"
#include "flow_graph.h"
#include "local_reg_alloc.h"
#include "expression.h"

//
// Map an operand to a bucket index in [0, MAX_FOLD_SIZE).
// Register operands are hashed on their bit-vector position; every other
// operand is hashed on its address with the two low-order bits dropped.
//
static unsigned fold_hash(Operand *opnd)
{
    if (!opnd->is_reg()) {
        // non-register operand: hash on the pointer value
        return (((unsigned)opnd) >> 2) % MAX_FOLD_SIZE;
    }
    return opnd->bv_position() % MAX_FOLD_SIZE;
}

//
// A definition of a local live range ends that live range.
//
// opnd : the defined operand; NULL and non-register operands are ignored.
//
// If the register already owns a physical register, that register is removed
// from the set of live physical registers.  Otherwise, if the register holds
// a slot in _ready_be_assigned, a physical register is chosen for it now
// (or it is handed over to global register allocation when nothing is
// available) and the slot is released.
//
void Local_Reg_Manager::ends_live_range(Operand *opnd) {
    if (opnd == NULL || !opnd->is_reg()) return;

    Reg_Operand *reg = (Reg_Operand*)opnd;

    //
    // reg already has a physical register
    //
    if (reg->assigned_preg() != n_reg) {
        assert(reg->assigned_preg() < MAX_LOCAL_REGS);
        _live_preg_lr &= ~(1<<reg->assigned_preg()); // free reg
        mark_preg_busy(reg->assigned_preg());        // remove reg from _free_bv
        return;
    }

    //
    // only non-global, non-FP/double, non-arg, non-ret registers are handled
    // by the local allocator
    //
    if (reg->global_reg_alloc_cand() ||
        IS_FP_DBL_TYPE(reg->type) ||
        reg->is_arg() || reg->is_ret())
        return;

    //
    // nothing to do unless reg owns a slot in _ready_be_assigned
    //
    int slot = reg->assigned_entry();
    if (slot == NOT_YET_ASSIGNED)
        return;

    unsigned candidates = _free_bv[slot];
    if (candidates == 0) {
        // no register survived the whole live range
        reg->set_global_reg_cand();
    } else {
        //
        // take the preferred register when it is among the candidates,
        // otherwise take the lowest-numbered candidate
        //
        unsigned pick = _preference[slot];
        bool preferred_ok = (pick != n_reg) && ((candidates & (1 << pick)) != 0);
        if (!preferred_ok) {
            pick = 0;
            while (pick < _max_local_regs && !(candidates & (1<<pick)))
                pick++;
        }
        assert(pick != _max_local_regs);
        reg->set_assigned_preg((X86_Reg_No)pick);

        mark_preg_busy(pick); // remove reg from _free_bv
    }

    //
    // release the slot and make it the hint for the next request
    //
    _ready_be_assigned[slot] = NULL;
    _free_entry |= (1 << slot);
    _next_free = slot;
}

//
// A use of a register starts a local live range.
//
// opnd : the used operand; NULL and non-register operands are ignored.
//
// A register that already owns a physical register marks that register as
// live.  An eligible single-definition temp without a register is given a
// slot in _ready_be_assigned; the physical register itself is picked later
// by ends_live_range().
//
void Local_Reg_Manager::starts_live_range(Operand *opnd) {
    if (opnd == NULL || !opnd->is_reg()) return;

    Reg_Operand *reg = (Reg_Operand*)opnd;

    //
    // reg already has a physical register
    //
    if (reg->assigned_preg() != n_reg) {
        X86_Reg_No preg = reg->assigned_preg();
        assert(preg < MAX_LOCAL_REGS);
        _live_preg_lr |= (1<<preg);
        mark_preg_busy(preg); // remove reg from _free_bv
        return;
    }

    //
    // registers the local allocator does not handle
    //
    if (reg->global_reg_alloc_cand() ||
        IS_FP_DBL_TYPE(reg->type) ||
        reg->is_arg() ||
        reg->is_ret() ||
        !reg->is_single_def_temp_reg())
        return;

    //
    // reg already sits in _ready_be_assigned: nothing more to do
    //
    if (reg->assigned_entry() != NOT_YET_ASSIGNED) {
        assert(_ready_be_assigned[reg->assigned_entry()] == reg);
        return;
    }

    //
    // no slot left: let global register allocation find a register for reg
    //
    if (!_free_entry) {
        reg->set_global_reg_cand();
        return;
    }

    //
    // at least one slot is free; scan circularly starting at the hint
    //
    for (; (_free_entry & (1 << _next_free)) == 0; ) {
        if (++_next_free == _max_local_regs)
            _next_free = 0;
    }

    //
    // claim the slot: every available register that is not currently live
    // is a candidate, and there is no preference yet
    //
    assert(_ready_be_assigned[_next_free] == NULL);
    _ready_be_assigned[_next_free] = reg;
    reg->set_assigned_entry(_next_free);
    _free_bv[_next_free] = (_avail_local_regs & ~_live_preg_lr);
    _preference[_next_free] = n_reg;

    //
    // mark the slot used and advance the hint to the following slot
    //
    _free_entry &= ~(1 << _next_free);
    if (++_next_free == _max_local_regs)
        _next_free = 0;
}

//
// Update the allocator state for a call instruction.
//
// call : the call being processed.
//
// Every temp that currently waits in _ready_be_assigned is handed over to
// global register allocation, the return register stops being live and the
// registers of the call's arguments become live.
//
void Local_Reg_Manager::call_live_range(Call_Inst *call) {
    //
    // When we come across a call, all temp registers of _ready_be_assigned are 
    // marked as global reg allocation candidates because their live ranges 
    // contain the call which destroys all scratch registers.
    //
    // The vacated slot is returned to _free_entry (as ends_live_range() does
    // when it releases a slot); otherwise the slot would stay unusable for
    // the remainder of the block although nothing occupies it.
    //
    unsigned i;
    for (i = 0; i < _max_local_regs; i++) {
        if (_ready_be_assigned[i] != NULL) {
            _ready_be_assigned[i]->set_global_reg_cand();
            _ready_be_assigned[i] = NULL;
            _free_entry |= (1 << i);
        }
    }
    //
    // ends live range of return value (free ret register)
    //
    Inst *ret = call->get_ret();
    if (ret != NULL && !IS_FP_DBL_TYPE(ret->type())) {
        assert(ret->is_assignment() && ret->src(0)->is_ret());
        _live_preg_lr &= ~(1<<((Ret_Operand*)ret->src(0))->assigned_preg());
    }
    //
    // starts live ranges of all arguments (mark arg registers busy)
    //
    for (i = 0; i < call->n_args(); i++) {
        _live_preg_lr |= (1<<call->get_arg_opnd(i)->assigned_preg()); 
    }
}

//
// For a copy "dst = src": record dst's physical register as the preferred
// register of src.  This only happens when dst has a physical register,
// src has none, and src currently owns a slot in _ready_be_assigned[].
//
void Local_Reg_Manager::set_preference(Reg_Operand *dst, Reg_Operand *src) {
    assert(dst->is_reg() && src->is_reg());

    // dst must already be assigned a reg ...
    if (dst->assigned_preg() == n_reg)
        return;
    // ... while src must not be
    if (src->assigned_preg() != n_reg)
        return;

    //
    // src must still own its slot in _ready_be_assigned[]
    //
    unsigned slot = src->assigned_entry();
    if (slot == NOT_YET_ASSIGNED)
        return;
    if (_ready_be_assigned[slot] != src)
        return;

    _preference[slot] = dst->assigned_preg();
}

//
// Remember the assignment i (dst = src) as a folding candidate.
//
// Field/array sources are tracked in the single _ary_fld slot; every other
// source goes into the hash table bucket selected by fold_hash().
//
void Folding::insert(Inst *i) {
    assert(i->is_assignment());
    Operand *src = i->src(0);

    if (src->kind != Operand::Field && src->kind != Operand::Array) {
        unsigned bucket = fold_hash(src);
        Fold_Link *link = new (_mem) Fold_Link(i);
        link->insert_after(&_table[bucket]);
        //
        // bump the bucket's count; once it reaches NUM_FOLD_PER_ENTRY the
        // link just before the bucket head (the end of the list) is dropped
        //
        _size[bucket]++;
        assert(_size[bucket] <= NUM_FOLD_PER_ENTRY);
        if (_size[bucket] == NUM_FOLD_PER_ENTRY) {
            _size[bucket]--;
            _table[bucket].prev()->unlink();
        }
        return;
    }

    //
    // A long operand is split into a lo 32-bit and a hi 32-bit half.  The
    // two halves must not kill each other, so the tracked access is kept
    // when the new source is the hi half of the tracked one.
    //
    if (_ary_fld == NULL || _ary_fld->src(0)->hi_opnd() != src)
        _ary_fld = i;
}

//
// Return the operand that can be folded in place of s, or NULL when there
// is none.
//
// s : a single-definition temp register (asserted).
//
// The candidates are tried in this order: the immediate or constant assigned
// by s's defining instruction, the tracked array/field access, and finally
// the hash table of recorded assignments.  When nothing is found and the
// defining instruction is an assignment, that instruction is unmarked as
// dead.
//
Operand *Folding::lookup_replace_opnd(Operand *s) {
    assert(s->is_single_def_temp_reg());
    Temp_Reg *src = (Temp_Reg*)s;
    Inst *def = src->inst();
    // check def before it is dereferenced so the assertion can catch NULL
    assert(def);
    Operand *src_of_def = def->src(0);
    assert(src_of_def);

    //
    // first, replace src with immediate 
    //
    if (def->is_imm_assignment())
        return src_of_def;
    //
    // replace src with addr, float/double or long constant; for a long
    // constant the half matching src (hi or lo) is returned
    //
    if (def->is_const_assignment()) {
        O3_Jit_Type ty = src_of_def->type;
        if (ty == JIT_TYPE_ADDR || ty == JIT_TYPE_FLOAT || ty == JIT_TYPE_DOUBLE)
            return src_of_def;
        else if (ty == JIT_TYPE_LONG)
            return (src->is_hi()) ? ((Const_Operand*)src_of_def)->hi() :
                                    ((Const_Operand*)src_of_def)->lo() ;
    }
    //
    // replace src with array or field access (either the tracked access
    // itself or its hi half)
    //
    if (_ary_fld != NULL) {
        if (_ary_fld->dst() == src)
            return _ary_fld->src(0);
        else if (_ary_fld->dst()->hi_opnd() == src)
            return _ary_fld->src(0)->hi_opnd();
    }
    //
    // replace src with the source of a recorded assignment whose
    // destination is src (static access or register)
    //
    unsigned entry = fold_hash(src_of_def);
    Fold_Link *head = &_table[entry];
    Fold_Link *fold;
    for (fold = head->next(); fold != head; fold = fold->next())
        if (fold->elem->dst() == src) 
            return fold->elem->src(0);
    // 
    // failed to fold src: the defining assignment must not be treated as
    // dead.  (Marking src as a global register allocation candidate is
    // disabled.)
    //
    if (def->is_assignment()) {
//        src->set_global_reg_cand();
        def->unmark_dead();
    }
    return NULL;
}

//
// Because we want to guarantee that exceptions happen in the right order,
// we don't allow field accesses to be folded into their compute instructions
// if there are other instructions that could potentially throw exceptions
// between the field accesses and the compute instructions.
// For instance,      t11 = [t8 + 16]        --- field access
//                    [t5 + t4*4 + 8] = ...  --- array access
//                    t10 + t11 
//
// If "[t8 + 16]" is folded into "t10 + t11", then exceptions may happen in
// different order (NULL pointer and array out of bound exceptions).
//
void Folding::killed_by_ary_fld(Operand *opnd) {
    if (opnd->kind != Operand::Array && opnd->kind != Operand::Field)
        return;
    if (!_ary_fld || _ary_fld->src(0)->hi_opnd() != opnd)
        _ary_fld = NULL;
}

//
// Invalidate the folding candidates that a definition of opnd kills.
//
// An array/field operand drops the tracked array/field access.  A static or
// register operand drops the tracked access when it is used as its base or
// index, and removes the matching assignments from its hash bucket.  Other
// operand kinds kill nothing.
//
void Folding::killed_by(Operand *opnd) {
    if (opnd->kind == Operand::Array || opnd->kind == Operand::Field) {
        _ary_fld = NULL;
        return;
    }
    if (opnd->kind != Operand::Static && !opnd->is_reg())
        return;

    //
    // kill array or field if opnd is used as the base or index
    // (compared by bit-vector position)
    //
    if (_ary_fld != NULL && opnd->is_reg()) {
        Operand *access = _ary_fld->src(0);
        assert(access->kind == Operand::Array || access->kind == Operand::Field);
        Operand *base = access->base();
        Operand *indx = access->index();
        unsigned killed_pos = opnd->bv_position();
        bool hits_base = (base->bv_position() == killed_pos);
        if (hits_base ||
            (indx != NULL && indx->is_reg() && indx->bv_position() == killed_pos))
            _ary_fld = NULL;
    }

    //
    // remove ( "..= static" or ".. = reg") from the hash table: an entry
    // goes away when its source is opnd itself or shares opnd's assigned
    // physical register
    //
    unsigned bucket = fold_hash(opnd);
    Fold_Link *head = &_table[bucket];
    Fold_Link *link = head->next();
    while (link != head) {
        Fold_Link *following = link->next(); // fetched before link is unlinked
        Operand *folded_src = link->elem->src(0);
        if (folded_src == opnd ||
            (opnd->assigned_preg() != n_reg &&
             folded_src->assigned_preg() == opnd->assigned_preg())) {
            link->unlink();
            _size[bucket]--;
        }
        link = following;
    }

    //
    // kill array or field operand if opnd == base() or opnd == index()
    // (compared by identity)
    //
    if (_ary_fld) {
        Operand *access = _ary_fld->src(0);
        assert(access->base() != NULL);
        if (access->base() == opnd || access->index() == opnd)
            _ary_fld = NULL;
    }
}

//
// Some iA32 addressing modes requires that certain operands must be in
// registers. For those operands, we only try to fold registers (basically, it
// is the same as copy propagation).  After global optimization, we won't see 
// any advantage of this kind of folding operations.  However, if global 
// optimization phase is not performed, there are a lot opportunities.
//
void Folding::replace_reg_opnd(Reg_Operand*& opnd) {
    if (!opnd->is_single_def_temp_reg()) return;
    Temp_Reg *r = (Temp_Reg*)opnd;
    Inst *def = r->inst();
    if (def->is_reg_assignment()) {
        Operand *new_src = lookup_replace_opnd(r);
        if (new_src != NULL) {
            assert(new_src->is_reg());
            opnd = (Reg_Operand*)new_src;
            if(!r->global_reg_alloc_cand())
                def->mark_dead();
        }
    } else
        def->unmark_dead();
}

//
// Closure carried through the flow graph walk of find_local_reg_cand().
//
class Find_Closure : public Closure {
public:
    Expressions& exprs;   // passed to the instruction expansion routines
    unsigned marker;      // per-block marker, incremented after each block
    Flow_Graph *fg;       // passed to bounds_expansion()

    Find_Closure(Expressions& expressions, unsigned first_marker, Flow_Graph *flow_graph)
        : exprs(expressions), marker(first_marker), fg(flow_graph) {}
};

//
// Per-block callback: expand the instructions of node, then determine which
// operands are local register allocation candidates.
//
// node : the block being processed
// c    : really a Find_Closure; its marker is incremented on exit
//
static void find_local_reg_cand(Cfg_Node *node, Closure *c) {
    Find_Closure *fc = (Find_Closure*)c;
    Inst *head = node->IR_instruction_list();
    Inst *inst;

    //
    // pass 1: expand long instructions and helper functions; the walk
    // continues from the instruction returned by the expansion
    //
    inst = head->next();
    while (inst != head) {
        if (!inst->is_switch()) {
            inst = inst->expand(fc->exprs);
            inst->bounds_expansion(fc->exprs, node, fc->fg);
        } else {
            inst = ((Switch_Inst*)inst)->special_expansion(fc->exprs,node);
        }
        inst = inst->next();
    }

    //
    // pass 2: classify the operands
    //
    for (inst = head->next(); inst != head; inst = inst->next()) {
        //
        // determine if each source is a local reg alloc candidate
        //
        const Inst::Info *inf = inst->info();
        int k;
        for (k = 0; k < inst->n_srcs; k++)
            inst->src(k)->find_local_reg_cand(inf->src_in_reg[k],fc->marker);

        //
        // An assignment that stores src back to memory needs src in a
        // register, because only one operand of a move instruction can be
        // a memory access.
        //
        Operand *dst = inst->dst();
        if (inst->is_assignment()) {
            Operand *src = inst->src(0);
            if (dst->is_mem() && src->is_temp_reg())
                ((Temp_Reg*)src)->set_assign_local_reg();
        }

        //
        // mark dst to indicate that dst is defined within the current block
        //
        if (dst == NULL)
            continue;
#if 0
        if (dst->is_temp_reg()) {
            ((Temp_Reg*)dst)->set_no(fc->marker);
            continue;
        }
#endif
        if (dst->is_mem()) // take care field/array accesses
            dst->find_local_reg_cand(0,fc->marker);
    }

    (fc->marker)++; // inc marker for subsequent blocks
}

//
//  t1 = [t0 + 4]       We would like to fold [t0 + 4] into "add" instead of t1
//  t2 = t1             (replace t2 with [t0 + 4])
//  add t2, t3
//
// Follow the chain of replacements: as long as the replacement found for src
// is itself foldable and has a replacement of its own, keep going.  Returns
// the last replacement found, or NULL when src has none.
//
Operand *lookup_replace_opnd(Folding& fold, Operand *src) {
    Operand *best = fold.lookup_replace_opnd(src);
    while (best != NULL && best->is_foldable()) {
        Operand *deeper = fold.lookup_replace_opnd(best);
        if (deeper == NULL)
            break;          // chain ends here; keep what we have
        best = deeper;
    }
    return best;
}
//
// Per-block callback that folds immediates, constants and memory operands
// into the instructions that use them.
//
// Folding memory operands is basically the same as code motion.  We need
// to watch out if exceptions may be thrown out of order.  Therefore, we
// preserve the order of those memory accesses that can potentially throw 
// exceptions (see the killed_by / killed_by_ary_fld calls and the reset on
// calls below).
//
// node : the block being processed
// c    : unused
//
static void fold_operand(Cfg_Node *node, Closure *c) {
    // the folding table and its memory manager live only for this block
    Mem_Manager mm(10);
    Folding fold(mm);
    Operand *dst, *src;
    Inst *head = node->IR_instruction_list();
    Inst *i;
    for (i = head->next(); i != head; i = i->next()) {
        unsigned j;
        for (j = 0; j < i->n_srcs; j++) {
            src = i->src(j);
            if (src->is_foldable()) {
                //
                // look for the operand that can be folded into src
                //
                assert(src->is_single_def_temp_reg());
#if 1
                // follows chains of replacements (free function above)
                Operand *new_src = lookup_replace_opnd(fold,src);
#else
                // single-step lookup (disabled)
                Operand *new_src = fold.lookup_replace_opnd(src);
#endif
                //
                // if j_src must be in reg, then we can only fold reg
                // widen instruction (movsx) can only take reg/mem so we have
                // to prevent folding imm into widen instruction
                //
                // an fp-stack replacement is only folded into an assignment
                //
                bool j_src_in_reg = i->info()->src_in_reg[j] == 1;
                if ( new_src != NULL && 
                    (!i->is_widen() || new_src->kind != Operand::Immediate) &&
                    ((!new_src->is_fp_stk() && (!j_src_in_reg || new_src->is_reg())) ||
                     ( new_src->is_fp_stk() && i->is_assignment()) )) {
                    i->replace_src(j,new_src);
                    // the defining instruction of the replaced temp is marked
                    // dead unless the temp is a global allocation candidate
                    if(!((Temp_Reg*)src)->global_reg_alloc_cand())
                        ((Temp_Reg*)src)->inst()->mark_dead();
                }
            } else
                // field/array accesses kill other field/array accesses
                fold.killed_by_ary_fld(src); 
            // note: src is still the operand fetched before any replacement
            src->fold_operand(fold); // fold reg into field/array
        }
        dst = i->dst();
        if (dst) dst->fold_operand(fold); // fold reg into field/array
        
        if (i->is_assignment()) {
            src = i->src(0);
            // the definition of dst kills candidates first; only then is
            // this assignment itself recorded as a new candidate
            fold.killed_by(dst);
            if (dst->is_foldable() && 
                (src->is_aliased_across_call() || src->is_reg())) {
                fold.insert(i);
            }
        } else if (/*i->is_iinc()*/dst != NULL) 
            fold.killed_by(dst);
        else if (i->is_call()) 
            fold.reset(); // call kills all static, array, and field accesses
    }
}

//
// Closure that hands the shared Local_Reg_Manager to local_reg_bb().
//
class Reg_Closure : public Closure {
public:
    Reg_Closure(Local_Reg_Manager& manager) : rm(manager) {}

    Local_Reg_Manager& rm;
};

//
// Per-block callback: walk the IR inst list of node and perform local
// register allocation.
//
// node : the block being processed
// c    : really a Reg_Closure carrying the Local_Reg_Manager
//
static void local_reg_bb(Cfg_Node *node, Closure *c) {
    Local_Reg_Manager &local_reg = ((Reg_Closure*)c)->rm;
    local_reg.reset();
    Inst *head = node->IR_instruction_list();
    //
    // traverse in REVERSE order and allocate registers to local live ranges;
    // the predecessor is fetched up front because inst may get unlinked
    //
    Inst *above;
    for (Inst *inst = head->prev(); inst != head; inst = above) {
        above = inst->prev();
        //
        // remove dead assignments of folding operand pass
        //
        if (inst->is_dead() || inst->is_pseudo_asgn()) {
            inst->unlink();
            continue;
        }
        //
        // definition ends a live range
        //
        Operand *dst = inst->dst();
        if (dst != NULL) {
            local_reg.ends_live_range(dst);
            dst->starts_live_range_from_dst(local_reg);
        }
        //
        // call ends live range of ret and starts live ranges of args
        //
        if (inst->is_call())
            local_reg.call_live_range((Call_Inst*)inst);
        //
        // use starts a live range
        //
        int k;
        for (k = 0; k < inst->n_srcs; k++)
            inst->src(k)->starts_live_range(local_reg);
        //
        // (t1 = t2): set t2's preference so that local register allocation
        // attempts to find reg(t1) for t2 if it is available.
        //
        if (inst->is_reg_assignment() && inst->dst()->is_reg())
            local_reg.set_preference((Reg_Operand*)inst->dst(),
                                     (Reg_Operand*)inst->src(0));
    }
}

//
// Driver of local register allocation.  Three walks over the flow graph:
//   1. determine local register allocation candidates
//   2. fold imm and memory operands into their compute instructions
//   3. assign registers to local temp regs
// Returns the value of ever_used_pregs() of the Local_Reg_Manager used in
// step 3.
//
unsigned local_reg_allocation(Flow_Graph *fg, Expressions& exprs) {
    // step 1: candidates (block markers start at 1)
    Find_Closure cand_closure(exprs,1,fg);
    fg->apply(find_local_reg_cand,&cand_closure);

    // step 2: folding, with optional CFG dumps before and after
#ifdef TRACE_O3
    fg->print_cfg("xxx");
#endif // TRACE_O3
    fg->apply(fold_operand,(Closure*)NULL);
#ifdef TRACE_O3
    fg->print_cfg("zzz");
#endif // TRACE_O3

    // step 3: allocation out of ALL_X86_CALLER_REGS
    Local_Reg_Manager manager(ALL_X86_CALLER_REGS);
    Reg_Closure alloc_closure(manager);
    fg->apply(local_reg_bb,(Closure*)&alloc_closure);
    return manager.ever_used_pregs();
}

