/* -*- c-file-style: "GNU" -*- */
/*
 * Copyright © CNRS, INRIA, Université Bordeaux 1
 * See COPYING in top-level directory.
 */

#define _GNU_SOURCE 1
#define _REENTRANT

#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
#include <dlfcn.h>
#include <pthread.h>
#include <stdio.h>
#include <semaphore.h>

#include <pomp_lib.h>

#include "gomp_ev_codes.h"
#include "eztrace.h"

//#define VERBOSE 1

/* FUNCTION_ENTRY is the first statement of the hooks in this file.
 * It always expands to RECORD_HW_COUNTERS(); when VERBOSE is defined it
 * additionally prints the name of the enclosing function on stdout first.
 */
#ifdef VERBOSE
/* do { } while (0) makes the expansion a single statement, so that
 * "FUNCTION_ENTRY;" stays correct even inside an unbraced if/else. */
#define FUNCTION_ENTRY				\
  do						\
    {						\
      printf("Calling [%s]\n", __func__);	\
      RECORD_HW_COUNTERS();			\
    }						\
  while (0)

#else
#define FUNCTION_ENTRY  RECORD_HW_COUNTERS()
#endif

/* Non-zero once __gomp_init() has resolved the c_pomp_finalize_ symbol. */
static int pomp_found = 0;

/* Statement prefix: the statement that follows GOMP_RECORD is executed only
 * while pomp_found is zero.
 * Written as "if (...) { } else" rather than a bare "if (!...)" so that an
 * "else" placed after the guarded statement binds to the caller's own "if"
 * and not to the one hidden inside this macro. */
#define GOMP_RECORD if (pomp_found) { } else

/* Pointers to the intercepted GNU OpenMP runtime functions.  They are
 * filled in by the INTERCEPT calls of __gomp_init() and called by the
 * wrappers of the same name (without the "lib" prefix) defined in this file.
 *
 * TODO: add hooks for OMP_Barrier (and OMP_critical? -- note that
 * GOMP_critical_start/GOMP_critical_end are wrapped in this file).
 */

/* "parallel for" entry points, one per schedule kind.  */
void (*libGOMP_parallel_loop_static_start) (void (*fn) (void *), void *data,
                                            unsigned num_threads,
                                            long a1, long a2, long a3, long a4);
void (*libGOMP_parallel_loop_runtime_start) (void (*fn) (void *), void *data,
                                             unsigned num_threads,
                                             long a1, long a2, long a3, long a4);
void (*libGOMP_parallel_loop_dynamic_start) (void (*fn) (void *), void *data,
                                             unsigned num_threads,
                                             long a1, long a2, long a3, long a4);
void (*libGOMP_parallel_loop_guided_start) (void (*fn) (void *), void *data,
                                            unsigned num_threads,
                                            long a1, long a2, long a3, long a4);

/* Plain parallel region: fork and join.  */
void (*libGOMP_parallel_start) (void (*fn) (void *), void *data,
                                unsigned num_threads);
void (*libGOMP_parallel_end) ();

/* Critical section: entry and exit.  */
void (*libGOMP_critical_start) (void);
void (*libGOMP_critical_end) (void);

/* Bundle passed to gomp_new_thread() in place of the caller's own data
 * pointer; it is filled in by GOMP_PARALLEL_LOOP_GENERIC.  */
struct gomp_arg_t
{
  /* Function originally handed to the GOMP_* entry point.  */
  void (*func) (void *);
  /* Argument to pass to func.  */
  void *data;
  /* Section id drawn from _next_section_id for this region.  */
  int id;
};

/* Function called by GOMP_parallel_start for each thread */
void
gomp_new_thread (void *arg)
{
  FUNCTION_ENTRY;
  struct gomp_arg_t *_arg = (struct gomp_arg_t*) arg;
  void (*func) (void *) = _arg->func;
  void *data = _arg->data;
  int section_id = _arg->id;
  int nb_threads = omp_get_num_threads();
  int my_id = omp_get_thread_num();
  /* Since the runtime functions provide more information, let's use it instead of the compiler functions */
  EZTRACE_EVENT3 (FUT_GOMP_NEW_FORK, section_id, my_id, nb_threads);
  func (data);
  EZTRACE_EVENT1 (FUT_GOMP_NEW_JOIN, my_id);
  return;
}

/* Next section id to hand out; post-incremented by
 * GOMP_PARALLEL_LOOP_GENERIC and c_pomp_parallel_fork_().  Being a static
 * object, it starts at zero.  */
static int _next_section_id;

/* Generic implementation of the parallel-region/parallel-loop wrappers.
 *
 * Must be expanded as the whole body of a void function that has a
 * parameter named num_threads (the macro reads it and ends with "return").
 *
 *   fn, data   function and argument received by the wrapper
 *   varname    name given to the local struct gomp_arg_t * created here
 *   gomp_func  call expression to the real runtime function; it is expected
 *              to pass gomp_new_thread and varname instead of fn and data
 *
 * Steps: take a fresh section id, emit FUT_GOMP_PARALLEL_START, allocate and
 * fill the wrapper, evaluate gomp_func, then emit FUT_GOMP_NEW_FORK for the
 * calling thread.
 *
 * If the wrapper cannot be allocated the process is aborted with a message
 * on stderr (the region cannot be forwarded without it).
 *
 * NOTE(review): nothing in this file frees the wrapper, so each expansion
 * appears to leak one struct gomp_arg_t -- confirm and release it once the
 * region has been joined.
 */
#define GOMP_PARALLEL_LOOP_GENERIC(fn, data, varname, gomp_func)	\
  {									\
    FUNCTION_ENTRY;							\
    int section_id = _next_section_id++;				\
    EZTRACE_PROTECT_ON();						\
    /* Since the runtime functions provide more information, let's use it instead of the compiler functions */ \
    EZTRACE_EVENT4 (FUT_GOMP_PARALLEL_START, fn, data, num_threads, section_id); \
    struct gomp_arg_t *varname = malloc (sizeof *varname);		\
    if (varname == NULL)						\
      {									\
        fprintf (stderr, "eztrace: out of memory while tracing an OpenMP parallel region\n"); \
        abort ();							\
      }									\
    varname->func = fn;							\
    varname->data = data;						\
    varname->id = section_id;						\
    EZTRACE_PROTECT_OFF();						\
    gomp_func;								\
    int nb_threads = omp_get_num_threads();				\
    int my_id = omp_get_thread_num();					\
    EZTRACE_EVENT3 (FUT_GOMP_NEW_FORK, section_id, my_id, nb_threads);  \
    return;								\
  }

/* should be called when reaching #pragma omp parallel for schedule(static)
 * However, this function doesn't seem to be called. Let's implement it just in case.
 */
void GOMP_parallel_loop_static_start (void(*fn)(void *), void * data,
				      unsigned num_threads, long a1, long a2, long a3, long a4)
{
  GOMP_PARALLEL_LOOP_GENERIC(fn,
			     data,
			     arg,
			     libGOMP_parallel_loop_static_start (gomp_new_thread, arg, num_threads, a1, a2, a3, a4));
}

/* Function called when reaching  #pragma omp parallel for schedule(runtime) */
void GOMP_parallel_loop_runtime_start (void(*fn)(void *), void * data,
				       unsigned num_threads, long a1, long a2, long a3, long a4)
{
  GOMP_PARALLEL_LOOP_GENERIC(fn,
			     data,
			     arg,
			     libGOMP_parallel_loop_runtime_start (gomp_new_thread, arg, num_threads, a1, a2, a3, a4));
}

/* Function called when reaching  #pragma omp parallel for schedule(dynamic) */
void GOMP_parallel_loop_dynamic_start(void(*fn)(void *), void * data,
				      unsigned num_threads, long a1, long a2, long a3, long a4)
{
  GOMP_PARALLEL_LOOP_GENERIC(fn,
			     data,
			     arg,
			     libGOMP_parallel_loop_dynamic_start (gomp_new_thread, arg, num_threads, a1, a2, a3, a4));
}

/* Function called when reaching  #pragma omp parallel for schedule(guided) */
void GOMP_parallel_loop_guided_start(void(*fn)(void *), void * data,
				     unsigned num_threads, long a1, long a2, long a3, long a4)
{
  GOMP_PARALLEL_LOOP_GENERIC(fn,
			     data,
			     arg,
			     libGOMP_parallel_loop_guided_start (gomp_new_thread, arg, num_threads, a1, a2, a3, a4));
}

// Called by the main thread (ie. only once) during #pragma omp parallel
// (fork)
void
GOMP_parallel_start (void (*fn) (void *), void *data, unsigned num_threads)
{
  GOMP_PARALLEL_LOOP_GENERIC(fn,
			     data,
			     arg,
			     libGOMP_parallel_start (gomp_new_thread, arg, num_threads));
}


/* Join side of a parallel section.
 *
 * Emits FUT_GOMP_NEW_JOIN with the calling thread's number, calls the real
 * GOMP_parallel_end, then emits FUT_GOMP_JOIN_DONE.
 */
void
GOMP_parallel_end ()
{
  FUNCTION_ENTRY;
  /* Since the runtime functions provide more information, let's use it
     instead of the compiler functions */
  int thread_rank = omp_get_thread_num ();

  EZTRACE_EVENT1 (FUT_GOMP_NEW_JOIN, thread_rank);
  libGOMP_parallel_end ();
  EZTRACE_EVENT0 (FUT_GOMP_JOIN_DONE);
}

/* Wrapper around the real GOMP_critical_start.
 * When GOMP_RECORD allows it (pomp_found is zero), FUT_GOMP_CRITICAL_START
 * is emitted before the real call and FUT_GOMP_CRITICAL_START_DONE after
 * it has returned.  */
void
GOMP_critical_start ()
{
  FUNCTION_ENTRY;

  GOMP_RECORD EZTRACE_EVENT0 (FUT_GOMP_CRITICAL_START);

  libGOMP_critical_start ();

  GOMP_RECORD EZTRACE_EVENT0 (FUT_GOMP_CRITICAL_START_DONE);
}

/* Wrapper around the real GOMP_critical_end.
 * When GOMP_RECORD allows it (pomp_found is zero), FUT_GOMP_CRITICAL_STOP
 * is emitted before the real call.  */
void
GOMP_critical_end ()
{
  FUNCTION_ENTRY;

  GOMP_RECORD EZTRACE_EVENT0 (FUT_GOMP_CRITICAL_STOP);

  libGOMP_critical_end ();
}


/* POMP hook: runs FUNCTION_ENTRY, then emits the FUT_POMP_FINALIZE event.  */
void
c_pomp_finalize_()
{
  FUNCTION_ENTRY;
  EZTRACE_EVENT0 (FUT_POMP_FINALIZE);
}

/* POMP hook: runs FUNCTION_ENTRY, then emits the FUT_POMP_ATOMIC_ENTER
 * event.  The region descriptor r is not used.  */
void
c_pomp_atomic_enter_ (struct ompregdescr *r __attribute__ ((unused)))
{
  FUNCTION_ENTRY;
  EZTRACE_EVENT0 (FUT_POMP_ATOMIC_ENTER);
}

/* POMP hook: runs FUNCTION_ENTRY, then emits the FUT_POMP_ATOMIC_EXIT
 * event.  The region descriptor r is not used.  */
void
c_pomp_atomic_exit_ (struct ompregdescr *r __attribute__ ((unused)))
{
  FUNCTION_ENTRY;
  EZTRACE_EVENT0 (FUT_POMP_ATOMIC_EXIT);
}

/* POMP hook: runs FUNCTION_ENTRY, then emits the FUT_POMP_BARRIER_ENTER
 * event.  The region descriptor r is not used.  */
void
c_pomp_barrier_enter_ (struct ompregdescr *r __attribute__ ((unused)))
{
  FUNCTION_ENTRY;
  EZTRACE_EVENT0 (FUT_POMP_BARRIER_ENTER);
}

/* POMP hook: runs FUNCTION_ENTRY, then emits the FUT_POMP_BARRIER_EXIT
 * event.  The region descriptor r is not used.  */
void
c_pomp_barrier_exit_ (struct ompregdescr *r __attribute__ ((unused)))
{
  FUNCTION_ENTRY;
  EZTRACE_EVENT0 (FUT_POMP_BARRIER_EXIT);
}

/* POMP hook: runs FUNCTION_ENTRY, then emits the FUT_POMP_FLUSH_ENTER
 * event.  The region descriptor r is not used.  */
void
c_pomp_flush_enter_ (struct ompregdescr *r __attribute__ ((unused)))
{
  FUNCTION_ENTRY;
  EZTRACE_EVENT0 (FUT_POMP_FLUSH_ENTER);
}

/* POMP hook: runs FUNCTION_ENTRY, then emits the FUT_POMP_FLUSH_EXIT
 * event.  The region descriptor r is not used.  */
void
c_pomp_flush_exit_ (struct ompregdescr *r __attribute__ ((unused)))
{
  FUNCTION_ENTRY;
  EZTRACE_EVENT0 (FUT_POMP_FLUSH_EXIT);
}

/* POMP hook: runs FUNCTION_ENTRY, then emits the FUT_POMP_CRITICAL_BEGIN
 * event.  The region descriptor r is not used.  */
void
c_pomp_critical_begin_ (struct ompregdescr *r __attribute__ ((unused)))
{
  FUNCTION_ENTRY;
  EZTRACE_EVENT0 (FUT_POMP_CRITICAL_BEGIN);
}

/* POMP hook: runs FUNCTION_ENTRY, then emits the FUT_POMP_CRITICAL_END
 * event.  The region descriptor r is not used.  */
void
c_pomp_critical_end_ (struct ompregdescr *r __attribute__ ((unused)))
{
  FUNCTION_ENTRY;
  EZTRACE_EVENT0 (FUT_POMP_CRITICAL_END);
}

/* POMP hook: runs FUNCTION_ENTRY, then emits the FUT_POMP_CRITICAL_ENTER
 * event.  The region descriptor r is not used.  */
void
c_pomp_critical_enter_ (struct ompregdescr *r __attribute__ ((unused)))
{
  FUNCTION_ENTRY;
  EZTRACE_EVENT0 (FUT_POMP_CRITICAL_ENTER);
}

/* POMP hook: runs FUNCTION_ENTRY, then emits the FUT_POMP_CRITICAL_EXIT
 * event.  The region descriptor r is not used.  */
void
c_pomp_critical_exit_ (struct ompregdescr *r __attribute__ ((unused)))
{
  FUNCTION_ENTRY;
  EZTRACE_EVENT0 (FUT_POMP_CRITICAL_EXIT);
}

/* POMP hook: runs FUNCTION_ENTRY, then emits the FUT_POMP_TASK_BEGIN
 * event.  The region descriptor r is not used.  */
void
c_pomp_task_begin_ (struct ompregdescr *r __attribute__ ((unused)))
{
  FUNCTION_ENTRY;
  EZTRACE_EVENT0 (FUT_POMP_TASK_BEGIN);
}

/* POMP hook: runs FUNCTION_ENTRY, then emits the FUT_POMP_TASK_END
 * event.  The region descriptor r is not used.  */
void
c_pomp_task_end_ (struct ompregdescr *r __attribute__ ((unused)))
{
  FUNCTION_ENTRY;
  EZTRACE_EVENT0 (FUT_POMP_TASK_END);
}

/* POMP hook: runs FUNCTION_ENTRY, then emits the FUT_POMP_TASKWAIT_ENTER
 * event.  The region descriptor r is not used.  */
void
c_pomp_taskwait_enter_ (struct ompregdescr *r __attribute__ ((unused)))
{
  FUNCTION_ENTRY;
  EZTRACE_EVENT0 (FUT_POMP_TASKWAIT_ENTER);
}

/* POMP hook: runs FUNCTION_ENTRY, then emits the FUT_POMP_TASKWAIT_EXIT
 * event.  The region descriptor r is not used.  */
void
c_pomp_taskwait_exit_ (struct ompregdescr *r __attribute__ ((unused)))
{
  FUNCTION_ENTRY;
  EZTRACE_EVENT0 (FUT_POMP_TASKWAIT_EXIT);
}

/* POMP hook: runs FUNCTION_ENTRY, then emits the FUT_POMP_FOR_ENTER
 * event.  The region descriptor r is not used.  */
void
c_pomp_for_enter_ (struct ompregdescr *r __attribute__ ((unused)))
{
  FUNCTION_ENTRY;
  EZTRACE_EVENT0 (FUT_POMP_FOR_ENTER);
}

/* POMP hook: runs FUNCTION_ENTRY, then emits the FUT_POMP_FOR_EXIT
 * event.  The region descriptor r is not used.  */
void
c_pomp_for_exit_ (struct ompregdescr *r __attribute__ ((unused)))
{
  FUNCTION_ENTRY;
  EZTRACE_EVENT0 (FUT_POMP_FOR_EXIT);
}

/* POMP hook: runs FUNCTION_ENTRY, then emits the FUT_POMP_MASTER_BEGIN
 * event.  The region descriptor r is not used.  */
void
c_pomp_master_begin_ (struct ompregdescr *r __attribute__ ((unused)))
{
  FUNCTION_ENTRY;
  EZTRACE_EVENT0 (FUT_POMP_MASTER_BEGIN);
}

/* POMP hook: runs FUNCTION_ENTRY, then emits the FUT_POMP_MASTER_END
 * event.  The region descriptor r is not used.  */
void
c_pomp_master_end_ (struct ompregdescr *r __attribute__ ((unused)))
{
  FUNCTION_ENTRY;
  EZTRACE_EVENT0 (FUT_POMP_MASTER_END);
}

/* POMP hook: runs FUNCTION_ENTRY, then emits the FUT_POMP_PARALLEL_BEGIN
 * event.  The region descriptor r is not used.  */
void
c_pomp_parallel_begin_ (struct ompregdescr *r __attribute__ ((unused)))
{
  FUNCTION_ENTRY;
  EZTRACE_EVENT0 (FUT_POMP_PARALLEL_BEGIN);
}

/* POMP hook: runs FUNCTION_ENTRY, then emits the FUT_POMP_PARALLEL_END
 * event.  The region descriptor r is not used.  */
void
c_pomp_parallel_end_ (struct ompregdescr *r __attribute__ ((unused)))
{
  FUNCTION_ENTRY;
  EZTRACE_EVENT0 (FUT_POMP_PARALLEL_END);
}

void c_pomp_parallel_fork_(struct ompregdescr* r __attribute__((unused)))
{
  FUNCTION_ENTRY;
  int section_id = _next_section_id++;
  EZTRACE_EVENT1(FUT_POMP_PARALLEL_FORK, section_id);
}

/* POMP hook: runs FUNCTION_ENTRY, then emits the FUT_POMP_PARALLEL_JOIN
 * event.  The region descriptor r is not used.  */
void
c_pomp_parallel_join_ (struct ompregdescr *r __attribute__ ((unused)))
{
  FUNCTION_ENTRY;
  EZTRACE_EVENT0 (FUT_POMP_PARALLEL_JOIN);
}

/* POMP hook: runs FUNCTION_ENTRY, then emits the FUT_POMP_SECTION_BEGIN
 * event.  The region descriptor r is not used.  */
void
c_pomp_section_begin_ (struct ompregdescr *r __attribute__ ((unused)))
{
  FUNCTION_ENTRY;
  EZTRACE_EVENT0 (FUT_POMP_SECTION_BEGIN);
}

/* POMP hook: runs FUNCTION_ENTRY, then emits the FUT_POMP_SECTION_END
 * event.  The region descriptor r is not used.  */
void
c_pomp_section_end_ (struct ompregdescr *r __attribute__ ((unused)))
{
  FUNCTION_ENTRY;
  EZTRACE_EVENT0 (FUT_POMP_SECTION_END);
}

/* POMP hook: runs FUNCTION_ENTRY, then emits the FUT_POMP_SECTIONS_ENTER
 * event.  The region descriptor r is not used.  */
void
c_pomp_sections_enter_ (struct ompregdescr *r __attribute__ ((unused)))
{
  FUNCTION_ENTRY;
  EZTRACE_EVENT0 (FUT_POMP_SECTIONS_ENTER);
}

/* POMP hook: runs FUNCTION_ENTRY, then emits the FUT_POMP_SECTIONS_EXIT
 * event.  The region descriptor r is not used.  */
void
c_pomp_sections_exit_ (struct ompregdescr *r __attribute__ ((unused)))
{
  FUNCTION_ENTRY;
  EZTRACE_EVENT0 (FUT_POMP_SECTIONS_EXIT);
}

/* POMP hook: runs FUNCTION_ENTRY, then emits the FUT_POMP_SINGLE_BEGIN
 * event.  The region descriptor r is not used.  */
void
c_pomp_single_begin_ (struct ompregdescr *r __attribute__ ((unused)))
{
  FUNCTION_ENTRY;
  EZTRACE_EVENT0 (FUT_POMP_SINGLE_BEGIN);
}

/* POMP hook: runs FUNCTION_ENTRY, then emits the FUT_POMP_SINGLE_END
 * event.  The region descriptor r is not used.  */
void
c_pomp_single_end_ (struct ompregdescr *r __attribute__ ((unused)))
{
  FUNCTION_ENTRY;
  EZTRACE_EVENT0 (FUT_POMP_SINGLE_END);
}

/* POMP hook: runs FUNCTION_ENTRY, then emits the FUT_POMP_SINGLE_ENTER
 * event.  The region descriptor r is not used.  */
void
c_pomp_single_enter_ (struct ompregdescr *r __attribute__ ((unused)))
{
  FUNCTION_ENTRY;
  EZTRACE_EVENT0 (FUT_POMP_SINGLE_ENTER);
}

/* POMP hook: runs FUNCTION_ENTRY, then emits the FUT_POMP_SINGLE_EXIT
 * event.  The region descriptor r is not used.  */
void
c_pomp_single_exit_ (struct ompregdescr *r __attribute__ ((unused)))
{
  FUNCTION_ENTRY;
  EZTRACE_EVENT0 (FUT_POMP_SINGLE_EXIT);
}

/* POMP hook: runs FUNCTION_ENTRY, then emits the FUT_POMP_WORKSHARE_ENTER
 * event.  The region descriptor r is not used.  */
void
c_pomp_workshare_enter_ (struct ompregdescr *r __attribute__ ((unused)))
{
  FUNCTION_ENTRY;
  EZTRACE_EVENT0 (FUT_POMP_WORKSHARE_ENTER);
}

/* POMP hook: runs FUNCTION_ENTRY, then emits the FUT_POMP_WORKSHARE_EXIT
 * event.  The region descriptor r is not used.  */
void
c_pomp_workshare_exit_ (struct ompregdescr *r __attribute__ ((unused)))
{
  FUNCTION_ENTRY;
  EZTRACE_EVENT0 (FUT_POMP_WORKSHARE_EXIT);
}

/* Filled in by __gomp_init() through INTERCEPT("c_pomp_finalize_", ...);
 * only tested against NULL in this file, never called.  */
void (*libc_pomp_finalize_)();

/* Library constructor.
 *
 * Resolves every intercepted symbol through INTERCEPT (provided by the
 * eztrace headers -- presumably a dynamic symbol lookup, confirm there),
 * records in pomp_found whether c_pomp_finalize_ was resolved, warns on
 * stderr when GOMP_critical_end was resolved but c_pomp_finalize_ was not,
 * and starts eztrace when EZTRACE_AUTOSTART is defined.
 */
void __gomp_init (void) __attribute__ ((constructor));
void
__gomp_init (void)
{
  /* Parallel regions and parallel loops.  */
  INTERCEPT ("GOMP_parallel_start", libGOMP_parallel_start);
  INTERCEPT ("GOMP_parallel_end", libGOMP_parallel_end);
  INTERCEPT ("GOMP_parallel_loop_static_start", libGOMP_parallel_loop_static_start);
  INTERCEPT ("GOMP_parallel_loop_runtime_start", libGOMP_parallel_loop_runtime_start);
  INTERCEPT ("GOMP_parallel_loop_dynamic_start", libGOMP_parallel_loop_dynamic_start);
  INTERCEPT ("GOMP_parallel_loop_guided_start", libGOMP_parallel_loop_guided_start);

  /* Critical sections.  */
  INTERCEPT ("GOMP_critical_start", libGOMP_critical_start);
  INTERCEPT ("GOMP_critical_end", libGOMP_critical_end);

  /* POMP probe.  */
  INTERCEPT ("c_pomp_finalize_", libc_pomp_finalize_);

  pomp_found = libc_pomp_finalize_ ? 1 : 0;

  if (!pomp_found && libGOMP_critical_end)
    {
      fprintf (stderr, "Only GNU OpenMP runtime functions will be intercepted. For a more precise trace, please instrument your program with eztrace_cc.\n");
    }

#ifdef EZTRACE_AUTOSTART
  eztrace_start ();
#endif
}

/* Library destructor: stops eztrace.  */
void __gomp_conclude (void) __attribute__ ((destructor));

void
__gomp_conclude (void)
{
  eztrace_stop ();
}
