#include "dmi_api.h"

/*
 * Parameter record handed from DMI_main() to every DMI_scaleunit() call.
 * DMI_main() fills one instance and writes it to DMI memory; DMI_scaleunit()
 * reads it back from the address it receives as its third argument.
 */
typedef struct scaleunit_t scaleunit_t;

struct scaleunit_t
{
  int32_t niter;         /* lock/unlock iterations each scale unit performs */
  int64_t mutex_addr;    /* DMI address of the shared DMI_mutex_t */
  int64_t barrier_addr;  /* DMI address of the shared DMI_barrier_t */
  int64_t counter_addr;  /* DMI address of the shared int32_t counter */
};

/*
 * Driver of the mutex benchmark.
 *
 * Command line: init_node_num thread_num niter
 *   init_node_num, thread_num - parsed with atoi() and forwarded unchanged to
 *                               DMI_rescale()
 *   niter                     - parsed with atoi() and stored in the shared
 *                               scaleunit_t; DMI_scaleunit() uses it as its
 *                               loop count
 *
 * Steps: map four DMI regions (parameter record, counter, mutex, barrier),
 * publish the parameter record, initialize the mutex, the barrier and the
 * counter (to 0), call DMI_rescale(), print the counter, then destroy the
 * synchronization objects and unmap everything.
 *
 * When argc is not 4 a usage line is reported through errn() and error() is
 * called.
 */
void DMI_main(int argc, char **argv)
{
  scaleunit_t scaleunit;
  int32_t init_node_num, thread_num, niter, value;
  int64_t scaleunit_addr, mutex_addr, barrier_addr, counter_addr;
  
  if(argc != 4)
    {
      errn("usage : %s init_node_num thread_num niter", argv[0]);
      error();
    }
  init_node_num = atoi(argv[1]);
  thread_num = atoi(argv[2]);
  niter = atoi(argv[3]);
  
  /* One DMI region per shared object. */
  catch(DMI_mmap(&scaleunit_addr, sizeof(scaleunit_t), 1, NULL));
  catch(DMI_mmap(&counter_addr, sizeof(int32_t), 1, NULL));
  catch(DMI_mmap(&mutex_addr, sizeof(DMI_mutex_t), 1, NULL));
  catch(DMI_mmap(&barrier_addr, sizeof(DMI_barrier_t), 1, NULL));
  
  /* Publish the addresses and the iteration count for DMI_scaleunit(). */
  scaleunit.mutex_addr = mutex_addr;
  scaleunit.barrier_addr = barrier_addr;
  scaleunit.counter_addr = counter_addr;
  scaleunit.niter = niter;
  catch(DMI_write(scaleunit_addr, sizeof(scaleunit_t), &scaleunit, DMI_EXCLUSIVE, NULL));
  
  catch(DMI_mutex_init(mutex_addr));
  catch(DMI_barrier_init(barrier_addr));
  value = 0;
  catch(DMI_write(counter_addr, sizeof(int32_t), &value, DMI_EXCLUSIVE, NULL));
  
  /* NOTE(review): presumably this is what runs the DMI_scaleunit() instances
     and returns once they are done - confirm against the DMI documentation. */
  catch(DMI_rescale(scaleunit_addr, init_node_num, thread_num));
  
  catch(DMI_read(scaleunit.counter_addr, sizeof(int32_t), &value, DMI_GET, NULL));
  outn("# value=%d", value);
  
  /* Destroy both synchronization objects while their memory is still mapped.
     The barrier was previously initialized but never destroyed, unlike the
     mutex. */
  catch(DMI_barrier_destroy(barrier_addr));
  catch(DMI_mutex_destroy(mutex_addr));
  
  catch(DMI_munmap(barrier_addr, NULL));
  catch(DMI_munmap(mutex_addr, NULL));
  catch(DMI_munmap(counter_addr, NULL));
  catch(DMI_munmap(scaleunit_addr, NULL));
}

/*
 * Per-rank body of the mutex benchmark.
 *
 * my_rank        - rank of this instance; selects the CPU to bind to
 *                  (my_rank % PROCNUM) and decides who prints the result
 * pnum           - participant count passed to every barrier operation
 * scaleunit_addr - DMI address of the scaleunit_t written by DMI_main()
 *
 * Each rank locks and immediately unlocks the shared mutex niter times
 * between two barrier syncs. The per-rank durations are combined with
 * DMI_OP_SUM and rank 0 prints "pnum  sum / niter / pnum".
 *
 * Always returns 0.
 */
int32_t DMI_scaleunit(int my_rank, int pnum, int64_t scaleunit_addr)
{
  scaleunit_t cfg;
  DMI_local_barrier_t local_barrier;
  double elapsed_local, elapsed_total;
  int32_t remaining;
  
  /* Fetch the shared parameters, then set up this rank. */
  catch(DMI_read(scaleunit_addr, sizeof cfg, &cfg, DMI_GET, NULL));
  bind_to_cpu(my_rank % PROCNUM);
  catch(DMI_local_barrier_init(&local_barrier, cfg.barrier_addr));
  
  /* Line all ranks up before the measurement starts.
     NOTE(review): time_lap(10)/time_diff(10) presumably start and read
     timer slot 10 - confirm against their definitions. */
  catch(DMI_local_barrier_sync(&local_barrier, pnum));
  time_lap(10);
  
  for(remaining = cfg.niter; remaining > 0; remaining--)
    {
      catch(DMI_mutex_lock(cfg.mutex_addr));
      /*
       * Disabled counter update - the critical section is currently empty.
       * Re-enabling it needs a local "int32_t value;".
       *
       * catch(DMI_read(cfg.counter_addr, sizeof(int32_t), &value, DMI_GET, NULL));
       * value++;
       * catch(DMI_write(cfg.counter_addr, sizeof(int32_t), &value, DMI_PUT, NULL));
       */
      catch(DMI_mutex_unlock(cfg.mutex_addr));
      /* Disabled progress trace (was every 20th iteration): err("#%d ", iter); */
    }
  
  /* Wait for every rank to finish before reading the timer. */
  catch(DMI_local_barrier_sync(&local_barrier, pnum));
  elapsed_local = time_diff(10);
  catch(DMI_local_barrier_allreduce(&local_barrier, pnum, &elapsed_local, &elapsed_total, DMI_OP_SUM, DMI_TYPE_DOUBLE));
  
  if(my_rank == 0)
    {
      outn("%d %.15lf", pnum, elapsed_total / cfg.niter / pnum);
    }
  
  catch(DMI_local_barrier_destroy(&local_barrier));
  return 0;
}
