#ifndef _dmi_api_
#define _dmi_api_

#include "dmi_system.h"

/*
 * Per-subsystem debug switches. All four are set to FALSE here; FALSE is
 * not defined in this header (expected from dmi_system.h -- confirm).
 */
#define DEBUG_ACCESS  FALSE
#define DEBUG_MUTEX   FALSE
#define DEBUG_MMAP    FALSE
#define DEBUG_PROFILE FALSE

/*
 * Default core count: half of the processors reported by
 * sysconf(_SC_NPROCESSORS_CONF). The call is made at every expansion.
 * The replacement list deliberately has no trailing semicolon so the macro
 * can appear inside larger expressions and as a function argument.
 */
#define CONST_DEFAULT_CORE_NUM (sysconf(_SC_NPROCESSORS_CONF) / 2)
/*
 * Built-in defaults and fixed capacity limits.
 * Units are not stated in this header; the *_SIZE values are presumably
 * byte counts -- confirm against the implementation.
 */
#define CONST_DEFAULT_MEM_SIZE          ((int64_t)32ULL * 1024 * 1024 * 1024) /* 32 * 2^30, typed int64_t */
#define CONST_DEFAULT_PORT              7880
#define CONST_RESCALE_HALT_INTERVAL     0.3
#define CONST_OPTION_SIZE               256 /* length of forkkill_t.option[] */
#define CONST_KILL_COUNT                3
#define CONST_NODE_MAX                  256 /* length of migthread_t.check_flags[] */
#define CONST_ATHREAD_MAX               256
#define CONST_MTHREAD_MAX               256
#define CONST_STHREAD_MAX               256
#define CONST_VM_MAX2                   2048
#define CONST_RECORD_MAX                (1024 * 1024)
#define CONST_DEFAULT_PAGE_SIZE         (256 * 1024)
#define CONST_THREAD_MALLOC_EXPAND_SIZE (8 * 1024 * 1024)
#define CONST_THREAD_MALLOC_MMAP_SIZE   (512)
#define CONST_DMI_MALLOC_EXPAND_SIZE    (8 * 1024 * 1024)
#define CONST_DMI_MALLOC_MMAP_SIZE      (512)

/*
 * Internal sentinel ("dummy") values and negative operation tags.
 * Every negative replacement list is parenthesized so the macro expands as
 * a single primary expression regardless of the surrounding operators.
 * The *_TAG values are presumably passed as the int8_t `tag` argument of
 * DMI_atomic()/dmi_function() -- confirm in the implementation.
 */
#define SYS_DOUBLE_DUMMY 1e23
#define SYS_INT_DUMMY (-INT_MAX)
#define SYS_INT64_DUMMY (-INT64_MAX)
#define SYS_CAS_TAG (-20)
#define SYS_FAS_TAG (-21)
#define SYS_CAS_OBJECT_TAG (-22)
#define SYS_FAS_OBJECT_TAG (-23)
#define SYS_DOUBLE_BARRIER_TAG (-24)
#define SYS_INT64_BARRIER_TAG (-25)
#define SYS_FAD_TAG (-26)

/* Thread kind codes (consecutive values starting at 20). */
#define THREAD_MAIN        20
#define THREAD_NORMAL      21
#define THREAD_SCHEDULED   22
#define THREAD_SCHEDULER_A 23
#define THREAD_PROXY       24
#define THREAD_SCALEUNIT   25

/*
 * Open/close state codes. These are re-exported to users as
 * DMI_OPEN / DMI_OPENED / DMI_CLOSE / DMI_CLOSED further down.
 */
#define STATE_OPEN   20
#define STATE_OPENED 21
#define STATE_CLOSE  22
#define STATE_CLOSED 23

/*
 * A second STATE_* family. It reuses the numeric values of
 * STATE_OPEN/STATE_OPENED/STATE_CLOSE (20..22), so the two families can
 * only be told apart by the field they are stored in. Presumably these are
 * the values of dmi_cond_t.state (which has head1_addr/head2_addr) --
 * confirm.
 */
#define STATE_NONE  20
#define STATE_HEAD1 21
#define STATE_HEAD2 22

/*
 * Command codes. Presumably the values stored in the command_type members
 * of ticket_t and forkkill_t -- confirm.
 */
#define COMMAND_NONE   20
#define COMMAND_KILL   21
#define COMMAND_SIGINT 22
#define COMMAND_FORK   23
#define COMMAND_EXIT   24

/*
 * Reduction operator codes. Presumably the op_type argument of
 * DMI_local_barrier_allreduce() -- confirm.
 */
#define DMI_OP_MAX  20
#define DMI_OP_MIN  21
#define DMI_OP_SUM  22
#define DMI_OP_PROD 23

/*
 * Element type codes. Presumably the type_type argument of
 * DMI_local_barrier_allreduce() -- confirm.
 */
#define DMI_TYPE_CHAR     20
#define DMI_TYPE_SHORT    21
#define DMI_TYPE_INT      22
#define DMI_TYPE_LONG     23
#define DMI_TYPE_LONGLONG 24
#define DMI_TYPE_FLOAT    25
#define DMI_TYPE_DOUBLE   26

/*
 * Profiling mode codes. Presumably the values of config_t.profile_type and
 * profile_t.type -- confirm.
 */
#define PROFILE_NONE 20
#define PROFILE_IN   21
#define PROFILE_OUT  22

/*
 * Public DMI_* names mapped onto internal constants and variables.
 *
 * ACCESS_*, TRUE, FALSE and SYS_ID_UNDEF are not defined in this header
 * (expected from dmi_system.h -- confirm). The STATE_* targets are defined
 * earlier in this file.
 *
 * DMI_AUTOMATIC is parenthesized so the negative literal expands as a
 * single primary expression.
 *
 * NOTE(review): DMI_DEBUG_SECTION expands to _tls_debug_flag, but this
 * header only declares _tls_profile_flag; confirm _tls_debug_flag is
 * declared elsewhere before relying on DMI_DEBUG_SECTION.
 */
#define DMI_PUT ACCESS_REMOTE
#define DMI_EXCLUSIVE ACCESS_LOCAL
#define DMI_GET ACCESS_ONCE
#define DMI_INVALIDATE ACCESS_INVALIDATE
#define DMI_UPDATE ACCESS_UPDATE
#define DMI_OPENED STATE_OPENED
#define DMI_OPEN STATE_OPEN
#define DMI_CLOSE STATE_CLOSE
#define DMI_CLOSED STATE_CLOSED
#define DMI_TRUE TRUE
#define DMI_FALSE FALSE
#define DMI_AUTOMATIC (-1)
#define DMI_PROFILE_SECTION _tls_profile_flag
#define DMI_DEBUG_SECTION _tls_debug_flag
#define DMI_ID_UNDEF SYS_ID_UNDEF

#pragma pack (push, 8)

/*
 * Process-level state of the wrapper layer; the global pointer `_wdmi`
 * declared later in this header has this type.
 * The int64_t *_addr members are presumably DMI global addresses of the
 * shared dmi_* control structures they are named after -- confirm in the
 * implementation.
 */
typedef struct wdmi_t {
  int8_t            profile_flag;
  int64_t           main_addr;
  int64_t           dmi_memalloc_addr;
  int64_t           dmi_member_addr;
  int64_t           dmi_forkkill_addr;
  int64_t           dmi_mthread_addr;
  int64_t           dmi_profile_addr;
  struct profile_t *profile;  /* node-local profiling state */
  struct config_t  *config;   /* run-time configuration */
  dmi_t            *dmi;      /* handle type from the underlying layer (not defined here) */
} wdmi_t;

/*
 * Per-thread wrapper state; the thread-local pointer `_tls_wdmi_thread`
 * declared later in this header has this type.
 * dummy_mheader/head_mheader point at mheader_t list nodes (presumably the
 * thread-local allocator's list -- confirm).
 */
typedef struct wdmi_thread_t {
  int32_t                mthread_id;
  int64_t                stackarea_addr;
  dmi_thread_t           dmi_thread;
  struct mheader_t      *dummy_mheader;
  struct mheader_t      *head_mheader;
  struct scale_thread_t *scale_thread;
} wdmi_thread_t;

/*
 * Singly linked list node carrying a 64-bit size. The link is an ordinary
 * process-local pointer (compare mheader2_t, whose link is an int64_t).
 */
typedef struct mheader_t {
  struct mheader_t *next_mheader;  /* next node in the list */
  int64_t           size;
} mheader_t;

/*
 * Variant of mheader_t whose link is stored as an int64_t address instead
 * of a pointer (presumably a DMI global address -- confirm).
 */
typedef struct mheader2_t {
  int64_t next_mheader2_addr;
  int64_t size;
} mheader2_t;

/*
 * Stack-area record: a thread handle plus an int64_t link to another
 * record (next_addr). How the records are chained is not visible in this
 * header.
 */
typedef struct stackarea_t {
  dmi_thread_t dmi_thread;
  int64_t      next_addr;
} stackarea_t;

/*
 * State handled by the ticket_* helpers declared at the end of this header
 * (ticket_alloc takes the dmi_forkkill_addr stored here). Carries a
 * pthread handle together with a condition variable and mutex.
 */
typedef struct ticket_t {
  char            *option;
  int8_t           command_type;  /* presumably one of COMMAND_* -- confirm */
  int64_t          dmi_forkkill_addr;
  vector_t        *pid_vector;
  dmi_thread_t     dmi_thread;
  pthread_t        pthread;
  pthread_cond_t   cond;
  pthread_mutex_t  mutex;
} ticket_t;

/*
 * Run-time configuration. config_alloc(argc, argv) returns one of these,
 * so it is presumably filled from the command line -- confirm.
 */
typedef struct config_t {
  int       argc;
  char    **argv;
  char     *ip;
  char     *profile_name;
  int8_t    profile_type;  /* presumably one of PROFILE_* -- confirm */
  int8_t    catch_flag;
  int8_t    detach_flag;
  int8_t    main_flag;
  uint16_t  port;
  uint16_t  listen_port;
  int32_t   core_num;
  int64_t   mem_size;
} config_t;

/*
 * One profiling record of four 64-bit values. Presumably the element type
 * of profile_t.record_vector -- confirm.
 */
typedef struct record_t {
  int64_t vm_seq;
  int64_t count;
  int64_t page_offset;
  int64_t size;
} record_t;

/* A pair of 64-bit identifiers. */
typedef struct idid_t {
  int64_t id1;
  int64_t id2;
} idid_t;

/*
 * Control block used by the dmi_mthread_* functions: two int64_t
 * addresses, one for the stack areas and one for an id pool.
 */
typedef struct dmi_mthread_t {
  int64_t stackareaes_addr;
  int64_t dmi_idpool_addr;
} dmi_mthread_t;

/*
 * Control block used by the dmi_member_* functions. Every member is an
 * int64_t address of another object (mutex, condition, node table and two
 * counters).
 */
typedef struct dmi_member_t {
  int64_t dmi_mutex_addr;
  int64_t dmi_cond_addr;
  int64_t dmi_nodes_addr;
  int64_t node_num_addr;
  int64_t rescale_node_num_addr;
} dmi_member_t;

/*
 * Description of one node; exposed to users as DMI_node_t.
 * NOTE(review): hostname[] is sized with IP_SIZE (defined outside this
 * header), so it may hold an address string rather than a host name --
 * confirm.
 */
typedef struct dmi_node_t {
  int32_t state;   /* presumably STATE_OPEN/OPENED/CLOSE/CLOSED -- confirm */
  int32_t dmi_id;
  int32_t core;
  int64_t memory;
  char    hostname[IP_SIZE];
} dmi_node_t;

/*
 * Control block used by the dmi_forkkill_* functions: int64_t addresses of
 * a mutex, a condition and a forkkill record.
 */
typedef struct dmi_forkkill_t {
  int64_t dmi_mutex_addr;
  int64_t dmi_cond_addr;
  int64_t forkkill_addr;
} dmi_forkkill_t;

/*
 * Fork/kill request record; dmi_forkkill_consumepre() fills one through
 * its forkkill_ptr argument. option[] is a fixed buffer of
 * CONST_OPTION_SIZE bytes.
 */
typedef struct forkkill_t {
  int8_t            phase;
  int8_t            command_type;  /* presumably one of COMMAND_* -- confirm */
  int32_t           dmi_id;
  int32_t           exit_dmi_id;
  struct dmi_node_t dmi_node;
  char              option[CONST_OPTION_SIZE];
} forkkill_t;

/*
 * Local read/write-set handle (exposed as DMI_local_rwset_t): one write
 * region described by size and address, plus a group_t for reads.
 */
typedef struct rwset_t {
  int64_t write_size;
  int64_t write_addr;
  group_t read_group;
} rwset_t;

/*
 * Shared read/write-set descriptor (exposed as DMI_rwset_t). Holds the
 * three values passed to dmi_rwset_init() (rwset_num, element_size,
 * element_num) plus two int64_t addresses.
 */
typedef struct dmi_rwset_t {
  int32_t rwset_num;
  int64_t element_size;
  int64_t element_num;
  int64_t write_addrs_addr;
  int64_t domains_addr;
} dmi_rwset_t;

/*
 * Per-participant domain record: a write address with its element count,
 * and a read element count with the address of the read-element list.
 * Presumably the element type behind dmi_rwset_t.domains_addr -- confirm.
 */
typedef struct domain_t {
  int64_t write_addr;
  int64_t write_element_num;
  int64_t read_element_num;
  int64_t read_elements_addr;
} domain_t;

/*
 * Local handle for the second read/write-set variant (exposed as
 * DMI_local_rwset2_t). Mirror image of rwset_t: one read region described
 * by size and address, a group_t for writes, and three int64_t arrays.
 */
typedef struct rwset2_t {
  int64_t  read_size;
  int64_t  read_addr;
  int64_t *addrs;
  int64_t *ptr_offsets;
  int64_t *write_sizes;
  group_t  write_group;
} rwset2_t;

/*
 * Shared descriptor for the second read/write-set variant (exposed as
 * DMI_rwset2_t). Holds the three values passed to dmi_rwset2_init()
 * (rwset2_num, element_size, element_num) plus four int64_t addresses.
 */
typedef struct dmi_rwset2_t {
  int32_t rwset2_num;
  int64_t element_size;
  int64_t element_num;
  int64_t idids_addr;
  int64_t read_elements_addrs_addr;
  int64_t read_element_nums_addr;
  int64_t read_addrs_addr;
} dmi_rwset2_t;

/*
 * Node-local profiling state (exposed as DMI_local_profile_t). Repeats the
 * type, vm_num, vm_max and three *_addr members of dmi_profile_t and adds
 * process-local resources: an idid_t array, a record vector, a FILE
 * stream and a pthread mutex.
 */
typedef struct profile_t {
  int8_t           type;  /* presumably one of PROFILE_* -- confirm */
  int32_t          vm_num;
  int32_t          vm_max;
  int64_t          dmi_mutex_addr;
  int64_t          thread_seqs_addr;
  int64_t          vm_seqs_addr;
  idid_t          *idids;
  vector_t        *record_vector;
  FILE            *fp;
  pthread_mutex_t  mutex;
} profile_t;

/*
 * Shared profiling descriptor (exposed as DMI_profile_t). The name passed
 * to dmi_profile_init() is presumably stored behind name_addr with its
 * length in name_size -- confirm.
 */
typedef struct dmi_profile_t {
  int8_t  type;  /* presumably one of PROFILE_* -- confirm */
  int32_t vm_num;
  int32_t vm_max;
  int32_t name_size;
  int64_t dmi_mutex_addr;
  int64_t thread_seqs_addr;
  int64_t vm_seqs_addr;
  int64_t name_addr;
  int64_t idids_addr;
} dmi_profile_t;

/*
 * Local barrier handle (exposed as DMI_local_barrier_t). Holds four
 * int64_t addresses: a "sum" and a "center" slot for each of the int64 and
 * double reductions. Its members are identical to dmi_barrier_t.
 */
typedef struct barrier_t {
  int64_t int64_sum_addr;
  int64_t double_sum_addr;
  int64_t int64_center_addr;
  int64_t double_center_addr;
} barrier_t;

/*
 * Shared barrier descriptor (exposed as DMI_barrier_t). Its members are
 * identical to the local barrier_t: four int64_t addresses.
 */
typedef struct dmi_barrier_t {
  int64_t int64_sum_addr;
  int64_t double_sum_addr;
  int64_t int64_center_addr;
  int64_t double_center_addr;
} dmi_barrier_t;

/* Spinlock object (exposed as DMI_spinlock_t): a single one-byte flag. */
typedef struct dmi_spinlock_t {
  int8_t flag;
} dmi_spinlock_t;

/*
 * Mutex object (exposed as DMI_mutex_t) made of four int64_t addresses.
 * A ready-made initial value is declared later in this header as
 * DMI_MUTEX_INITIALIZER.
 */
typedef struct dmi_mutex_t {
  int64_t head_addr;
  int64_t next_addr;
  int64_t stopper1_addr;
  int64_t stopper2_addr;
} dmi_mutex_t;

/*
 * Condition-variable object (exposed as DMI_cond_t): a state byte plus two
 * int64_t head addresses. A ready-made initial value is declared later in
 * this header as DMI_COND_INITIALIZER.
 */
typedef struct dmi_cond_t {
  int8_t  state;  /* presumably STATE_NONE/STATE_HEAD1/STATE_HEAD2 -- confirm */
  int64_t head1_addr;
  int64_t head2_addr;
} dmi_cond_t;

/*
 * Control block used by the dmi_memalloc_* functions: two int64_t
 * addresses referring to mheader2 objects, and an embedded dmi_mutex_t
 * (stored by value, not by address).
 */
typedef struct dmi_memalloc_t {
  int64_t     head_mheader2_addr_addr;
  int64_t     dummy_mheader2_addr;
  dmi_mutex_t dmi_mutex;
} dmi_memalloc_t;

/*
 * Id pool object (exposed as DMI_idpool_t). id_max matches the id_max
 * argument of dmi_idpool_init(); the remaining members are int64_t
 * addresses, one of them the address of a spinlock.
 */
typedef struct dmi_idpool_t {
  int32_t id_max;
  int64_t sp_addr;
  int64_t unused_addr;
  int64_t dmi_spinlock_addr;
} dmi_idpool_t;

/*
 * Record for one scheduled thread ("sthread"): a state byte, a 64-bit
 * dmi_addr and a thread handle. Presumably the element type behind
 * dmi_scheduler_t.sthreads_addr -- confirm.
 */
typedef struct sthread_t {
  int8_t       state;
  int64_t      dmi_addr;
  dmi_thread_t dmi_thread;
} sthread_t;

/*
 * Migration record: exit and migrate flags, the sthread id, source and
 * destination dmi ids, a target node, a status object and one check flag
 * per possible node (CONST_NODE_MAX entries).
 */
typedef struct migthread_t {
  int8_t     exit_flag;
  int8_t     migrate_flag;
  int32_t    sthread_id;
  int32_t    src_dmi_id;
  int32_t    dst_dmi_id;
  dmi_node_t target_dmi_node;
  status_t   status;
  int8_t     check_flags[CONST_NODE_MAX];
} migthread_t;

/*
 * Resource record: a state byte, a count of sthreads and an embedded node
 * description. Presumably the element type behind
 * dmi_scheduler_t.resources_addr -- confirm.
 */
typedef struct resource_t {
  int8_t     state;
  int32_t    sthread_num;
  dmi_node_t dmi_node;
} resource_t;

/*
 * Scheduler object (exposed as DMI_scheduler_t), used by the
 * dmi_scheduler_* functions. Apart from dmi_id and the embedded thread
 * handle, every member is an int64_t address of another object.
 */
typedef struct dmi_scheduler_t {
  int32_t      dmi_id;
  int64_t      sthreads_addr;
  int64_t      phases_addr;
  int64_t      resources_addr;
  int64_t      init_flag_addr;
  int64_t      final_flag_addr;
  int64_t      dmi_cond_addr;
  int64_t      dmi_mutex_addr;
  int64_t      dmi_mutex2_addr;
  int64_t      dmi_idpool_addr;
  dmi_thread_t dmi_thread;
} dmi_scheduler_t;

/*
 * Record for one "athread": a state byte, the dmi id, the athread id and a
 * thread handle. The role of athreads is not visible in this header
 * (rescale_t also carries an athread_id).
 */
typedef struct dmi_athread_t {
  int8_t       state;
  int32_t      dmi_id;
  int32_t      athread_id;
  dmi_thread_t dmi_thread;
} dmi_athread_t;

/*
 * Rescale control block; dmi_rescale_main() takes the address of one
 * (rescale_addr). Contains an athread id followed by eight int64_t
 * addresses, including the scaleunit_addr given to DMI_rescale() and two
 * barrier addresses.
 */
typedef struct rescale_t {
  int32_t athread_id;
  int64_t ranks_addr;
  int64_t flags_addr;
  int64_t pnum_addr;
  int64_t exit_flag_addr;
  int64_t judge_addr;
  int64_t scaleunit_addr;
  int64_t dmi_barrier_addr;
  int64_t dmi_barrier2_addr;
} rescale_t;

/*
 * Per-thread scaling state, referenced from wdmi_thread_t.scale_thread:
 * the thread's rank, the participant count (the same names as the my_rank
 * and pnum arguments of DMI_scaleunit()) and a local barrier handle.
 */
typedef struct scale_thread_t {
  int32_t    my_rank;
  int32_t    pnum;
  barrier_t *barrier;
} scale_thread_t;

#pragma pack (pop)

/*
 * Dispatch an error code: expands to catch(ret) when catch_flag equals
 * TRUE, otherwise to throw(ret). catch, throw and TRUE are not defined in
 * this header (expected from dmi_system.h -- confirm).
 *
 * catch_flag is parenthesized so that an argument such as `a && b` is
 * compared as a whole instead of being parsed as `a && (b == TRUE)`.
 *
 * The expansion is intentionally left as a bare if/else (not wrapped in
 * do { } while (0)) so existing call sites compile unchanged whether or
 * not they add a trailing semicolon. Consequently it must not be used as
 * the unbraced body of another if/else.
 */
#define throw_or_catch(catch_flag, ret) if((catch_flag) == TRUE) {catch(ret);} else {throw(ret);}

/*
 * Public type names. Each DMI_* name is an alias of an internal type:
 * shared objects keep their dmi_* name in upper case, while the
 * node-local helper types (profile_t, barrier_t, rwset_t, rwset2_t,
 * status_t, group_t) are exported with a DMI_local_ prefix.
 * dmi_thread_t, status_t and group_t are not defined in this header.
 */
typedef dmi_scheduler_t DMI_scheduler_t;
typedef dmi_thread_t DMI_thread_t;
typedef dmi_node_t DMI_node_t;
typedef dmi_spinlock_t DMI_spinlock_t;
typedef dmi_mutex_t DMI_mutex_t;
typedef dmi_cond_t DMI_cond_t;
typedef dmi_profile_t DMI_profile_t;
typedef dmi_barrier_t DMI_barrier_t;
typedef dmi_idpool_t DMI_idpool_t;
typedef dmi_rwset_t DMI_rwset_t;
typedef dmi_rwset2_t DMI_rwset2_t;
typedef profile_t DMI_local_profile_t;
typedef barrier_t DMI_local_barrier_t;
typedef rwset_t DMI_local_rwset_t;
typedef rwset2_t DMI_local_rwset2_t;
typedef status_t DMI_local_status_t;
typedef group_t DMI_local_group_t;

/*
 * Globals and thread-local variables defined in the implementation.
 * _channel and _scheduler_channel are two-element int arrays (presumably
 * pipe descriptor pairs -- confirm). _tls_profile_flag is what
 * DMI_PROFILE_SECTION expands to.
 * NOTE(review): DMI_DEBUG_SECTION expands to _tls_debug_flag, which is not
 * declared here; confirm it is declared elsewhere.
 */
extern int _channel[2];
extern int _scheduler_channel[2];
extern wdmi_t *_wdmi;
extern dmi_mutex_t DMI_MUTEX_INITIALIZER;
extern dmi_cond_t DMI_COND_INITIALIZER;
extern __thread wdmi_thread_t *_tls_wdmi_thread;
extern __thread int8_t _tls_thread_malloc_flag;
extern __thread int _tls_profile_flag;
extern __thread int8_t _tls_profile_rec_flag;

/*
 * Hook functions declared with the GCC `weak` attribute. They are
 * presumably supplied by the application, with the weak declaration
 * letting the library link when a hook is absent -- confirm how the
 * implementation checks for missing hooks.
 * Note that DMI_thread takes only dmi_addr, whereas the internal
 * dmi_thread() declared below takes three additional values.
 */
extern void  __attribute__ ((weak)) DMI_main(int argc, char **argv);
extern int64_t __attribute__ ((weak)) DMI_thread(int64_t dmi_addr);
extern int32_t __attribute__ ((weak)) DMI_start_scale(dmi_node_t *in_nodes, int in_node_num);
extern int32_t __attribute__ ((weak)) DMI_judge_rescale(dmi_node_t *in_nodes, dmi_node_t *out_nodes, dmi_node_t *cur_nodes, int in_node_num, int out_node_num, int cur_node_num);
extern int32_t __attribute__ ((weak)) DMI_scaleunit(int my_rank, int pnum, int64_t scaleunit_addr);
extern void __attribute__ ((weak))  DMI_function(void *page_ptr, int64_t size, void *out_ptr, int64_t out_size, void *in_ptr, int64_t in_size, int8_t tag);


/* main */

/*
 * Entry points declared by this header. main() is declared here, so the
 * program entry point presumably lives in the library and hands over to
 * the DMI_main hook -- confirm. dmi_thread() and dmi_function() have the
 * same shape as the DMI_thread / DMI_function hooks; dmi_thread() takes
 * three extra int64_t values and dmi_function() names its second
 * parameter copy_size instead of size.
 */
int main(int argc, char **argv);
int64_t dmi_thread(int64_t dmi_addr, int64_t value1, int64_t value2, int64_t value3);
void dmi_function(void *page_ptr, int64_t copy_size, void *out_ptr, int64_t out_size, void *in_ptr, int64_t in_size, int8_t tag);

/* wrapper api */

/*
 * Thread and process control. Results are written through the *_ptr
 * arguments; the int32_t return value is presumably a success/error code
 * (not specified in this header). A status_t pointer is accepted by most
 * calls and is presumably used for asynchronous completion together with
 * DMI_check()/DMI_wait() -- confirm.
 */
int32_t DMI_rank(int32_t *dmi_id_ptr);
int32_t DMI_create(dmi_thread_t *dmi_thread_ptr, int32_t dmi_id, int64_t dmi_addr, int64_t stack_size, status_t *status);
int32_t DMI_create2(dmi_thread_t *dmi_thread_ptr, int32_t dmi_id, int64_t dmi_addr, int64_t stack_size, int8_t pheap_flag, int64_t value1, int64_t value2, int64_t value3, status_t *status);
int32_t DMI_join(dmi_thread_t dmi_thread, int64_t *dmi_addr_ptr, status_t *status);
int32_t DMI_detach(dmi_thread_t dmi_thread, status_t *status);
int32_t DMI_migrate(dmi_thread_t dmi_thread, int32_t dmi_id, dmi_thread_t *dmi_thread_ptr, int8_t *migrate_flag_ptr, status_t *status);
int32_t DMI_yield(void);
int32_t DMI_wake(dmi_thread_t dmi_thread, void *out_ptr, int64_t out_size, status_t *status);
int32_t DMI_suspend(void *in_ptr);
int32_t DMI_self(dmi_thread_t *dmi_thread_ptr);
int32_t DMI_fork(char *option, int32_t dmi_id, dmi_node_t *dmi_node_ptr);
int32_t DMI_kill(int32_t dmi_id);
/*
 * Address-based memory operations: mapping, plain and grouped read/write,
 * watch, and atomic operations (generic, compare-and-swap, fetch-and-store,
 * fetch-and-add). Regions are identified by an int64_t dmi_addr plus size.
 * access_type is presumably one of DMI_PUT / DMI_EXCLUSIVE / DMI_GET /
 * DMI_INVALIDATE / DMI_UPDATE -- confirm which values each call accepts.
 */
int32_t DMI_mmap(int64_t *dmi_addr_ptr, int64_t page_size, int64_t page_num, status_t *status);
int32_t DMI_munmap(int64_t dmi_addr, status_t *status);
int32_t DMI_group_init(group_t *group, int64_t *addrs, int64_t *ptr_offsets, int64_t *sizes, int32_t group_num);
int32_t DMI_group_destroy(group_t *group);
int32_t DMI_read(int64_t dmi_addr, int64_t size, void *in_ptr, int8_t access_type, status_t *status);
int32_t DMI_group_read(group_t *group, void *in_ptr, int8_t access_type, status_t *status);
int32_t DMI_write(int64_t dmi_addr, int64_t size, void *out_ptr, int8_t access_type, status_t *status);
int32_t DMI_group_write(group_t *group, void *out_ptr, int8_t access_type, status_t *status);
int32_t DMI_watch(int64_t dmi_addr, int64_t size, void *in_ptr, void *out_ptr, status_t *status);
int32_t DMI_atomic(int64_t dmi_addr, int64_t size, void *out_ptr, int64_t out_size, void *in_ptr, int64_t in_size, int8_t tag, int8_t access_type, status_t *status);
int32_t DMI_cas(int64_t dmi_addr, int64_t size, void *cmp_ptr, void *swap_ptr, int8_t *cas_flag_ptr, int8_t access_type, status_t *status);
int32_t DMI_fas(int64_t dmi_addr, int64_t size, void *out_ptr, void *in_ptr, int8_t access_type, status_t *status);
int32_t DMI_fad(int64_t dmi_addr, int64_t add_value, int64_t *fetch_value_ptr, int8_t access_type, status_t *status);
int32_t DMI_save(int64_t dmi_addr, int64_t size);
int32_t DMI_unsave(int64_t dmi_addr, int64_t size);
/*
 * Object-based counterparts of the address-based operations: the target is
 * named by an int32_t object_id (plus an object_offset where applicable)
 * instead of an int64_t address. The g-prefixed read/write variants take
 * arrays of offsets, sizes and pointer offsets with object_num entries.
 */
int32_t DMI_new_object(int32_t *object_ids, int64_t *object_sizes, int32_t object_num, status_t *status);
int32_t DMI_delete_object(int32_t *object_ids, int32_t object_num, status_t *status);
int32_t DMI_read_object(int32_t object_id, int64_t object_offset, int64_t size, void *in_ptr, int8_t access_type, status_t *status);
int32_t DMI_gread_object(int32_t object_id, int64_t *object_offsets, int64_t *sizes, int64_t *ptr_offsets, int32_t object_num, void *in_ptr, int8_t access_type, status_t *status);
int32_t DMI_write_object(int32_t object_id, int64_t object_offset, int64_t size, void *out_ptr, int8_t access_type, status_t *status);
int32_t DMI_gwrite_object(int32_t object_id, int64_t *object_offsets, int64_t *sizes, int64_t *ptr_offsets, int32_t object_num, void *out_ptr, int8_t access_type, status_t *status);
int32_t DMI_watch_object(int32_t object_id, int64_t object_offset, int64_t size, void *in_ptr, void *out_ptr, status_t *status);
int32_t DMI_atomic_object(int32_t object_id, void *out_ptr, int64_t out_size, void *in_ptr, int64_t in_size, int8_t tag, int8_t access_type, status_t *status);
int32_t DMI_cas_object(int32_t object_id, int64_t object_offset, int64_t size, void *cmp_ptr, void *swap_ptr, int8_t *cas_flag_ptr, int8_t access_type, status_t *status);
int32_t DMI_fas_object(int32_t object_id, int64_t object_offset, int64_t size, void *out_ptr, void *in_ptr, int8_t access_type, status_t *status);
int32_t DMI_getsize_object(int32_t object_id, int64_t *slide_size_ptr, int8_t access_type, status_t *status);
int32_t DMI_setsize_object(int32_t object_id, int64_t slide_size, int8_t access_type, status_t *status);
int32_t DMI_save_object(int32_t object_id);
int32_t DMI_unsave_object(int32_t object_id);
/*
 * Completion queries on a status_t. Both deliver a result code through
 * ret_ptr; DMI_check() additionally returns an int32_t while DMI_wait()
 * returns nothing. Presumably DMI_check() polls and DMI_wait() blocks --
 * confirm in the implementation.
 */
int32_t DMI_check(status_t *status, int32_t *ret_ptr);
void DMI_wait(status_t *status, int32_t *ret_ptr);
/*
 * Two allocator families.
 * DMI_malloc/realloc/free work on int64_t addresses and take a page_size.
 * DMI_thread_* work on ordinary pointers; the mmap/mremap/mprotect/munmap
 * variants have the same parameter lists as the corresponding system calls
 * (mremap without the optional new-address argument).
 * Sizes are int64_t in the malloc-style calls and size_t in the mmap-style
 * calls.
 */
int32_t DMI_malloc(int64_t *addr_ptr, int64_t size, int64_t page_size);
int32_t DMI_realloc(int64_t old_addr, int64_t *new_addr_ptr, int64_t size, int64_t page_size);
int32_t DMI_free(int64_t addr);
void* DMI_thread_mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset);
void* DMI_thread_mremap(void *old_address, size_t old_size, size_t new_size, int flags);
int32_t DMI_thread_mprotect(void *addr, size_t len, int prot);
int32_t DMI_thread_munmap(void *start, size_t length);
void* DMI_thread_malloc(int64_t size);
void* DMI_thread_realloc(void *ptr, int64_t size);
void DMI_thread_free(void *ptr);
/*
 * Synchronization primitives and id pools. Every object is named by the
 * int64_t address of its dmi_spinlock_t / dmi_mutex_t / dmi_cond_t /
 * dmi_idpool_t. The trylock variants report their outcome through
 * try_flag_ptr. Each function has a lower-case dmi_* counterpart with the
 * same parameter list declared further down.
 */
int32_t DMI_spin_init(int64_t dmi_spinlock_addr);
int32_t DMI_spin_destroy(int64_t dmi_spinlock_addr);
int32_t DMI_spin_lock(int64_t dmi_spinlock_addr);
int32_t DMI_spin_unlock(int64_t dmi_spinlock_addr);
int32_t DMI_spin_trylock(int64_t dmi_spinlock_addr, int32_t *try_flag_ptr);
int32_t DMI_mutex_init(int64_t dmi_mutex_addr);
int32_t DMI_mutex_destroy(int64_t dmi_mutex_addr);
int32_t DMI_mutex_lock(int64_t dmi_mutex_addr);
int32_t DMI_mutex_unlock(int64_t dmi_mutex_addr);
int32_t DMI_mutex_trylock(int64_t dmi_mutex_addr, int32_t *try_flag_ptr);
int32_t DMI_cond_init(int64_t dmi_cond_addr);
int32_t DMI_cond_destroy(int64_t dmi_cond_addr);
int32_t DMI_cond_wait(int64_t dmi_cond_addr, int64_t dmi_mutex_addr);
int32_t DMI_cond_signal(int64_t dmi_cond_addr);
int32_t DMI_cond_broadcast(int64_t dmi_cond_addr);
int32_t DMI_idpool_init(int64_t dmi_idpool_addr, int32_t id_max);
int32_t DMI_idpool_destroy(int64_t dmi_idpool_addr);
int32_t DMI_idpool_get(int64_t dmi_idpool_addr, int32_t *id_ptr);
int32_t DMI_idpool_put(int64_t dmi_idpool_addr, int32_t id);
/*
 * Read/write sets, in two variants. The shared descriptor is addressed by
 * dmi_rwset_addr / dmi_rwset2_addr; each participant (my_id) also holds a
 * local rwset_t / rwset2_t handle.
 * Variant 1 passes write elements to *_decompose and read elements to
 * DMI_local_rwset_init; variant 2 passes both to DMI_rwset2_decompose and
 * DMI_local_rwset2_init takes only the participant id.
 */
int32_t DMI_rwset_init(int64_t dmi_rwset_addr, int64_t element_num, int64_t element_size, int32_t rwset_num);
int32_t DMI_rwset_destroy(int64_t dmi_rwset_addr);
int32_t DMI_rwset_decompose(int64_t dmi_rwset_addr, int32_t my_id, int64_t *write_elements, int32_t write_element_num);
int32_t DMI_local_rwset_init(rwset_t *rwset, int64_t dmi_rwset_addr, int32_t my_id, int64_t *read_elements, int32_t read_element_num);
int32_t DMI_local_rwset_destroy(rwset_t *rwset);
int32_t DMI_local_rwset_write(rwset_t *rwset, void *buf, status_t *status);
int32_t DMI_local_rwset_read(rwset_t *rwset, void *buf, status_t *status);
int32_t DMI_rwset2_init(int64_t dmi_rwset2_addr, int64_t element_num, int64_t element_size, int32_t rwset2_num);
int32_t DMI_rwset2_destroy(int64_t dmi_rwset2_addr);
int32_t DMI_rwset2_decompose(int64_t dmi_rwset2_addr, int32_t my_id, int64_t *write_elements, int32_t write_element_num, int64_t *read_elements, int32_t read_element_num);
int32_t DMI_local_rwset2_init(rwset2_t *rwset2, int64_t dmi_rwset2_addr, int32_t my_id);
int32_t DMI_local_rwset2_destroy(rwset2_t *rwset2);
int32_t DMI_local_rwset2_write(rwset2_t *rwset2, void *buf, status_t *status);
int32_t DMI_local_rwset2_read(rwset2_t *rwset2, void *buf, status_t *status);
/*
 * Barriers. The shared object is addressed by dmi_barrier_addr; each
 * participant attaches a local barrier_t handle to it and synchronizes or
 * reduces through that handle with pnum participants.
 * For allreduce, op_type and type_type are presumably DMI_OP_* and
 * DMI_TYPE_* codes, and sub_value_ptr/value_ptr the local input and the
 * reduced output -- confirm.
 */
int32_t DMI_barrier_init(int64_t dmi_barrier_addr);
int32_t DMI_barrier_destroy(int64_t dmi_barrier_addr);
int32_t DMI_local_barrier_init(barrier_t *barrier, int64_t dmi_barrier_addr);
int32_t DMI_local_barrier_destroy(barrier_t *barrier);
int32_t DMI_local_barrier_sync(barrier_t *barrier, int32_t pnum);
int32_t DMI_local_barrier_allreduce(barrier_t *barrier, int32_t pnum, void *sub_value_ptr, void *value_ptr, int8_t op_type, int8_t type_type);
/*
 * Membership, rescaling and scheduler entry points.
 * The membership calls mirror the dmi_member_* functions minus the
 * dmi_member_addr argument; DMI_nodes() fills a caller-supplied array of
 * `capacity` entries and reports the count through num_ptr.
 * The DMI_scheduler_* calls have the same parameter lists as the
 * lower-case dmi_scheduler_* functions declared further down.
 */
int32_t DMI_welcome(int32_t dmi_id);
int32_t DMI_goodbye(int32_t dmi_id);
int32_t DMI_poll(dmi_node_t *dmi_node_ptr);
int32_t DMI_peek(dmi_node_t *dmi_node_ptr, int8_t *flag_ptr);
int32_t DMI_nodes(dmi_node_t *dmi_node_array, int32_t *num_ptr, int32_t capacity);
int32_t DMI_rescale(int64_t scaleunit_addr, int32_t init_node_num, int32_t thread_num);
int32_t DMI_check_rescale(int32_t *judge_ptr);
int32_t DMI_scheduler_init(int64_t dmi_scheduler_addr);
int32_t DMI_scheduler_destroy(int64_t dmi_scheduler_addr);
int32_t DMI_scheduler_create(int64_t dmi_scheduler_addr, int32_t *sthread_id_ptr, int64_t dmi_addr);
int32_t DMI_scheduler_detach(int64_t dmi_scheduler_addr, int32_t sthread_id);
int32_t DMI_scheduler_join(int64_t dmi_scheduler_addr, int32_t sthread_id, int64_t *dmi_addr_ptr);

/* rescale */

/*
 * Internal rescale functions. dmi_rescale_scale() and dmi_rescale_check()
 * have the same parameter lists as the public DMI_rescale() and
 * DMI_check_rescale(); dmi_rescale_main() takes the address of a rescale
 * control block (presumably a rescale_t -- confirm).
 */
int32_t dmi_rescale_scale(int64_t scaleunit_addr, int32_t init_node_num, int32_t thread_num);
int32_t dmi_rescale_main(int64_t rescale_addr);
int32_t sub_judge_rescale(void);
int32_t dmi_rescale_check(int32_t *judge_ptr);

/* scheduler */

/*
 * Internal scheduler functions operating on the dmi_scheduler_t at
 * dmi_scheduler_addr. The first five match the public DMI_scheduler_*
 * prototypes; dmi_scheduler_main(), sub_sort_nodes() and
 * dmi_scheduler_monitor() have no public counterpart in this header.
 */
int32_t dmi_scheduler_init(int64_t dmi_scheduler_addr);
int32_t dmi_scheduler_destroy(int64_t dmi_scheduler_addr);
int32_t dmi_scheduler_create(int64_t dmi_scheduler_addr, int32_t *sthread_id_ptr, int64_t dmi_addr);
int32_t dmi_scheduler_detach(int64_t dmi_scheduler_addr, int32_t sthread_id);
int32_t dmi_scheduler_join(int64_t dmi_scheduler_addr, int32_t sthread_id, int64_t *dmi_addr_ptr);
int32_t dmi_scheduler_main(int64_t dmi_scheduler_addr, int32_t sthread_id, int64_t dmi_addr);
void sub_sort_nodes(dmi_node_t *nodes, int32_t node_num);
void dmi_scheduler_monitor(int64_t dmi_scheduler_addr);

/* malloc/realloc/free */

/*
 * Pointer-based allocator internals: three hook functions and three
 * dmi_kr_* functions with malloc/realloc/free-shaped signatures. Sizes are
 * int64_t rather than size_t. How the hooks are installed is not visible
 * in this header.
 */
void* my_malloc_hook(int64_t size);
void* my_realloc_hook(void *old_p, int64_t size);
void my_free_hook(void *p);
void* dmi_kr_malloc(int64_t size);
void* dmi_kr_realloc(void *ptr, int64_t size);
void dmi_kr_free(void *ptr);

/* mutex/cond */

/*
 * Internal spinlock, mutex, condition-variable and id-pool functions.
 * Each has the same parameter list as the public DMI_* function of the
 * same name declared above.
 */
int32_t dmi_spin_init(int64_t dmi_spinlock_addr);
int32_t dmi_spin_destroy(int64_t dmi_spinlock_addr);
int32_t dmi_spin_lock(int64_t dmi_spinlock_addr);
int32_t dmi_spin_unlock(int64_t dmi_spinlock_addr);
int32_t dmi_spin_trylock(int64_t dmi_spinlock_addr, int32_t *try_flag_ptr);
int32_t dmi_mutex_init(int64_t dmi_mutex_addr);
int32_t dmi_mutex_destroy(int64_t dmi_mutex_addr);
int32_t dmi_mutex_lock(int64_t dmi_mutex_addr);
int32_t dmi_mutex_unlock(int64_t dmi_mutex_addr);
int32_t dmi_mutex_trylock(int64_t dmi_mutex_addr, int32_t *try_flag_ptr);
int32_t dmi_cond_init(int64_t dmi_cond_addr);
int32_t dmi_cond_destroy(int64_t dmi_cond_addr);
int32_t dmi_cond_wait(int64_t dmi_cond_addr, int64_t dmi_mutex_addr);
int32_t dmi_cond_signal(int64_t dmi_cond_addr);
int32_t dmi_cond_broadcast(int64_t dmi_cond_addr);
int32_t dmi_idpool_init(int64_t dmi_idpool_addr, int32_t id_max);
int32_t dmi_idpool_destroy(int64_t dmi_idpool_addr);
int32_t dmi_idpool_get(int64_t dmi_idpool_addr, int32_t *id_ptr);
int32_t dmi_idpool_put(int64_t dmi_idpool_addr, int32_t id);

/* rwset */

/*
 * Internal read/write-set functions for both variants. The dmi_rwset*_ and
 * rwset*_init/destroy/write/read functions have the same parameter lists
 * as the public DMI_rwset* / DMI_local_rwset* functions; rwset_alloc/free
 * and rwset2_alloc/free create and release the local handles and have no
 * public counterpart in this header.
 */
int32_t dmi_rwset_init(int64_t dmi_rwset_addr, int64_t element_num, int64_t element_size, int32_t rwset_num);
int32_t dmi_rwset_destroy(int64_t dmi_rwset_addr);
int32_t dmi_rwset_decompose(int64_t dmi_rwset_addr, int32_t my_id, int64_t *write_elements, int32_t write_element_num);
rwset_t* rwset_alloc(void);
void rwset_free(rwset_t *rwset);
int32_t rwset_init(rwset_t *rwset, int64_t dmi_rwset_addr, int32_t my_id, int64_t *read_elements, int32_t read_element_num);
int32_t rwset_destroy(rwset_t *rwset);
int32_t rwset_write(rwset_t *rwset, void *buf, status_t *status);
int32_t rwset_read(rwset_t *rwset, void *buf, status_t *status);
int32_t dmi_rwset2_init(int64_t dmi_rwset2_addr, int64_t element_num, int64_t element_size, int32_t rwset2_num);
int32_t dmi_rwset2_destroy(int64_t dmi_rwset2_addr);
int32_t dmi_rwset2_decompose(int64_t dmi_rwset2_addr, int32_t my_id, int64_t *write_elements, int32_t write_element_num, int64_t *read_elements, int32_t read_element_num);
rwset2_t* rwset2_alloc(void);
void rwset2_free(rwset2_t *rwset2);
int32_t rwset2_init(rwset2_t *rwset2, int64_t dmi_rwset2_addr, int32_t my_id);
int32_t rwset2_destroy(rwset2_t *rwset2);
int32_t rwset2_write(rwset2_t *rwset2, void *buf, status_t *status);
int32_t rwset2_read(rwset2_t *rwset2, void *buf, status_t *status);

/* barrier */

/*
 * Internal barrier functions. Unlike the public
 * DMI_local_barrier_allreduce(), which takes void pointers and a type
 * code, the internal reduction is split into one typed function for
 * int64_t and one for double; both take the local contribution by value
 * and write the result through sum_ptr.
 */
int32_t dmi_barrier_init(int64_t dmi_barrier_addr);
int32_t dmi_barrier_destroy(int64_t dmi_barrier_addr);
barrier_t* barrier_alloc(void);
void barrier_free(barrier_t *barrier);
int32_t barrier_init(barrier_t *barrier, int64_t dmi_barrier_addr);
int32_t barrier_destroy(barrier_t *barrier);
int32_t barrier_allreduce_int64(barrier_t *barrier, int32_t sync_num, int64_t sub_sum, int64_t *sum_ptr, int8_t op_type);
int32_t barrier_allreduce_double(barrier_t *barrier, int32_t sync_num, double sub_sum, double *sum_ptr, int8_t op_type);

/* mthread */

/*
 * Functions operating on the dmi_mthread_t at dmi_mthread_addr.
 * dmi_mthread_get() hands out an mthread id together with a stack-area
 * address through its pointer arguments; dmi_mthread_put() takes an id
 * back.
 */
int32_t dmi_mthread_init(int64_t dmi_mthread_addr);
int32_t dmi_mthread_destroy(int64_t dmi_mthread_addr);
int32_t dmi_mthread_get(int64_t dmi_mthread_addr, int32_t *mthread_id_ptr, int64_t *stackarea_addr_ptr);
int32_t dmi_mthread_put(int64_t dmi_mthread_addr, int32_t mthread_id);

/* member */

/*
 * Membership functions operating on the dmi_member_t at dmi_member_addr.
 * welcome/goodbye/poll/peek/nodes are the internal forms of the public
 * DMI_welcome/DMI_goodbye/DMI_poll/DMI_peek/DMI_nodes calls. The two
 * *fad functions take an add value and return a fetched value through
 * fetch_value_ptr (presumably fetch-and-add on the two counters referenced
 * by dmi_member_t -- confirm).
 */
int32_t dmi_member_init(int64_t dmi_member_addr);
int32_t dmi_member_destroy(int64_t dmi_member_addr);
int32_t dmi_member_join(int64_t dmi_member_addr, int32_t dmi_id, int32_t core_num, int64_t mem_size, dmi_node_t *dmi_node_ptr, int8_t detach_flag);
int32_t dmi_member_leave(int64_t dmi_member_addr, int32_t dmi_id, int8_t detach_flag);
int32_t dmi_member_welcome(int64_t dmi_member_addr, int32_t dmi_id);
int32_t dmi_member_goodbye(int64_t dmi_member_addr, int32_t dmi_id);
int32_t dmi_member_poll(int64_t dmi_member_addr, dmi_node_t *dmi_node_ptr);
int32_t dmi_member_peek(int64_t dmi_member_addr, dmi_node_t *dmi_node_ptr, int8_t *flag_ptr);
int32_t dmi_member_nodes(int64_t dmi_member_addr, dmi_node_t *dmi_node_array, int32_t *num_ptr, int32_t capacity);
int32_t dmi_member_fad(int64_t dmi_member_addr, int32_t add_value, int32_t *fetch_value_ptr);
int32_t dmi_member_rescalefad(int64_t dmi_member_addr, int32_t add_value, int32_t *fetch_value_ptr);

/* memalloc */

/*
 * Address-based allocator operating on the dmi_memalloc_t at
 * dmi_memalloc_addr. malloc/realloc/free take the same arguments as the
 * public DMI_malloc/DMI_realloc/DMI_free plus the allocator address and a
 * rec_flag (presumably controlling profile recording, cf.
 * _tls_profile_rec_flag -- confirm).
 */
int32_t dmi_memalloc_init(int64_t dmi_memalloc_addr);
int32_t dmi_memalloc_destroy(int64_t dmi_memalloc_addr);
int32_t dmi_memalloc_malloc(int64_t dmi_memalloc_addr, int64_t *addr_ptr, int64_t size, int64_t page_size, int8_t rec_flag);
int32_t dmi_memalloc_realloc(int64_t dmi_memalloc_addr, int64_t old_addr, int64_t *new_addr_ptr, int64_t size, int64_t page_size, int8_t rec_flag);
int32_t dmi_memalloc_free(int64_t dmi_memalloc_addr, int64_t addr, int8_t rec_flag);

/* forkkill */

/*
 * Fork/kill request channel operating on the dmi_forkkill_t at
 * dmi_forkkill_addr. The names suggest a producer/consumer protocol:
 * produce() submits a command, consumepre() retrieves a forkkill_t and an
 * exit flag, and consumepost() reports a node back -- confirm the exact
 * handshake in the implementation.
 * Note that consumepost() takes dmi_node_t by value.
 */
int32_t dmi_forkkill_init(int64_t dmi_forkkill_addr);
int32_t dmi_forkkill_destroy(int64_t dmi_forkkill_addr);
int32_t dmi_forkkill_consumepre(int64_t dmi_forkkill_addr, forkkill_t *forkkill_ptr, int8_t *exit_flag_ptr);
int32_t dmi_forkkill_consumepost(int64_t dmi_forkkill_addr, dmi_node_t dmi_node);
int32_t dmi_forkkill_produce(int64_t dmi_forkkill_addr, int8_t command_type, int32_t dmi_id, dmi_node_t *dmi_node_ptr, char *option);

/* profile */

/*
 * Profiling. dmi_profile_init/destroy manage the shared dmi_profile_t at
 * dmi_profile_addr; the profile_* functions work on a node-local profile_t
 * attached to it by profile_init(). The record functions take the address
 * (and, for accesses, the size) of the event being recorded, and
 * profile_decide_page_size() returns a page size through page_size_ptr.
 */
int32_t dmi_profile_init(int64_t dmi_profile_addr, int8_t type, char *name);
int32_t dmi_profile_destroy(int64_t dmi_profile_addr);
profile_t* profile_alloc(void);
void profile_free(profile_t *profile);
int32_t profile_init(profile_t *profile, int64_t dmi_profile_addr);
int32_t profile_destroy(profile_t *profile);
int32_t profile_record_mmap(profile_t *profile, int64_t dmi_addr);
int32_t profile_record_munmap(profile_t *profile, int64_t dmi_addr);
int32_t profile_record_access(profile_t *profile, int64_t dmi_addr, int64_t size);
int32_t profile_decide_page_size(profile_t *profile, int64_t *page_size_ptr);
int8_t profile_get_type(profile_t *profile);

/* tools */

/*
 * Helper functions: *_alloc/*_free pairs for the local structures declared
 * in this header, the ticket_* worker functions, the signal handler and
 * two address conversion helpers.
 * ticket_proxyer1() has the void *(*)(void *) shape of a pthread start
 * routine, and signal_handler() has the three-argument shape of a
 * sigaction SA_SIGINFO handler; how they are registered is not visible
 * here.
 */
wdmi_t* wdmi_alloc(void);
void wdmi_free(wdmi_t *wdmi);
wdmi_thread_t* wdmi_thread_alloc(void);
void wdmi_thread_free(wdmi_thread_t *wdmi_thread);
scale_thread_t* scale_thread_alloc(void);
void scale_thread_free(scale_thread_t *scale_thread);
ticket_t* ticket_alloc(int64_t dmi_forkkill_addr);
void ticket_free(ticket_t *ticket);
void* ticket_proxyer1(void *arg);
void ticket_proxyer2(ticket_t *ticket);
void ticket_monitor(ticket_t *ticket);
void ticket_joinchild(ticket_t *ticket);
void signal_handler(int signum, siginfo_t *siginfo, void *ucontext);
config_t* config_alloc(int argc, char **argv);
void config_free(config_t *config);
record_t* record_alloc(void);
void record_free(record_t *record);
mheader_t* mheader_alloc(void);
void mheader_free(mheader_t *mheader);
int64_t sub_convert_addr(int64_t stackarea_addr, int64_t addr1, int64_t addr2);
int64_t sub_revert_addr(int64_t stackarea_addr);

#endif
