Commit 1a7821fb authored by Charlie Jacobsen's avatar Charlie Jacobsen Committed by Vikram Narayanan

libasync-integration: Remove old duplicates of thc code.

parent 8368c6ca
/* Barrelfish THC language extensions */
#ifndef _THC_H_
#define _THC_H_
/* Inside an LCD (kernel-module) build there is no libc, so pull in the
 * kernel/LCD headers and define an assert() that reports the failing
 * location via lcd_printk().
 * NOTE(review): this assert only prints -- execution continues after a
 * failed assertion; confirm that is the intended behavior for LCD. */
#ifdef LCD_DOMAINS
#include <linux/types.h>
#include <lcd_domains/types.h>
#include <liblcd/console.h>
#define assert(XX) do { \
if (!(XX)) { \
lcd_printk("assertion failure at %s:%d\n", \
__FILE__, __LINE__); \
} \
} while(0)
#else
#include <stdint.h>
#include <stdlib.h>
#endif
/* Outside Barrelfish, provide minimal stand-ins for its error type and
 * the two error codes used by the THC runtime. */
#ifndef BARRELFISH
typedef int errval_t;
#define SYS_ERR_OK 0
#define THC_CANCELED 1
#endif
// Initialize / tear down the THC runtime for the current thread.
void thc_init(void);
void thc_done(void);
// The implementation of do..finish relies on shadowing so that
// _fb_info always refers to the closest enclosing do..finish block.
#pragma GCC diagnostic ignored "-Wshadow"
/*......................................................................*/
// Macros for use from user code
//
// Cancel. _TAG is the name of an enclosing DO_FINISH block.
#define CANCEL(_TAG) \
do { \
_thc_do_cancel_request(_fb_info_ ## _TAG); \
} while (0)
// DO_FINISH block. Non-cancellable (uses the placeholder tag "__"):
#define DO_FINISH_NX(_CODE) DO_FINISH__(__,_CODE,1)
// Cancellable do-finish, with tag:
#define DO_FINISH_(_TAG,_CODE) DO_FINISH__(_TAG,_CODE,0)
// Cancellable do-finish, no tag (uses the placeholder tag "__"):
#define DO_FINISH(_CODE) DO_FINISH__(__,_CODE,0)
// ASYNC implementation. __COUNTER__ is a GCC extension that will
// allocate a unique integer ID on each expansion. Hence use it once
// here and use the resulting expanded value in ASYNC_:
#define ASYNC(_BODY) ASYNC_(_BODY, __COUNTER__)
#ifdef CONFIG_LAZY_THC
// DO_FINISH implementation (lazy variant):
//
// Capture the stack pointer on entry, run the block body, then wait in
// _thc_endfinishblock for all asyncs started inside the block.  If the
// stack pointer changed across the block, we finished while running on
// a lazily allocated stack: switch back to the original stack and let
// _thc_pendingfree release the stack we were just using.
#define DO_FINISH__(_TAG,_CODE,_IS_NX) \
do { \
finish_t _fb; \
finish_t *_fb_info __attribute__((unused)) = &_fb; \
finish_t *_fb_info_ ## _TAG __attribute__((unused)) = _fb_info; \
void *_fb_curr_stack __attribute__((unused)); \
FORCE_FRAME_POINTER_USE; \
GET_STACK_POINTER(_fb_info->old_sp); \
_thc_startfinishblock(_fb_info, _IS_NX); \
do { _CODE } while (0); \
GET_STACK_POINTER(_fb_curr_stack); \
_thc_endfinishblock(_fb_info, _fb_curr_stack); \
if (_fb_info->old_sp != _fb_curr_stack) { \
RESTORE_OLD_STACK_POINTER(_fb_info->old_sp); \
_thc_pendingfree(); \
} \
} while (0)
// The basic idea for ASYNC_ is that the contents of the block becomes
// a nested function. For the lazy implementation, we create an AWE for
// the continuation of the block and store a reference to it on the nested
// function's stackframe. We then mark this as an async by replacing the
// return address of the nested function with a special marker. If we block,
// the runtime system looks through the stack for this marker, allocating
// a lazy stack for any AWE continuations it finds.
//
// We are careful to avoid taking the address of the nested function.
// This prevents GCC trying to generate stack-allocated trampoline functions
// (this is not implemented on Beehive where the I and D caches are not
// coherent).
//
// There are several hacks:
//
// 1. The nested function is given an explicit name in the generated
// assembly language code (NESTED_FN_STRING(_C)). This means that
// inline asm can refer to it without needing to take the address
// of the nested function.
//
// 2. The nested function specifically jumps to the point after the async
// continuation rather than returning normally, since (i) we have fiddled
// with the return address to make it a marker and (ii), changing it back
// then returning normally confuses the branch prediction hardware leading
// to an increase in async cost by about 40 cycles (25 cycles -> 65 cycles)
//
// NOTE(review): the five nops between the nested-function definition and
// SCHEDULE_CONT appear to be deliberate padding around the call site --
// presumably to give the runtime's stack walk / return-address rewriting
// a safe landing area; confirm against thc.c before changing them.
#define ASYNC_(_BODY, _C) \
do { \
awe_t _awe; \
extern void * CONT_RET_FN_NAME(_C) (void); \
\
/* Define nested function containing the body */ \
noinline auto void _thc_nested_async(FORCE_ARGS_STACK awe_t *awe) __asm__(NESTED_FN_STRING(_C)); \
noinline void _thc_nested_async(FORCE_ARGS_STACK awe_t *awe) { \
void *_my_fb = _fb_info; \
_awe.current_fb = _my_fb; \
INIT_LAZY_AWE(awe, &_thc_lazy_awe_marker); \
do { _BODY; } while (0); \
/* If return address is NULLed then we blocked */ \
if (__builtin_return_address(0) == NULL) { \
/* thc_startasync is performed lazily, we should run */ \
/* _thc_endasync if we blocked*/ \
_thc_endasync(_my_fb, __builtin_frame_address(0)+(2*__WORD_SIZE)); \
} \
/* Otherwise, return */ \
RETURN_CONT(CONT_RET_FN_STRING(_C)); \
} \
_awe.status = LAZY_AWE; \
_awe.lazy_stack = NULL; \
_awe.pts = NULL; \
\
__asm__ volatile("nop \n\t"); \
__asm__ volatile("nop \n\t"); \
__asm__ volatile("nop \n\t"); \
__asm__ volatile("nop \n\t"); \
__asm__ volatile("nop \n\t"); \
\
SCHEDULE_CONT(&_awe, _thc_nested_async); \
__asm__ volatile ( \
" .globl " CONT_RET_FN_STRING(_C) "\n\t" \
" " CONT_RET_FN_STRING(_C) ": \n\t" \
); \
} while (0)
#else // EAGER_THC
// DO_FINISH implementation (eager variant): no stack-pointer juggling
// is needed because asyncs always run on their own eagerly allocated
// stacks; just bracket the body with start/end finish-block calls.
#define DO_FINISH__(_TAG,_CODE,_IS_NX) \
do { \
finish_t _fb; \
finish_t *_fb_info __attribute__((unused)) = &_fb; \
finish_t *_fb_info_ ## _TAG __attribute__((unused)) = _fb_info; \
_thc_startfinishblock(_fb_info, _IS_NX); \
do { _CODE } while (0); \
_thc_endfinishblock(_fb_info, NULL); \
} while (0)
// The basic idea for ASYNC_ is that the contents of the block becomes
// a nested function. We create an AWE for the continuation of the
// block (passing it to the runtime system via SCHEDULE_CONT).
// We then execute the block on a new stack. This is a bit cumbersome:
//
// - Allocate the stack memory _thc_allocstack
//
// - Define a "swizzle" function that will transfer execution onto the
// new stack, capturing the stack and target function address from
// its environment
//
// We are careful to avoid taking the address of the nested function.
// This prevents GCC trying to generate stack-allocated trampoline functions
// (this is not implemented on Beehive where the I and D caches are not
// coherent).
//
// There are several hacks:
//
// 1. The nested function is given an explicit name in the generated
// assembly language code (NESTED_FN_STRING(_C)). This means that
// inline asm can refer to it without needing to take the address
// of the nested function.
//
// 2. The swizzle function passes control to the nested function assuming
// that the calling conventions are the same for the two functions.
// In particular, the swizzle function is called with the same
// static chain as the underlying nested function.
//
// NOTE(review): the nested function must never return normally -- it
// finishes by re-entering the dispatch loop inside _thc_endasync --
// hence the assert(0) markers after both calls that should not return.
#define ASYNC_(_BODY, _C) \
do { \
awe_t _awe; \
void *_new_stack = _thc_allocstack(); \
_awe.current_fb = _fb_info; \
/* Define nested function containing the body */ \
auto void _thc_nested_async(void) __asm__(NESTED_FN_STRING(_C)); \
__attribute__((noinline,used)) void _thc_nested_async(void) { \
void *_my_fb = _fb_info; \
void *_my_stack = _new_stack; \
_thc_startasync(_my_fb, _my_stack); \
do { _BODY; } while (0); \
_thc_endasync(_my_fb, _my_stack); \
assert(0 && "_thc_endasync returned"); \
} \
\
/* Define function to enter _nested on a new stack */ \
auto void _swizzle(void) __asm__(SWIZZLE_FN_STRING(_C)); \
SWIZZLE_DEF(_swizzle, _new_stack, NESTED_FN_STRING(_C)); \
\
/* Add AWE for our continuation, then run "_nested" on new stack */ \
if (!SCHEDULE_CONT(&_awe)) { \
_swizzle(); \
assert(0 && "_swizzle returned"); \
} \
} while (0)
#endif // CONFIG_LAZY_THC
// Helper macros that build the assembly-language symbol names used by
// the ASYNC_ implementations above.
//
// Raw symbol names, distinguished by the per-expansion counter C:
#define NESTED_FN_NAME(C) _thc_nested_async_ ## C
#define SWIZZLE_FN_NAME(C) _thc_swizzle_ ## C
#define CONT_RET_FN_NAME(C) _thc_cont_return_ ## C
// Two-level stringization so that C is macro-expanded (e.g. a
// __COUNTER__ value) before "#" is applied; a single-level #X would
// stringize the literal argument text instead:
#define THC_STR_(X) #X
#define THC_STR(X) THC_STR_(X)
// String forms of the symbols, suitable for use inside inline asm and
// in __asm__ symbol-renaming declarations:
#define NESTED_FN_STRING(C) THC_STR(NESTED_FN_NAME(C))
#define SWIZZLE_FN_STRING(C) THC_STR(SWIZZLE_FN_NAME(C))
#define CONT_RET_FN_STRING(C) THC_STR(CONT_RET_FN_NAME(C))
/*......................................................................*/
// Prototypes for functions to be called by user code (as opposed to
// by the implementations of compiler intrinsics)
// Initialize the runtime system for the given thread:
//
// - execute "fn(args)" within it, as the first AWE
//
// - return the result of "fn(args)" when it completes
//
// - call "idle_fn(idle_args)" whenever there is no work
// (or assert-fail if idle and idle_fn is NULL)
typedef int (*THCFn_t)(void *);
typedef void (*THCIdleFn_t)(void *);
//extern int THCRun(THCFn_t fn,
// void *args,
// THCIdleFn_t idle_fn,
// void *idle_args);
// An AWE is an asynchronous work element. It runs to completion,
// possibly producing additional AWEs which may be run subsequently.
// (Opaque to user code; defined in thcinternal.h.)
typedef struct awe_t awe_t;
// Finish the current AWE, and initialize (*awe_ptr_ptr) with a pointer
// to an AWE for its continuation. Typically, this will be stashed
// away in a data structure from which it will subsequently be fetched
// and passed to THCSchedule.
void THCSuspend(awe_t **awe_ptr_ptr);
// As THCSuspend, but execute "then_fn(arg)" before returning to the
// scheduler.
typedef void (*THCThenFn_t)(void *);
void THCSuspendThen(awe_t **awe_ptr_ptr, THCThenFn_t then, void *arg);
// Schedule execution of a given AWE at the head/tail of the queue.
void THCSchedule(awe_t *awe_ptr);
void THCScheduleBack(awe_t *awe_ptr);
// Finish the current AWE, returning to the scheduler.
void THCFinish(void);
// Finish the current AWE, creating a new AWE from its continuation, and
// passing this immediately to the scheduler.
void THCYield(void);
// Hint that the runtime system should switch from the current AWE to the
// indicated awe_ptr. (Currently, the implementation does this immediately
// if awe_ptr runs on the same thread as the caller. It puts the continuation
// to THCYieldTo on the run-queue.)
void THCYieldTo(awe_t *awe_ptr);
// Cancellation actions. These are executed in LIFO order when cancellation
// occurs. Once cancellation has been requested, it is assumed that no
// further cancellation actions will be added. Cancellation actions can be
// added and removed in any order (not just LIFO) -- in practice this occurs
// when they are added/removed in different async branches.
//
// The structure of a cancel_item_t should be treated as opaque: it is
// defined here so that its size is known, and hence so that it can be
// stack-allocated by callers to THCAdd/RemoveCancelItem.
typedef void (*THCCancelFn_t)(void *);
typedef struct cancel_item_t cancel_item_t;
struct cancel_item_t {
THCCancelFn_t fn;
void *arg;
int was_run;
cancel_item_t *prev;
cancel_item_t *next;
};
void THCAddCancelItem(cancel_item_t *ci, THCCancelFn_t fn, void *arg);
void THCRemoveCancelItem(cancel_item_t *ci);
int THCCancelItemRan(cancel_item_t *ci);
// Test for cancellation request
int THCIsCancelRequested(void);
// Dump debugging stats (if available, otherwise no-op)
void THCDumpStats(int clear_stats);
void THCIncSendCount(void);
void THCIncRecvCount(void);
/*......................................................................*/
/*......................................................................*/
#include "thcsync.h"
#include "thcinternal.h"
#endif // _THC_H_
/* Barrelfish THC language extensions */
#ifndef _THC_INTERNAL_H_
#define _THC_INTERNAL_H_
/***********************************************************************/
// Forward declarations of the runtime's core types; the structs are
// defined below (thc_latch is defined in thcsync.h).
typedef struct ptstate_t PTState_t;
typedef struct thcstack_t thcstack_t;
typedef struct finish_t finish_t;
struct thc_latch;
typedef void (*THCIdleFn_t)(void *);
// Definition of an AWE, asynchronous work element. This definition must
// match the assembly-language definitions at the bottom of thc.c which
// access fields in the AWE structure.
//
// Status values describing how an AWE's stack is managed:
enum awe_status {
EAGER_AWE = 0,
LAZY_AWE,
NEEDS_LAZY_STACK,
ALLOCATED_LAZY_STACK
};
struct awe_t {
// Fields representing the code to run when the AWE is executed:
// saved instruction / frame / stack pointers (named after the x86-32
// registers, but they hold native-width values).
void *eip;
void *ebp;
void *esp;
// Can be EAGER_AWE, LAZY_AWE, NEEDS_LAZY_STACK or ALLOCATED_LAZY_STACK
enum awe_status status;
// Stack which is lazily allocated if a caller yields to this AWE.
void *lazy_stack;
// Link from an AWE to the per-thread state for the thread it
// runs in.
PTState_t *pts;
// Link from an AWE to the immediately-enclosing finish
finish_t *current_fb;
// Fields used by the runtime system to link together AWEs, e.g.,
// on a thread's run-queue, or on a list of waiters on a
// synchronization object.
struct awe_t *prev;
struct awe_t *next;
};
/***********************************************************************/
// Definition of a finish block's data structure.
//
// Finish blocks are held on a linked list threaded through the start_node
// and end_node fields. The blocks dynamically nested within a given
// finish block are held between these two nodes. (This enables easy
// iteration through all these dynamically nested blocks).
typedef struct finish_list_t finish_list_t;
struct finish_list_t {
finish_list_t *prev;
finish_list_t *next;
finish_t *fb; // Finish block this node belongs to
};
struct finish_t {
void *old_sp; /* stack pointer when entering do {} finish */
unsigned long count; /* NOTE(review): presumably outstanding asyncs
in this block -- confirm in thc.c */
struct awe_t *finish_awe; /* AWE to resume when the block completes --
TODO confirm in thc.c */
int fb_kind; /* The _IS_NX flag from DO_FINISH__ */
int cancel_requested;
finish_list_t start_node;
finish_list_t end_node;
finish_t *enclosing_fb;
void *enclosing_lazy_stack;
struct cancel_item_t *cancel_item;
};
/***********************************************************************/
// Per-thread runtime system state
// Node in the free-list of reusable async stacks.
struct thcstack_t {
struct thcstack_t *next;
};
struct ptstate_t {
// Thread-local fields: .............................................
// Head/tail sentinels of the dispatch list
struct awe_t aweHead;
struct awe_t aweTail;
// Immediately-enclosing finish block for the currently running code
finish_t *current_fb;
// Initialization / termination flags
int doneInit;
int shouldExit;
// Stack that the thread's dispatch loop will run on
void *dispatchStack;
// If we are running on a lazily allocated stack, this will point to its start
void *curr_lazy_stack;
// Function to execute whenever the dispatch loop is idle (e.g.,
// to block the thread until an incoming message which might change
// the state of the dispatch loop).
THCIdleFn_t idle_fn;
void *idle_args;
void *idle_stack;
// Stack to be de-allocated on the next execution of the dispatch loop
// (an async call terminates by re-entering the dispatch loop with
// pendingFree set to the stack it was using. It cannot deallocate
// its own stack while it is in use).
void *pendingFree;
// AWE to enter for the dispatch loop on this thread
struct awe_t dispatch_awe;
// Free stacks for re-use
struct thcstack_t *free_stacks;
#ifndef NDEBUG
// Debugging statistics
int stackMemoriesAllocated;
int stackMemoriesDeallocated;
int stacksAllocated;
int stacksDeallocated;
int finishBlocksStarted;
int finishBlocksEnded;
int asyncCallsStarted;
int asyncCallsEnded;
int aweCreated;
int aweResumed;
int idleStarted;
int idleComplete;
int cancelsRequested;
int cancelsAdded;
int cancelsRun;
int cancelsRemoved;
int getTls;
int lock;
int sendCount;
int recvCount;
#endif
// Shared fields: ...................................................
// Latch protecting the dispatch list
struct thc_latch latch;
// Head/tail sentinels of the remote dispatch list on which other
// threads place AWEs that they have unblocked but which belong to
// this thread
struct awe_t aweRemoteHead;
struct awe_t aweRemoteTail;
};
// Continuation function invoked via _thc_callcont: receives the AWE
// ("cont") and the caller-supplied argument block.
typedef void (*THCContFn_t)(void *cont, void *args);
// Stack management:
void *_thc_allocstack(void);
void _thc_freestack(void *s);
void _thc_onaltstack(void *s, void *fn, void *args);
// Async / finish-block bookkeeping (called from the macros above):
void _thc_startasync(void *f, void *stack);
void _thc_endasync(void *f, void *s);
void _thc_startfinishblock(finish_t *fb, int fb_kind);
void _thc_endfinishblock(finish_t *fb, void *stack);
void _thc_do_cancel_request(finish_t *fb);
// Continuation capture / resume. These can return more than once
// (like setjmp); the attribute stops GCC from miscompiling callers.
void _thc_callcont(struct awe_t *awe, THCContFn_t fn, void *args) __attribute__((returns_twice));
int _thc_schedulecont(struct awe_t *awe) __attribute__((returns_twice));
// Address used as the "special marker" return address for lazy AWEs
// (see INIT_LAZY_AWE); the runtime scans stacks for it.
void _thc_lazy_awe_marker(void);
// Release the stack recorded in pendingFree, if any.
void _thc_pendingfree(void);
/***********************************************************************/
// Symbols declared in the .text.nx section
extern int _start_text_nx;
extern int _end_text_nx;
/***********************************************************************/
/* Macro to force callee-saves to be spilled to the stack.
 *
 * An empty asm statement that claims to clobber every callee-saved
 * register (plus memory and condition codes) forces the compiler to
 * spill them around the point of use, so the THC context-switch code
 * can safely capture/replace the stack. */
#if defined(__x86_64__)
#define KILL_CALLEE_SAVES() \
__asm__ volatile ("" : : : "rbx", "r12", "r13", "r14", "r15", \
"memory", "cc")
#elif defined(__i386__)
#ifdef __pic__
/* Under PIC, ebx holds the GOT pointer and may not be clobbered. */
#define KILL_CALLEE_SAVES() \
__asm__ volatile ("" : : : "edi", "esi", "esp", "memory", "cc")
#else
#define KILL_CALLEE_SAVES() \
__asm__ volatile ("" : : : "ebx", "edi", "esi", "esp", "memory", "cc")
#endif
#elif defined(__arm__)
// see ARM Procedure Call Standard (APCS): 5.1 Machine Registers
// NB: gcc complains about clobbering two registers:
// . v8 (i.e., r11), is the frame pointer in ARM and cannot be clobbered
// . v7 is the PIC register
//
// Bug fix: the last VFP clobber was previously written as "31", which
// is not a valid register name (GCC rejects unknown clobbers); the
// intended register is "s31", the last single-precision VFP register.
#if defined(__pic__)
#define KILL_CALLEE_SAVES() \
__asm__ volatile ("" : : : "sp", \
"v1", "v2", "v3", "v4", "v5", "v6", \
"s16", "s17", "s18", "s19", "s20", "s21", "s22", \
"s23", "s24", "s25", "s26", "s27", "s28", "s29", \
"s30", "s31", \
"memory")
#else // same as before, but including v7
#define KILL_CALLEE_SAVES() \
__asm__ volatile ("" : : : "sp", \
"v1", "v2", "v3", "v4", "v5", "v6", "v7", \
"s16", "s17", "s18", "s19", "s20", "s21", "s22", \
"s23", "s24", "s25", "s26", "s27", "s28", "s29", \
"s30", "s31", \
"memory")
#endif
#else
#error "Need definition of KILL_CALLEE_SAVES"
#endif
/* Machine word size in bytes, used for stack-frame offset arithmetic. */
#define __WORD_SIZE (sizeof(void*))
/***********************************************************************/
#ifdef CONFIG_LAZY_THC
/***********************************************************************/
// Architecture-specific support for the lazy THC implementation.
//
// FORCE_ARGS_STACK pads the nested function's parameter list with six
// dummy pointer parameters so the real awe_t* argument ends up on the
// stack (at a known frame offset) rather than in a register.
#if defined(__x86_64__)
/* Force args on stack - there must be a better way of doing this, but */
/* regparam(0) doesn't work on x86_64 */
#define FORCE_ARGS_STACK void*__a, void*__b, void*__c, void*__d, \
void*__e, void*__f,
#define FORCE_ARGS_STACK_CALL NULL, NULL, NULL, NULL, NULL, NULL,
#elif defined(__i386__)
// i386 cdecl already passes all arguments on the stack; no padding needed.
#define FORCE_ARGS_STACK
#define FORCE_ARGS_STACK_CALL
#elif defined(__arm__)
#define FORCE_ARGS_STACK assert(0 && "THC not yet implemented on ARM")
#define FORCE_ARGS_STACK_CALL assert(0 && "THC not yet implemented on ARM")
#else
#error "Need definition of FORCE_ARGS_STACK"
#endif
#define FORCE_FRAME_POINTER_USE \
/* Do a zero byte alloca to force local variable access via ebp */ \
/* Note, this does not add any code (even with -O0). */ \
__builtin_alloca(0)
// Read / restore the stack pointer. Used by the lazy DO_FINISH__ to
// detect, and switch back from, a lazily allocated stack.
#if defined(__x86_64__)
#define GET_STACK_POINTER(STACK_PTR) \
__asm__ volatile ("movq %%rsp, %0 \n\t" \
: "=m"(STACK_PTR) : )
#define RESTORE_OLD_STACK_POINTER(OLD_STACK_PTR) \
__asm__ volatile ("movq %0, %%rsp \n\t" \
: : "m"(OLD_STACK_PTR))
#elif defined(__i386__)
#define GET_STACK_POINTER(STACK_PTR) \
__asm__ volatile ("movl %%esp, %0 \n\t" \
: "=m"(STACK_PTR) : )
#define RESTORE_OLD_STACK_POINTER(OLD_STACK_PTR) \
__asm__ volatile ("movl %0, %%esp \n\t" \
: : "m"(OLD_STACK_PTR))
#elif defined(__arm__)
#define GET_STACK_POINTER(_) assert(0 && "THC not yet implemented on ARM")
#define RESTORE_OLD_STACK_POINTER(_) assert(0 && "THC not yet implemented on ARM")
#else
#error "Need definition of GET_STACK_POINTER and RESTORE_OLD_STACK_POINTER"
#endif
#if defined(__x86_64__) || defined(__i386__)
// INIT_LAZY_AWE() is used in the beginning of the nested function in ASYNC_.
// The signature of the nested function is:
// void _thc_nested_async(FORCE_ARGS_STACK awe_t *awe)
//
// So in INIT_LAZY_AWE, the stack in x86 looks like:
// sp ->
// .......
// rbp-> [ saved rbp ] rbp[0]
// [ RET ] rbp[1]
// [ awe ] rbp[2] (passed as first arg)
//
// Accessors for those three slots, as word offsets from a frame pointer:
#define THC_LAZY_FRAME_PREV(FRAME_PTR) *((FRAME_PTR)+0)
#define THC_LAZY_FRAME_RET(FRAME_PTR) *((FRAME_PTR)+1)
#define THC_LAZY_FRAME_AWE(FRAME_PTR) *((FRAME_PTR)+2)
#endif
#if defined(__x86_64__)
#define INIT_LAZY_AWE(AWE_PTR, LAZY_MARKER) \
__asm__ volatile ( \
" movq 8(%%rbp), %%rsi \n\t" \
" movq %%rsi, 0(%0) \n\t" /* RIP (our return address) */ \
" movq 0(%%rbp), %%rsi \n\t" \
" movq %%rsi, 8(%0) \n\t" /* RBP */ \
" movq %1, 8(%%rbp) \n\t" /* put marker as ret address */ \