vsync / spinlock / hmcslock.h
Hierarchical MCS lock for systems with NUMA Hierarchies.
Groups: Fair locks, Numa-aware
Example:
#include <vsync/spinlock/hmcslock.h>
#include <pthread.h>
#include <stdio.h>
#include <assert.h>
// To build such a hierarchical tree: //
// R //
// / \ //
// A B //
// / \ / \ //
// C D E F //
// //
// The following specs are needed: //
// level 1, has a root (1 node) //
// level 2, has two nodes connected to R the parent //
// level 3, has two nodes per parent (A/B) //
// //
// number of levels = 3 //
// number of locks = 7 //
#define NUM_LEVELS 3
#define LEVEL_1 1
#define LEVEL_2 2
#define LEVEL_3 2
#define LEVEL_1_THRESHOLD 1
#define LEVEL_2_THRESHOLD 1
#define LEVEL_3_THRESHOLD 1
hmcslock_level_spec_t g_level_specs[NUM_LEVELS] = {
{LEVEL_1, LEVEL_1_THRESHOLD},
{LEVEL_2, LEVEL_2_THRESHOLD},
{LEVEL_3, LEVEL_3_THRESHOLD},
};
#define NUM_LOCKS \
((LEVEL_3 * LEVEL_2 * LEVEL_1) + (LEVEL_1 * LEVEL_2) + (LEVEL_1))
#define N 10
#define IT 10
#define EXPECTED_VAL (N) * (IT)
vuint32_t g_x = 0;
vuint32_t g_y = 0;
hmcslock_t g_locks[NUM_LOCKS];
hmcs_node_t g_qnode[N] = {0};
void *
run(void *args)
{
vsize_t tid = (vsize_t)args;
/* the thread should be pinned to the core */
/* find the leaf lock associated with this code */
hmcslock_t *lock = hmcslock_which_lock(g_locks, NUM_LOCKS, g_level_specs,
NUM_LEVELS, LEVEL_3, tid);
for (vsize_t i = 0; i < IT; i++) {
hmcslock_acquire(lock, &g_qnode[tid], NUM_LEVELS);
g_x++;
g_y++;
assert(g_x == g_y);
hmcslock_release(lock, &g_qnode[tid], NUM_LEVELS);
}
return NULL;
}
int
main(void)
{
pthread_t threads[N];
hmcslock_init(g_locks, NUM_LOCKS, g_level_specs, NUM_LEVELS);
for (vsize_t i = 0; i < N; i++) {
pthread_create(&threads[i], NULL, run, (void *)i);
}
for (vsize_t i = 0; i < N; i++) {
pthread_join(threads[i], NULL);
}
printf("Final value %u %u\n", g_x, g_y);
assert(g_x == EXPECTED_VAL);
assert(g_x == g_y);
return 0;
}
References:
High Performance Locks for Multi-level NUMA Systems
References:
Verifying and Optimizing the HMCS Lock for Arm Servers
Functions
| Function | Description |
|---|---|
| hmcslock_which_lock | Returns the address of the lock associated with the core id. |
| hmcslock_init | Initializes the locks array. |
| hmcslock_acquire | Acquires the given lock. |
| hmcslock_release | Releases the given lock. |
Function hmcslock_which_lock
static hmcslock_t* hmcslock_which_lock(hmcslock_t *locks, vsize_t locks_len, hmcslock_level_spec_t *level_specs, vsize_t num_levels, vuint32_t cores_per_node, vuint32_t core_id)
Returns the address of the lock associated with the core id.
Parameters:
locks: array of hmcslock_t objects.locks_len:locksarray length.level_specs: array of hmcslock_level_spec_t objects.num_levels: number of the levels in hierarchy, same as the length oflevel_specs.cores_per_node: number of cores per leaf lock node.core_id: core id of the calling thread.
Returns: hmcslock_t* address of hmcslock_t object, which the calling thread should use to acquire the lock.
Returns: NULL when HMCS_MAX_THREADS is too small.
Function hmcslock_init
static void hmcslock_init(hmcslock_t *locks, vsize_t locks_len, hmcslock_level_spec_t *level_specs, vsize_t num_levels)
Initializes the locks array.
Parameters:
locks: array of hmcslock_t objects.locks_len:locksarray length.level_specs: array of hmcslock_level_spec_t objects.num_levels: number of the levels in heirarchy, same as the length oflevel_specs.
Note: e.g., say you have three levels, the machine, two NUMA nodes, and two caches per NUMA. Then
locks_len = 1(machine) + 2(NUMA) + 4 (2*2 caches) = 7.num_levels = 3(including the machine level) define levels_spec as follows:
level_specs[num_levels] = {
{1, LEVEL_THRESHOLD}, // 1 machine
{2, LEVEL_THRESHOLD}, // 2 NUMAs per machine
{2, LEVEL_THRESHOLD}, // 2 caches per NUMA
}
Function hmcslock_acquire
static void hmcslock_acquire(hmcslock_t *lock, hmcs_node_t *qnode, vsize_t num_levels)
Acquires the given lock.
Parameters:
lock: address of hmcslock_t object.qnode: address of hmcs_node_t object.num_levels: number of levels including machine level.
Note:
lockshould be whathmcslock_which_lockreturned.
Function hmcslock_release
static void hmcslock_release(hmcslock_t *lock, hmcs_node_t *qnode, vsize_t num_levels)
Releases the given lock.
Parameters:
lock: address of hmcslock_t object.qnode: address of hmcs_node_t object.num_levels: number of levels including machine level.
Note:
lockshould be whathmcslock_which_lockreturned.