You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
nudt-compiler-cpp/sylib/sylib.c

198 lines
6.8 KiB

// SysY 运行库实现:
// - 按实验/评测规范提供 I/O 等函数实现
// - 与编译器生成的目标代码链接,支撑运行时行为
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <pthread.h>
#include <stdint.h>
#include <string.h>
// Reads one decimal integer from stdin.
// Returns the parsed value, or 0 when nothing could be converted (EOF or
// malformed input), so the caller never receives an indeterminate value.
int getint() {
  int value = 0;
  if (scanf("%d", &value) != 1) {
    return 0;
  }
  return value;
}
// Reads the next character from stdin and returns it as an int
// (EOF when the stream is exhausted or a read error occurs).
int getch() {
  int ch = getc(stdin);
  return ch;
}
// Writes `v` to stdout in decimal, with no separator or trailing newline.
void putint(int v) {
  fprintf(stdout, "%d", v);
}
// Writes the single character `c` to stdout.
void putch(int c) {
  putc(c, stdout);
}
// Reads one floating-point number from stdin (any format accepted by
// scanf's %f, including hexadecimal floats).
// Returns the parsed value, or 0.0f when nothing could be converted (EOF or
// malformed input), so the caller never receives an indeterminate value.
float getfloat() {
  float value = 0.0f;
  if (scanf("%f", &value) != 1) {
    return 0.0f;
  }
  return value;
}
// Writes `v` to stdout in hexadecimal floating-point notation (%a),
// with no separator or trailing newline.
void putfloat(float v) {
  // The float is promoted to double for the variadic call either way;
  // the cast only makes that promotion explicit.
  fprintf(stdout, "%a", (double)v);
}
// Reads an element count followed by that many integers from stdin into `a`.
// The caller must provide room for at least the announced number of elements.
// Returns the announced count, or 0 when the count itself cannot be read
// (EOF or malformed input). If an element fails to parse, reading stops
// early and the remaining slots of `a` are left untouched.
int getarray(int* a) {
  int n = 0;
  if (scanf("%d", &n) != 1) {
    return 0;
  }
  for (int i = 0; i < n; i++) {
    // Once one conversion fails, every later one would fail on the same
    // stream position, so there is no point in continuing.
    if (scanf("%d", &a[i]) != 1) {
      break;
    }
  }
  return n;
}
// Reads an element count followed by that many floats from stdin into `a`.
// The caller must provide room for at least the announced number of elements.
// Returns the announced count, or 0 when the count itself cannot be read
// (EOF or malformed input). If an element fails to parse, reading stops
// early and the remaining slots of `a` are left untouched.
int getfarray(float* a) {
  int n = 0;
  if (scanf("%d", &n) != 1) {
    return 0;
  }
  for (int i = 0; i < n; i++) {
    // Once one conversion fails, every later one would fail on the same
    // stream position, so there is no point in continuing.
    if (scanf("%f", &a[i]) != 1) {
      break;
    }
  }
  return n;
}
// Prints an integer array to stdout as "<n>: e0 e1 ... e(n-1)\n".
// A non-positive `n` prints only the count, the colon and the newline.
void putarray(int n, int* a) {
  fprintf(stdout, "%d:", n);
  int idx = 0;
  while (idx < n) {
    fprintf(stdout, " %d", a[idx]);
    ++idx;
  }
  fputc('\n', stdout);
}
// Prints a float array to stdout as "<n>: e0 e1 ... e(n-1)\n", each element
// in hexadecimal floating-point notation (%a).
// A non-positive `n` prints only the count, the colon and the newline.
void putfarray(int n, float* a) {
  fprintf(stdout, "%d:", n);
  int idx = 0;
  while (idx < n) {
    // Explicit cast mirrors the default float-to-double promotion.
    fprintf(stdout, " %a", (double)a[idx]);
    ++idx;
  }
  fputc('\n', stdout);
}
// Timestamp captured by the most recent starttime() call; stoptime()
// measures against it. There is a single shared slot, so timers do not nest.
static struct timespec _t0;

// Records the current CLOCK_MONOTONIC time as the start of a measurement.
// `l` is accepted for interface compatibility and is not used here.
void starttime(int l) {
  (void)l;
  clock_gettime(CLOCK_MONOTONIC, &_t0);
}

// Prints the time elapsed since the last starttime() call to stderr as
// "Timer@<l>: <ms>ms". `l` is only echoed in the message (presumably the
// source line of the call; confirm against the code generator).
void stoptime(int l) {
  struct timespec t1;
  clock_gettime(CLOCK_MONOTONIC, &t1);
  // time_t is not guaranteed to be `long` (e.g. 64-bit time_t on 32-bit
  // targets), so convert explicitly to keep the value matched to "%ld".
  long elapsed_ms = (long)(t1.tv_sec - _t0.tv_sec) * 1000 +
                    (long)(t1.tv_nsec - _t0.tv_nsec) / 1000000;
  fprintf(stderr, "Timer@%d: %ldms\n", l, elapsed_ms);
}
// Stores `value` into the first `count` ints starting at `base`.
// Does nothing when `base` is NULL or `count` is not positive.
void __fill_i32(int* base, int count, int value) {
  if (base == NULL || count < 1) {
    return;
  }
  const size_t n = (size_t)count;
  if (value != 0 && value != -1) {
    // General case: element-wise store.
    for (int *p = base, *end = base + n; p != end; ++p) {
      *p = value;
    }
    return;
  }
  // 0 and -1 consist of one repeated byte (0x00 / 0xff), so a byte-wise
  // memset produces the same result as storing each int.
  memset(base, value & 0xff, n * sizeof(int));
}
// Work description for one __fill_rows_worker invocation: fill `count`
// ints with `value` at the start of each row in [start_row, end_row).
// NOTE: the field order is relied on by the positional initializer in
// __fill_rows_i32; do not reorder.
typedef struct {
int* base;          // array the rows live in
int start_offset;   // element offset from `base` to row 0
int start_row;      // first row to fill (inclusive)
int end_row;        // one past the last row to fill (exclusive)
int stride;         // distance in elements between consecutive rows
int count;          // number of elements filled at the start of each row
int value;          // value stored into each element
} fill_rows_task_t;
// Thread entry point (also called directly): fills the rows described by
// the fill_rows_task_t passed through `opaque`. Always returns NULL.
static void* __fill_rows_worker(void* opaque) {
  const fill_rows_task_t* job = opaque;
  int row = job->start_row;
  while (row < job->end_row) {
    // Row r starts at base + start_offset + r * stride.
    __fill_i32(job->base + job->start_offset + row * job->stride,
               job->count, job->value);
    ++row;
  }
  return NULL;
}
// Fills `count` ints with `value` at the start of each of `rows` rows, where
// row r begins at base + start_offset + r * stride.
//
// Fewer than 32 rows are filled on the calling thread. Otherwise the rows
// are split into four contiguous ranges: three are handed to helper threads
// and the first is filled by the caller, which then joins the helpers.
//
// Does nothing when `base` is NULL or any of rows/stride/count is not
// positive.
void __fill_rows_i32(int* base, int start_offset, int rows, int stride, int count,
                     int value) {
  if (!base || rows <= 0 || stride <= 0 || count <= 0) return;
  if (rows < 32) {
    fill_rows_task_t task = {base, start_offset, 0, rows, stride, count, value};
    __fill_rows_worker(&task);
    return;
  }

  enum { FILL_PARTS = 4 };
  pthread_t tids[FILL_PARTS - 1];
  int started[FILL_PARTS - 1];
  fill_rows_task_t tasks[FILL_PARTS];

  for (int part = 0; part < FILL_PARTS; ++part) {
    tasks[part].base = base;
    tasks[part].start_offset = start_offset;
    // Widen before multiplying so (part + 1) * rows cannot overflow int.
    tasks[part].start_row = (int)((long long)part * rows / FILL_PARTS);
    tasks[part].end_row = (int)((long long)(part + 1) * rows / FILL_PARTS);
    tasks[part].stride = stride;
    tasks[part].count = count;
    tasks[part].value = value;
  }

  for (int part = 1; part < FILL_PARTS; ++part) {
    started[part - 1] = pthread_create(&tids[part - 1], NULL,
                                       __fill_rows_worker, &tasks[part]) == 0;
    if (!started[part - 1]) {
      // Thread creation failed: fill this range here so no rows are skipped.
      __fill_rows_worker(&tasks[part]);
    }
  }

  __fill_rows_worker(&tasks[0]);

  for (int i = 0; i < FILL_PARTS - 1; ++i) {
    // Only join threads that were actually created; the pthread_t of a
    // failed create is indeterminate.
    if (started[i]) {
      pthread_join(tids[i], NULL);
    }
  }
}
// Signature of a parallel-region worker: invoked with a thread id
// (0 for the calling thread, 1..3 for the helper threads).
typedef void (*par_worker_fn_t)(int);
// Shared synchronization state for one parallel slot. All fields other than
// the mutex/condvars are read and written with `mutex` held once the helper
// threads exist.
// NOTE: the field order is relied on by the positional initializer in
// DECL_PAR_SLOT; do not reorder.
typedef struct {
pthread_mutex_t mutex;     // protects generation and remaining
pthread_cond_t start_cv;   // broadcast by __par_runN to release the helpers
pthread_cond_t done_cv;    // signalled by the last helper to finish a round
int generation;            // incremented once per __par_runN call
int remaining;             // helpers that have not yet finished this round
int helper_count;          // number of helper threads successfully created
} par_slot_state_t;
// Per-helper-thread argument handed to __par_pool_worker.
typedef struct {
par_slot_state_t* state;   // slot this helper belongs to
par_worker_fn_t worker;    // function the helper runs each round
int tid;                   // thread id passed to `worker`
} par_thread_arg_t;
// Body of a helper thread for one parallel slot. `opaque` points to the
// thread's par_thread_arg_t. The thread loops forever: it sleeps until the
// slot's generation counter changes, runs arg->worker(arg->tid) once with
// the mutex released, then reports completion by decrementing `remaining`.
// The function never returns.
static void* __par_pool_worker(void* opaque) {
par_thread_arg_t* arg = (par_thread_arg_t*)opaque;
par_slot_state_t* state = arg->state;
// Generation this thread has already serviced; starts at the slot's
// initial value so the first wake-up happens on the first increment.
int seen_generation = 0;
pthread_mutex_lock(&state->mutex);
for (;;) {
// Loop around the wait to tolerate spurious wake-ups.
while (state->generation == seen_generation) {
pthread_cond_wait(&state->start_cv, &state->mutex);
}
seen_generation = state->generation;
// Run the user work without holding the lock.
pthread_mutex_unlock(&state->mutex);
arg->worker(arg->tid);
pthread_mutex_lock(&state->mutex);
// Guarded decrement: never drives the counter below zero.
if (state->remaining > 0) {
--state->remaining;
// The last helper to finish wakes the thread waiting on done_cv.
if (state->remaining == 0) {
pthread_cond_signal(&state->done_cv);
}
}
}
}
/*
 * DECL_PAR_SLOT(N) defines parallel slot N:
 *
 *   - a weak reference to the externally provided worker __par_workerN(int);
 *   - the slot's private synchronization state and helper-thread arguments;
 *   - __par_initN(), run once via pthread_once, which tries to start three
 *     detached helper threads with ids 1..3;
 *   - the public entry point __par_runN(void).
 *
 * __par_runN() returns immediately when __par_workerN is not linked in.
 * Otherwise it releases the helpers for one round, runs __par_workerN(0) on
 * the calling thread, and blocks until every helper has finished the round.
 *
 * If a helper thread could not be created (or the thread attributes could
 * not be set up), its thread id is executed on the calling thread instead,
 * so every id in 0..3 still runs exactly once per __par_runN() call.
 *
 * Only block comments may be used inside the macro body: a line comment
 * would swallow the following continuation lines.
 */
#define DECL_PAR_SLOT(N) \
  extern void __par_worker##N(int) __attribute__((weak)); \
  static par_slot_state_t __par_state##N = { \
      PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, \
      PTHREAD_COND_INITIALIZER, 0, 0, 0}; \
  static pthread_once_t __par_once##N = PTHREAD_ONCE_INIT; \
  static par_thread_arg_t __par_args##N[3]; \
  static unsigned char __par_started##N[3]; \
  static void __par_init##N(void) { \
    pthread_attr_t attr; \
    pthread_t tid; \
    if (pthread_attr_init(&attr) != 0) return; \
    if (pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) == 0) { \
      for (int i = 0; i < 3; ++i) { \
        __par_args##N[i].state = &__par_state##N; \
        __par_args##N[i].worker = __par_worker##N; \
        __par_args##N[i].tid = i + 1; \
        if (pthread_create(&tid, &attr, __par_pool_worker, \
                           &__par_args##N[i]) == 0) { \
          __par_started##N[i] = 1; \
          ++__par_state##N.helper_count; \
        } \
      } \
    } \
    pthread_attr_destroy(&attr); \
  } \
  void __par_run##N(void) { \
    if (!__par_worker##N) return; \
    pthread_once(&__par_once##N, __par_init##N); \
    pthread_mutex_lock(&__par_state##N.mutex); \
    __par_state##N.remaining = __par_state##N.helper_count; \
    ++__par_state##N.generation; \
    pthread_cond_broadcast(&__par_state##N.start_cv); \
    pthread_mutex_unlock(&__par_state##N.mutex); \
    __par_worker##N(0); \
    for (int i = 0; i < 3; ++i) { \
      if (!__par_started##N[i]) __par_worker##N(i + 1); \
    } \
    pthread_mutex_lock(&__par_state##N.mutex); \
    while (__par_state##N.remaining != 0) { \
      pthread_cond_wait(&__par_state##N.done_cv, &__par_state##N.mutex); \
    } \
    pthread_mutex_unlock(&__par_state##N.mutex); \
  }
// Instantiate eight independent parallel slots (0..7). Each expansion
// defines its own __par_runN entry point and private per-slot state.
DECL_PAR_SLOT(0)
DECL_PAR_SLOT(1)
DECL_PAR_SLOT(2)
DECL_PAR_SLOT(3)
DECL_PAR_SLOT(4)
DECL_PAR_SLOT(5)
DECL_PAR_SLOT(6)
DECL_PAR_SLOT(7)