// Forked from NUDT-compiler/nudt-compiler-cpp.
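// SysY runtime library implementation:
// - provides the I/O and other functions required by the lab/evaluation spec
// - linked with the compiler-generated object code to support runtime behavior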
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <time.h>
|
|
#include <pthread.h>
|
|
#include <stdint.h>
|
|
#include <string.h>
|
|
|
|
// Reads one decimal integer from standard input.
//
// Returns the parsed value, or 0 when no integer could be read (end of input
// or malformed text), so the caller never receives an indeterminate value.
int getint() {
  int v = 0;
  if (scanf("%d", &v) != 1) {
    return 0;
  }
  return v;
}
|
|
// Reads a single character from standard input with getchar() and hands the
// result back unchanged, including getchar()'s EOF value.
int getch() {
  const int ch = getchar();
  return ch;
}
|
|
// Writes v to standard output as a decimal integer, with no separator and no
// trailing newline.
void putint(int v) {
  fprintf(stdout, "%d", v);
}
|
|
// Writes the single character c to standard output.
void putch(int c) {
  putc(c, stdout);
}
|
|
// Reads one floating-point number from standard input.
//
// Returns the parsed value, or 0.0f when no number could be read (end of input
// or malformed text), so the caller never receives an indeterminate value.
float getfloat() {
  float v = 0.0f;
  if (scanf("%f", &v) != 1) {
    return 0.0f;
  }
  return v;
}
|
|
// Writes v to standard output in hexadecimal floating-point notation ("%a"),
// with no separator and no trailing newline.
void putfloat(float v) {
  fprintf(stdout, "%a", v);
}
|
|
|
|
// Reads an integer array from standard input: first the element count, then
// that many decimal integers, stored into a[0..count-1].
//
// Returns the element count that was read, or 0 when the count itself cannot
// be parsed (end of input or malformed text). The caller must supply a buffer
// large enough for the announced count. If an element fails to parse, reading
// stops there and the remaining slots are left untouched.
int getarray(int* a) {
  int n = 0;
  if (scanf("%d", &n) != 1) {
    return 0;  // Never loop over, or return, an indeterminate count.
  }
  for (int i = 0; i < n; i++) {
    if (scanf("%d", &a[i]) != 1) {
      break;  // Input exhausted or malformed; retrying would fail the same way.
    }
  }
  return n;
}
|
|
// Reads a float array from standard input: first the element count (a decimal
// integer), then that many floating-point numbers, stored into a[0..count-1].
//
// Returns the element count that was read, or 0 when the count itself cannot
// be parsed (end of input or malformed text). The caller must supply a buffer
// large enough for the announced count. If an element fails to parse, reading
// stops there and the remaining slots are left untouched.
int getfarray(float* a) {
  int n = 0;
  if (scanf("%d", &n) != 1) {
    return 0;  // Never loop over, or return, an indeterminate count.
  }
  for (int i = 0; i < n; i++) {
    if (scanf("%f", &a[i]) != 1) {
      break;  // Input exhausted or malformed; retrying would fail the same way.
    }
  }
  return n;
}
|
|
// Prints an integer array to standard output on one line, in the form
// "<n>: <a[0]> <a[1]> ...", followed by a newline. When n is zero or negative
// only the "<n>:" prefix and the newline are printed.
void putarray(int n, int* a) {
  printf("%d:", n);
  for (int i = 0; i < n; i++) {
    printf(" %d", a[i]);
  }
  printf("\n");
}
|
|
// Prints a float array to standard output on one line, in the form
// "<n>: <a[0]> <a[1]> ...", followed by a newline. Each element is written in
// hexadecimal floating-point notation ("%a"). When n is zero or negative only
// the "<n>:" prefix and the newline are printed.
void putfarray(int n, float* a) {
  printf("%d:", n);
  for (int i = 0; i < n; i++) {
    printf(" %a", a[i]);
  }
  printf("\n");
}
|
|
|
|
// Monotonic-clock reading captured by the most recent starttime() call.
static struct timespec _t0;

// Marks the beginning of a timed region by recording the current
// CLOCK_MONOTONIC reading. The argument l is accepted but not used.
void starttime(int l) {
  (void)l;
  clock_gettime(CLOCK_MONOTONIC, &_t0);
}

// Prints to stderr the whole milliseconds elapsed since the last starttime()
// call, tagged with l, in the form "Timer@<l>: <ms>ms".
void stoptime(int l) {
  struct timespec t1;
  clock_gettime(CLOCK_MONOTONIC, &t1);
  // The width of time_t differs between platforms, so convert explicitly to
  // long long; the argument then always matches the %lld conversion.
  long long elapsed_ms = (long long)(t1.tv_sec - _t0.tv_sec) * 1000 +
                         (long long)(t1.tv_nsec - _t0.tv_nsec) / 1000000;
  fprintf(stderr, "Timer@%d: %lldms\n", l, elapsed_ms);
}
|
|
|
|
// Stores value into each of the count ints starting at base.
//
// Does nothing when base is NULL or count is not positive. The values 0 and
// -1 consist of four identical bytes (0x00 / 0xff), so those two cases are
// handled with a single memset; every other value is written element by
// element.
void __fill_i32(int* base, int count, int value) {
  if (base == NULL || count <= 0) {
    return;
  }
  if (value == 0 || value == -1) {
    memset(base, value & 0xff, (size_t)count * sizeof *base);
    return;
  }
  for (int i = 0; i < count; ++i) {
    base[i] = value;
  }
}
|
|
|
|
// Work description for one slice of a row-wise fill. Field order matters:
// the struct is also initialised positionally.
typedef struct {
  int* base;         // Start of the int buffer being filled.
  int start_offset;  // Element offset from base to the first row.
  int start_row;     // First row of this slice (inclusive).
  int end_row;       // Last row of this slice (exclusive).
  int stride;        // Elements between the starts of consecutive rows.
  int count;         // Elements to fill at the start of each row.
  int value;         // Value stored into every filled element.
} fill_rows_task_t;
|
|
|
|
static void* __fill_rows_worker(void* opaque) {
|
|
fill_rows_task_t* task = (fill_rows_task_t*)opaque;
|
|
for (int row = task->start_row; row < task->end_row; ++row) {
|
|
int* row_ptr = task->base + task->start_offset + row * task->stride;
|
|
__fill_i32(row_ptr, task->count, task->value);
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
void __fill_rows_i32(int* base, int start_offset, int rows, int stride, int count,
|
|
int value) {
|
|
if (!base || rows <= 0 || stride <= 0 || count <= 0) return;
|
|
if (rows < 32) {
|
|
fill_rows_task_t task = {base, start_offset, 0, rows, stride, count, value};
|
|
__fill_rows_worker(&task);
|
|
return;
|
|
}
|
|
|
|
pthread_t tids[3];
|
|
fill_rows_task_t tasks[4];
|
|
for (int tid = 0; tid < 4; ++tid) {
|
|
int begin = tid * rows / 4;
|
|
int end = (tid + 1) * rows / 4;
|
|
tasks[tid].base = base;
|
|
tasks[tid].start_offset = start_offset;
|
|
tasks[tid].start_row = begin;
|
|
tasks[tid].end_row = end;
|
|
tasks[tid].stride = stride;
|
|
tasks[tid].count = count;
|
|
tasks[tid].value = value;
|
|
}
|
|
for (int tid = 1; tid < 4; ++tid) {
|
|
pthread_create(&tids[tid - 1], NULL, __fill_rows_worker, &tasks[tid]);
|
|
}
|
|
__fill_rows_worker(&tasks[0]);
|
|
for (int tid = 0; tid < 3; ++tid) {
|
|
pthread_join(tids[tid], NULL);
|
|
}
|
|
}
|
|
|
|
// Pointer to a parallel-region body; it takes one int argument and returns
// nothing.
typedef void (*par_worker_fn_t)(int tid);
|
|
|
|
// Shared synchronisation state for one parallel slot. Field order matters:
// the struct is initialised positionally.
typedef struct {
  pthread_mutex_t mutex;    // Guards generation and remaining.
  pthread_cond_t start_cv;  // Helpers wait here for generation to change.
  pthread_cond_t done_cv;   // Signalled when remaining drops to zero.
  int generation;           // Bumped once per launch of the parallel region.
  int remaining;            // Helpers that have not yet finished this launch.
  int helper_count;         // Number of helper threads successfully created.
} par_slot_state_t;
|
|
|
|
typedef struct {
|
|
par_slot_state_t* state;
|
|
par_worker_fn_t worker;
|
|
int tid;
|
|
} par_thread_arg_t;
|
|
|
|
static void* __par_pool_worker(void* opaque) {
|
|
par_thread_arg_t* arg = (par_thread_arg_t*)opaque;
|
|
par_slot_state_t* state = arg->state;
|
|
int seen_generation = 0;
|
|
|
|
pthread_mutex_lock(&state->mutex);
|
|
for (;;) {
|
|
while (state->generation == seen_generation) {
|
|
pthread_cond_wait(&state->start_cv, &state->mutex);
|
|
}
|
|
seen_generation = state->generation;
|
|
pthread_mutex_unlock(&state->mutex);
|
|
|
|
arg->worker(arg->tid);
|
|
|
|
pthread_mutex_lock(&state->mutex);
|
|
if (state->remaining > 0) {
|
|
--state->remaining;
|
|
if (state->remaining == 0) {
|
|
pthread_cond_signal(&state->done_cv);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Defines one parallel slot N. The expansion provides:
//   - a weak declaration of __par_worker<N>(int), the region body that is
//     expected to be supplied by another object file (it may be absent);
//   - static slot state, a pthread_once guard and argument blocks for up to
//     three helper threads;
//   - __par_init<N>(): creates the three detached helper threads running
//     __par_pool_worker with tids 1..3 and counts those that were created
//     successfully in helper_count;
//   - __par_run<N>(): returns immediately if __par_worker<N> is not linked in;
//     otherwise initialises the helpers once, sets remaining = helper_count,
//     bumps generation and broadcasts start_cv, runs __par_worker<N>(0) on the
//     calling thread, then waits on done_cv until remaining is zero.
#define DECL_PAR_SLOT(N)                                                   \
  extern void __par_worker##N(int) __attribute__((weak));                  \
  static par_slot_state_t __par_state##N = {                               \
      PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER,                 \
      PTHREAD_COND_INITIALIZER, 0, 0, 0};                                  \
  static pthread_once_t __par_once##N = PTHREAD_ONCE_INIT;                 \
  static par_thread_arg_t __par_args##N[3];                                \
  static void __par_init##N(void) {                                        \
    pthread_attr_t attr;                                                   \
    pthread_t tid;                                                         \
    pthread_attr_init(&attr);                                              \
    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);           \
    for (int i = 0; i < 3; ++i) {                                          \
      __par_args##N[i].state = &__par_state##N;                            \
      __par_args##N[i].worker = __par_worker##N;                           \
      __par_args##N[i].tid = i + 1;                                        \
      if (pthread_create(&tid, &attr, __par_pool_worker,                   \
                         &__par_args##N[i]) == 0) {                        \
        ++__par_state##N.helper_count;                                     \
      }                                                                    \
    }                                                                      \
    pthread_attr_destroy(&attr);                                           \
  }                                                                        \
  void __par_run##N(void) {                                                \
    if (!__par_worker##N) return;                                          \
    pthread_once(&__par_once##N, __par_init##N);                           \
    pthread_mutex_lock(&__par_state##N.mutex);                             \
    __par_state##N.remaining = __par_state##N.helper_count;                \
    ++__par_state##N.generation;                                           \
    pthread_cond_broadcast(&__par_state##N.start_cv);                      \
    pthread_mutex_unlock(&__par_state##N.mutex);                           \
    __par_worker##N(0);                                                    \
    pthread_mutex_lock(&__par_state##N.mutex);                             \
    while (__par_state##N.remaining != 0) {                                \
      pthread_cond_wait(&__par_state##N.done_cv, &__par_state##N.mutex);   \
    }                                                                      \
    pthread_mutex_unlock(&__par_state##N.mutex);                           \
  }
|
|
|
|
// Instantiate eight independent parallel slots, numbered 0 through 7.
DECL_PAR_SLOT(0)
DECL_PAR_SLOT(1)
DECL_PAR_SLOT(2)
DECL_PAR_SLOT(3)
DECL_PAR_SLOT(4)
DECL_PAR_SLOT(5)
DECL_PAR_SLOT(6)
DECL_PAR_SLOT(7)
|