// You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
//
// 708 lines
// 13 KiB

.data
.globl N
.p2align 2
N:
.word 2048
.globl KSIZE
.p2align 2
KSIZE:
.word 5
.globl state
.p2align 2
state:
.word 0
.globl repeat_factor
.p2align 2
repeat_factor:
.word 0
.globl N_eff
.p2align 2
N_eff:
.word 0
.globl In
.p2align 2
In:
.zero 16777216
.globl Out
.p2align 2
Out:
.zero 16777216
.globl K
.p2align 2
K:
.zero 100
.text
.globl get_random
.p2align 2
get_random:
.L.get_random.0:
stp x29, x30, [sp, #-16]!
mov x29, sp
adrp x13, state
ldr w10, [x13, #:lo12:state]
mov w8, #2048
add w8, w10, #2047
cmp w10, #0
csel w8, w8, w10, lt
asr w9, w8, #11
mov w8, #2048
msub w10, w9, w8, w10
mov w8, #0
mov w11, w8
b .L.get_random.1
.L.get_random.1:
cmp w11, w10
b.lt .L.get_random.2
b .L.get_random.3
.L.get_random.2:
adrp x13, state
ldr w9, [x13, #:lo12:state]
mov w8, #128
add w8, w9, w8
adrp x13, state
str w8, [x13, #:lo12:state]
adrp x13, state
ldr w8, [x13, #:lo12:state]
mov w9, #65535
sdiv w14, w8, w9
msub w8, w14, w9, w8
adrp x13, state
str w8, [x13, #:lo12:state]
mov w8, #1
add w8, w11, w8
mov w11, w8
b .L.get_random.1
.L.get_random.3:
adrp x13, state
ldr w9, [x13, #:lo12:state]
mov w8, #65535
sdiv w14, w9, w8
msub w8, w14, w8, w9
adrp x13, state
str w8, [x13, #:lo12:state]
adrp x13, state
ldr w8, [x13, #:lo12:state]
mov w0, w8
ldp x29, x30, [sp], #16
ret
.text
.globl idx
.p2align 2
idx:
.L.idx.0:
stp x29, x30, [sp, #-16]!
mov x29, sp
mov w8, w0
mov w9, w1
mov w10, w2
mul w8, w8, w10
add w8, w8, w9
mov w0, w8
ldp x29, x30, [sp], #16
ret
.text
.globl init_matrix
.p2align 2
init_matrix:
.L.init_matrix.0:
stp x29, x30, [sp, #-16]!
mov x29, sp
sub sp, sp, #64
str x24, [sp, #0]
str x22, [sp, #8]
str x21, [sp, #16]
str x20, [sp, #24]
str x19, [sp, #32]
str x23, [sp, #40]
mov x19, x0
adrp x13, N_eff
ldr w22, [x13, #:lo12:N_eff]
mov w8, #0
mov w9, w8
b .L.init_matrix.1
.L.init_matrix.1:
cmp w9, w22
b.lt .L.init_matrix.2
b .L.init_matrix.3
.L.init_matrix.2:
adrp x13, N_eff
ldr w10, [x13, #:lo12:N_eff]
mov w8, #2
add w8, w10, #1
cmp w10, #0
csel w8, w8, w10, lt
asr w8, w8, #1
cmp w9, w8
b.lt .L.init_matrix.4
b .L.init_matrix.5
.L.init_matrix.3:
ldr x24, [sp, #0]
ldr x22, [sp, #8]
ldr x21, [sp, #16]
ldr x20, [sp, #24]
ldr x19, [sp, #32]
ldr x23, [sp, #40]
add sp, sp, #64
ldp x29, x30, [sp], #16
ret
.L.init_matrix.4:
adrp x13, N_eff
ldr w23, [x13, #:lo12:N_eff]
mov w8, #0
mov w20, w8
mov w21, w9
b .L.init_matrix.7
.L.init_matrix.5:
adrp x13, N_eff
ldr w20, [x13, #:lo12:N_eff]
mov w8, #0
mov w21, w8
mov w23, w9
b .L.init_matrix.10
.L.init_matrix.6:
mov w9, #1
add w8, w8, w9
mov w9, w8
b .L.init_matrix.1
.L.init_matrix.7:
cmp w20, w23
b.lt .L.init_matrix.8
b .L.init_matrix.9
.L.init_matrix.8:
adrp x13, N_eff
ldr w8, [x13, #:lo12:N_eff]
mov w0, w21
mov w1, w20
mov w2, w8
bl idx
mov w24, w0
bl get_random
mov w9, w0
mov w8, #65535
sdiv w14, w9, w8
msub w9, w14, w8, w9
sxtw x8, w24
lsl x8, x8, #2
add x8, x19, x8
str w9, [x8]
mov w8, #1
add w8, w20, w8
mov w20, w8
b .L.init_matrix.7
.L.init_matrix.9:
mov w8, w21
b .L.init_matrix.6
.L.init_matrix.10:
cmp w21, w20
b.lt .L.init_matrix.11
b .L.init_matrix.12
.L.init_matrix.11:
adrp x13, N_eff
ldr w8, [x13, #:lo12:N_eff]
mov w0, w23
mov w1, w21
mov w2, w8
bl idx
mov w8, w0
sxtw x8, w8
lsl x8, x8, #2
add x8, x19, x8
mov w9, #-1
str w9, [x8]
mov w8, #1
add w8, w21, w8
mov w21, w8
b .L.init_matrix.10
.L.init_matrix.12:
mov w8, w23
b .L.init_matrix.6
.text
.globl init_kernel
.p2align 2
init_kernel:
.L.init_kernel.0:
stp x29, x30, [sp, #-16]!
mov x29, sp
mov x8, x0
mov w9, #0
b .L.init_kernel.1
.L.init_kernel.1:
mov w10, #25
cmp w9, w10
b.lt .L.init_kernel.2
b .L.init_kernel.3
.L.init_kernel.2:
mov w10, #3
sdiv w14, w9, w10
msub w10, w14, w10, w9
mov w11, #1
sub w10, w10, w11
sxtw x12, w9
lsl x12, x12, #2
add x12, x8, x12
str w10, [x12]
add w9, w9, w11
b .L.init_kernel.1
.L.init_kernel.3:
ldp x29, x30, [sp], #16
ret
.text
.globl conv2d
.p2align 2
// ---------------------------------------------------------------------
// conv2d
//
// In:  x0 = input matrix, x1 = output matrix, x2 = kernel
//      (all three are accessed as arrays of 32-bit words).
//
// Runs `repeat_factor` passes. In each pass, for every (i, j) with
// 0 <= i, j < N_eff:
//     sum = 0
//     for ki in 0..4, kj in 0..4:
//         ii = i + ki - 2 ; jj = j + kj - 2
//         if 0 <= ii < N_eff and 0 <= jj < N_eff:
//             sum += input[idx(ii, jj, N_eff)] * kernel[idx(ki, kj, 5)]
//     output[idx(i, j, N_eff)] = sum
//
// Stack locals (relative to x29):
//   [-8]  input pointer        [-12] ii = i + ki - 2
//   [-16] kj                   [-20] ki
//   [-24] pass limit           [-28] N_eff read at the start of a pass
//
// Callee-saved registers used across the calls to idx:
//   x19 = kernel   x21 = output   w20 = j   w22 = i   w23 = running sum
//   w24 = column limit   w25 = pass counter   w26 = loaded input element
// ---------------------------------------------------------------------
conv2d:
.L.conv2d.0:
stp x29, x30, [sp, #-16]!
mov x29, sp
sub sp, sp, #112
// Save the callee-saved registers this function uses.
str x23, [sp, #0]
str x20, [sp, #8]
str x22, [sp, #16]
str x25, [sp, #24]
str x19, [sp, #32]
str x21, [sp, #40]
str x24, [sp, #48]
str x26, [sp, #56]
mov x8, x0
stur x8, [x29, #-8]                 // spill the input pointer
mov x21, x1                         // x21 = output
mov x19, x2                         // x19 = kernel
adrp x13, repeat_factor
ldr w8, [x13, #:lo12:repeat_factor]
stur w8, [x29, #-24]                // pass limit
mov w8, #0                          // w8 = pass counter
b .L.conv2d.1
// Pass loop header: continue while pass counter < repeat_factor.
.L.conv2d.1:
ldur w9, [x29, #-24]
cmp w8, w9
b.lt .L.conv2d.2
b .L.conv2d.3
// Start of a pass: capture the row limit and reset i.
.L.conv2d.2:
adrp x13, N_eff
ldr w9, [x13, #:lo12:N_eff]
stur w9, [x29, #-28]
mov w9, #0                          // w9 = i
b .L.conv2d.4
// Exit: restore callee-saved registers and return.
.L.conv2d.3:
ldr x23, [sp, #0]
ldr x20, [sp, #8]
ldr x22, [sp, #16]
ldr x25, [sp, #24]
ldr x19, [sp, #32]
ldr x21, [sp, #40]
ldr x24, [sp, #48]
ldr x26, [sp, #56]
add sp, sp, #112
ldp x29, x30, [sp], #16
ret
// Row loop header: continue while i < row limit.
.L.conv2d.4:
ldur w10, [x29, #-28]
cmp w9, w10
b.lt .L.conv2d.5
b .L.conv2d.6
// Start of a row: load the column limit and reset j.
.L.conv2d.5:
adrp x13, N_eff
ldr w24, [x13, #:lo12:N_eff]
mov w10, #0
mov w11, w10                        // w11 = j
mov w12, w8                         // w12 = copy of the pass counter
b .L.conv2d.7
// End of a pass: advance the pass counter.
.L.conv2d.6:
mov w9, #1
add w8, w8, w9
b .L.conv2d.1
// Column loop header: continue while j < column limit.
.L.conv2d.7:
cmp w11, w24
b.lt .L.conv2d.8
b .L.conv2d.9
// Start of one output element: clear the sum and move the loop state
// into callee-saved registers so it survives the calls to idx.
.L.conv2d.8:
mov w8, #0
mov w10, w8                         // w10 = ki
mov w23, w8                         // w23 = sum
mov w20, w11                        // w20 = j
mov w22, w9                         // w22 = i
mov w25, w12                        // w25 = pass counter
b .L.conv2d.10
// End of a row: i += 1 and put the pass counter back in w8.
.L.conv2d.9:
mov w8, #1
add w8, w9, w8
mov w9, w8
mov w8, w12
b .L.conv2d.4
// Kernel-row loop header: continue while ki < 5.
.L.conv2d.10:
mov w8, #5
cmp w10, w8
b.lt .L.conv2d.11
b .L.conv2d.12
// Start of a kernel row: ii = i + ki - 2, kj = 0, spill ki.
.L.conv2d.11:
add w9, w22, w10
mov w8, #2
sub w8, w9, w8
stur w8, [x29, #-12]
mov w8, #0
stur w8, [x29, #-16]
mov w8, w10
stur w8, [x29, #-20]
b .L.conv2d.13
// All 25 taps done: output[idx(i, j, N_eff)] = sum, then j += 1.
.L.conv2d.12:
adrp x13, N_eff
ldr w8, [x13, #:lo12:N_eff]
mov w0, w22
mov w1, w20
mov w2, w8
bl idx
mov w8, w0
sxtw x8, w8
lsl x8, x8, #2
add x8, x21, x8
str w23, [x8]
mov w8, #1
add w8, w20, w8
mov w11, w8                         // w11 = j + 1
mov w9, w22                         // restore i
mov w12, w25                        // restore the pass counter copy
b .L.conv2d.7
// Kernel-column loop header: continue while kj < 5.
.L.conv2d.13:
mov w8, #5
ldur w9, [x29, #-16]
cmp w9, w8
b.lt .L.conv2d.14
b .L.conv2d.15
// One tap: jj = j + kj - 2, then the four bounds checks
// (ii >= 0, ii < N_eff, jj >= 0, jj < N_eff). Any failed check goes to
// .L.conv2d.17 with the sum unchanged.
.L.conv2d.14:
ldur w8, [x29, #-16]
add w9, w20, w8
mov w8, #2
sub w10, w9, w8                     // w10 = jj
mov w8, #0
ldur w9, [x29, #-12]
cmp w9, w8
b.ge .L.conv2d.20                   // ii >= 0
mov w11, w23
b .L.conv2d.17
// End of a kernel row: ki += 1.
.L.conv2d.15:
mov w8, #1
ldur w9, [x29, #-20]
add w8, w9, w8
mov w10, w8
b .L.conv2d.10
// Tap is in bounds: sum += input[idx(ii, jj, N_eff)] * kernel[idx(ki, kj, 5)].
.L.conv2d.16:
adrp x13, N_eff
ldr w8, [x13, #:lo12:N_eff]
ldur w9, [x29, #-12]
mov w0, w9
mov w1, w10
mov w2, w8
bl idx
mov w8, w0
sxtw x8, w8
lsl x8, x8, #2
ldur x9, [x29, #-8]
add x8, x9, x8
ldr w26, [x8]                       // w26 = input element
ldur w8, [x29, #-20]
mov w0, w8
ldur w8, [x29, #-16]
mov w1, w8
mov w8, #5
mov w2, w8
bl idx
mov w8, w0
sxtw x8, w8
lsl x8, x8, #2
add x8, x19, x8
ldr w8, [x8]                        // w8 = kernel element
mul w8, w26, w8
add w8, w23, w8
mov w11, w8                         // w11 = updated sum
b .L.conv2d.17
// Tap epilogue: kj += 1 and commit w11 as the running sum.
.L.conv2d.17:
mov w8, #1
ldur w9, [x29, #-16]
add w8, w9, w8
stur w8, [x29, #-16]
mov w23, w11
b .L.conv2d.13
// Bounds check 4 of 4: jj < N_eff.
.L.conv2d.18:
adrp x13, N_eff
ldr w8, [x13, #:lo12:N_eff]
cmp w10, w8
b.lt .L.conv2d.16
mov w11, w23
b .L.conv2d.17
// Bounds check 3 of 4: jj >= 0.
.L.conv2d.19:
mov w8, #0
cmp w10, w8
b.ge .L.conv2d.18
mov w11, w23
b .L.conv2d.17
// Bounds check 2 of 4: ii < N_eff.
.L.conv2d.20:
adrp x13, N_eff
ldr w8, [x13, #:lo12:N_eff]
ldur w9, [x29, #-12]
cmp w9, w8
b.lt .L.conv2d.19
mov w11, w23
b .L.conv2d.17
.text
.globl nonlinear
.p2align 2
nonlinear:
.L.nonlinear.0:
stp x29, x30, [sp, #-16]!
mov x29, sp
mov x11, x0
adrp x13, N_eff
ldr w9, [x13, #:lo12:N_eff]
adrp x13, N_eff
ldr w8, [x13, #:lo12:N_eff]
mul w12, w9, w8
mov w8, #0
mov w15, w8
b .L.nonlinear.1
.L.nonlinear.1:
cmp w15, w12
b.lt .L.nonlinear.2
b .L.nonlinear.3
.L.nonlinear.2:
sxtw x8, w15
lsl x8, x8, #2
add x8, x11, x8
ldr w10, [x8]
mul w9, w10, w10
mov w8, #3
mul w8, w8, w10
add w9, w9, w8
mov w8, #7
sub w9, w9, w8
mov w8, #97
sdiv w14, w9, w8
msub w9, w14, w8, w9
sxtw x8, w15
lsl x8, x8, #2
add x8, x11, x8
str w9, [x8]
mov w8, #1
add w8, w15, w8
mov w15, w8
b .L.nonlinear.1
.L.nonlinear.3:
ldp x29, x30, [sp], #16
ret
.text
.globl row_reduce
.p2align 2
row_reduce:
.L.row_reduce.0:
stp x29, x30, [sp, #-16]!
mov x29, sp
sub sp, sp, #80
str x21, [sp, #0]
str x24, [sp, #8]
str x20, [sp, #16]
str x19, [sp, #24]
str x25, [sp, #32]
str x22, [sp, #40]
str x23, [sp, #48]
mov x23, x0
adrp x13, N_eff
ldr w22, [x13, #:lo12:N_eff]
mov w8, #0
b .L.row_reduce.1
.L.row_reduce.1:
cmp w8, w22
b.lt .L.row_reduce.2
b .L.row_reduce.3
.L.row_reduce.2:
adrp x13, N_eff
ldr w20, [x13, #:lo12:N_eff]
mov w9, #0
mov w21, w9
mov w24, w9
mov w19, w8
b .L.row_reduce.4
.L.row_reduce.3:
ldr x21, [sp, #0]
ldr x24, [sp, #8]
ldr x20, [sp, #16]
ldr x19, [sp, #24]
ldr x25, [sp, #32]
ldr x22, [sp, #40]
ldr x23, [sp, #48]
add sp, sp, #80
ldp x29, x30, [sp], #16
ret
.L.row_reduce.4:
cmp w21, w20
b.lt .L.row_reduce.5
b .L.row_reduce.6
.L.row_reduce.5:
adrp x13, N_eff
ldr w8, [x13, #:lo12:N_eff]
mov w0, w19
mov w1, w21
mov w2, w8
bl idx
mov w8, w0
sxtw x8, w8
lsl x8, x8, #2
add x8, x23, x8
ldr w8, [x8]
add w9, w24, w8
mov w8, #1
add w8, w21, w8
mov w21, w8
mov w24, w9
b .L.row_reduce.4
.L.row_reduce.6:
adrp x13, N_eff
ldr w21, [x13, #:lo12:N_eff]
mov w8, #0
mov w25, w8
mov w20, w19
b .L.row_reduce.7
.L.row_reduce.7:
cmp w25, w21
b.lt .L.row_reduce.8
b .L.row_reduce.9
.L.row_reduce.8:
adrp x13, N_eff
ldr w8, [x13, #:lo12:N_eff]
mov w0, w20
mov w1, w25
mov w2, w8
bl idx
mov w19, w0
adrp x13, N_eff
ldr w8, [x13, #:lo12:N_eff]
mov w0, w20
mov w1, w25
mov w2, w8
bl idx
mov w8, w0
sxtw x8, w8
lsl x8, x8, #2
add x8, x23, x8
ldr w8, [x8]
sub w8, w8, w24
sxtw x9, w19
lsl x9, x9, #2
add x9, x23, x9
str w8, [x9]
mov w8, #1
add w8, w25, w8
mov w25, w8
b .L.row_reduce.7
.L.row_reduce.9:
mov w8, #1
add w8, w20, w8
b .L.row_reduce.1
.text
.globl checksum
.p2align 2
checksum:
.L.checksum.0:
stp x29, x30, [sp, #-16]!
mov x29, sp
mov x9, x0
adrp x13, N_eff
ldr w8, [x13, #:lo12:N_eff]
adrp x13, N_eff
ldr w10, [x13, #:lo12:N_eff]
mul w11, w8, w10
mov w8, #0
mov w10, w8
b .L.checksum.1
.L.checksum.1:
cmp w10, w11
b.lt .L.checksum.2
b .L.checksum.3
.L.checksum.2:
sxtw x12, w10
lsl x12, x12, #2
add x12, x9, x12
ldr w12, [x12]
add w12, w8, w12
mov w8, #1
add w8, w10, w8
mov w10, w8
mov w8, w12
b .L.checksum.1
.L.checksum.3:
mov w0, w8
ldp x29, x30, [sp], #16
ret
.text
.globl main
.p2align 2
main:
.L.main.0:
stp x29, x30, [sp, #-16]!
mov x29, sp
sub sp, sp, #32
str x19, [sp, #0]
str x20, [sp, #8]
bl getint
mov w8, w0
adrp x13, state
str w8, [x13, #:lo12:state]
bl getint
mov w8, w0
adrp x13, repeat_factor
str w8, [x13, #:lo12:repeat_factor]
adrp x13, state
ldr w8, [x13, #:lo12:state]
mov w9, #513
sdiv w14, w8, w9
msub w8, w14, w9, w8
mov w9, #64
add w8, w8, w9
adrp x13, N_eff
str w8, [x13, #:lo12:N_eff]
mov w8, #134
mov w0, w8
bl _sysy_starttime
adrp x19, In
add x19, x19, :lo12:In
mov x0, x19
bl init_matrix
adrp x20, K
add x20, x20, :lo12:K
mov x0, x20
bl init_kernel
mov x0, x19
adrp x19, Out
add x19, x19, :lo12:Out
mov x1, x19
mov x2, x20
bl conv2d
mov x0, x19
bl nonlinear
mov x0, x19
bl row_reduce
mov x0, x19
bl checksum
mov w19, w0
mov w8, #145
mov w0, w8
bl _sysy_stoptime
mov w0, w19
bl putint
mov w8, #10
mov w0, w8
bl putch
mov w8, #0
mov w0, w8
ldr x19, [sp, #0]
ldr x20, [sp, #8]
add sp, sp, #32
ldp x29, x30, [sp], #16
ret