forked from NUDT-compiler/nudt-compiler-cpp
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
648 lines
11 KiB
648 lines
11 KiB
// Global data (AArch64, GNU as syntax).
//
// mod and maxlen are initialised words and stay in .data. The four work
// arrays are zero-filled, so they are placed in .bss: the object file then
// records only their size instead of 32 MiB of literal zero bytes.

        .data

        .globl  mod
        .p2align 2
mod:
        .word   998244353               // 0x3B800001; the code in .text builds this same value as an immediate

        .globl  d
        .p2align 2
d:
        .word   0                       // written by main before it is read

        .globl  maxlen
        .p2align 2
maxlen:
        .word   2097152                 // 2^21; each array below is 4 * 2097152 bytes

        .bss

        .globl  temp
        .p2align 2
temp:
        .zero   8388608                 // scratch buffer written by fft

        .globl  a
        .p2align 2
a:
        .zero   8388608

        .globl  b
        .p2align 2
b:
        .zero   8388608

        .globl  c
        .p2align 2
c:
        .zero   8388608                 // not referenced by the functions in this file
// int multiply(int a, int b)
// ABI:   AAPCS64 -- w0 = a, w1 = b, result in w0.
// Clobb: w9-w11, flags (plus whatever the recursive call clobbers).
//
// Returns 0 when b == 0 and a % 998244353 when b == 1. Otherwise it computes
// h = multiply(a, b / 2), doubles it modulo 998244353 and, when b % 2 == 1,
// adds a once more modulo 998244353. Division and remainder are 32-bit signed
// and truncate toward zero, so a negative b always produces 0.

        .text
        .globl  multiply
        .p2align 2
multiply:
        stp     x29, x30, [sp, #-32]!
        mov     x29, sp
        stp     x19, x20, [sp, #16]
        mov     w19, w0                         // w19 = a
        mov     w20, w1                         // w20 = b

        cbz     w20, .Lmultiply_zero            // b == 0

        movz    w10, #1
        movk    w10, #15232, lsl #16            // w10 = 998244353
        cmp     w20, #1
        b.ne    .Lmultiply_halve

        sdiv    w9, w19, w10                    // b == 1: result = a % mod
        msub    w0, w9, w10, w19
        b       .Lmultiply_ret

.Lmultiply_halve:
        add     w1, w20, w20, lsr #31           // add 1 to a negative b so the shift truncates toward zero
        asr     w1, w1, #1                      // w1 = b / 2
        mov     w0, w19
        bl      multiply                        // w0 = h

        movz    w10, #1                         // the call may have clobbered w10
        movk    w10, #15232, lsl #16
        add     w9, w0, w0
        sdiv    w11, w9, w10
        msub    w0, w11, w10, w9                // w0 = (h + h) % mod

        add     w9, w20, w20, lsr #31
        asr     w9, w9, #1
        sub     w9, w20, w9, lsl #1             // w9 = b - 2 * (b / 2) = b % 2
        cmp     w9, #1
        b.ne    .Lmultiply_ret                  // even (or negative) b: done

        add     w9, w0, w19
        sdiv    w11, w9, w10
        msub    w0, w11, w10, w9                // w0 = (w0 + a) % mod

.Lmultiply_ret:
        ldp     x19, x20, [sp, #16]
        ldp     x29, x30, [sp], #32
        ret

.Lmultiply_zero:
        mov     w0, wzr
        b       .Lmultiply_ret
// int power(int a, int b)
// ABI:   AAPCS64 -- w0 = a, w1 = b, result in w0.
// Clobb: w9, flags, plus whatever multiply clobbers.
//
// Returns 1 when b == 0. Otherwise it computes h = power(a, b / 2),
// squares it with multiply(h, h) and, when b % 2 == 1, multiplies the
// square by a with one more multiply call. Division and remainder are
// 32-bit signed and truncate toward zero.

        .text
        .globl  power
        .p2align 2
power:
        stp     x29, x30, [sp, #-32]!
        mov     x29, sp
        stp     x19, x20, [sp, #16]
        mov     w19, w0                         // w19 = a
        mov     w20, w1                         // w20 = b

        cbnz    w20, .Lpower_recurse
        mov     w0, #1                          // b == 0
        b       .Lpower_ret

.Lpower_recurse:
        add     w1, w20, w20, lsr #31           // bias a negative b so the shift truncates toward zero
        asr     w1, w1, #1                      // w1 = b / 2; w0 still holds a
        bl      power                           // w0 = h

        mov     w1, w0
        bl      multiply                        // w0 = multiply(h, h)

        add     w9, w20, w20, lsr #31
        asr     w9, w9, #1
        sub     w9, w20, w9, lsl #1             // w9 = b % 2
        cmp     w9, #1
        b.ne    .Lpower_ret

        mov     w1, w19
        bl      multiply                        // odd b: w0 = multiply(square, a)

.Lpower_ret:
        ldp     x19, x20, [sp, #16]
        ldp     x29, x30, [sp], #32
        ret
// int memmove(int dst[], int dst_pos, int src[], int len)
// ABI:   AAPCS64 -- x0 = dst, w1 = dst_pos, x2 = src, w3 = len, result in w0.
// Clobb: w4-w6, flags. Leaf function, no stack use.
//
// Copies 32-bit words src[i] -> dst[dst_pos + i] for i = 0 .. len-1 in
// ascending index order and returns the final i (0 when len <= 0).
// NOTE(review): this global symbol has the same name as the C library's
// memmove but a different signature -- confirm the link step is happy with that.

        .text
        .globl  memmove
        .p2align 2
memmove:
        mov     w4, wzr                         // i = 0
        b       .Lmemmove_check

.Lmemmove_copy:
        ldr     w5, [x2, w4, sxtw #2]           // w5 = src[i]
        add     w6, w1, w4                      // 32-bit index dst_pos + i
        str     w5, [x0, w6, sxtw #2]           // dst[dst_pos + i] = w5
        add     w4, w4, #1

.Lmemmove_check:
        cmp     w4, w3
        b.lt    .Lmemmove_copy                  // signed compare: i < len

        mov     w0, w4
        ret
// int fft(int arr[], int begin_pos, int n, int w)
// ABI:   AAPCS64 -- x0 = arr, w1 = begin_pos, w2 = n, w3 = w, result in w0.
// Uses:  memmove, multiply, the global buffer temp.
//
// Returns 1 immediately when n == 1. Otherwise, for the n words starting at
// arr[begin_pos]:
//   1. even-indexed words are gathered into temp[0 .. ], odd-indexed words
//      into temp[n/2 .. ], and temp is copied back with memmove;
//   2. fft is called on each half with multiply(w, w) as the new w;
//   3. with wn starting at 1, for i = 0 .. n/2-1:
//         x = arr[begin_pos+i], t = multiply(wn, arr[begin_pos+i+n/2])
//         arr[begin_pos+i]       = (x + t) % 998244353
//         arr[begin_pos+i+n/2]   = (x - t + 998244353) % 998244353
//         wn = multiply(wn, w)
// and 0 is returned.
//
// multiply only touches registers and its own stack frame, so
// multiply(w, w) and multiply(wn, y) are each evaluated once and the
// result reused; the values stored are unchanged.
//
// Register roles (callee-saved, live across calls):
//   x19 arr   w20 begin_pos   w21 n, then n/2   w22 w
//   w23 w*w during recursion, then wn           w24 i (combine loop)

        .text
        .globl  fft
        .p2align 2
fft:
        stp     x29, x30, [sp, #-64]!
        mov     x29, sp
        stp     x19, x20, [sp, #16]
        stp     x21, x22, [sp, #32]
        stp     x23, x24, [sp, #48]

        cmp     w2, #1
        b.ne    .Lfft_split
        mov     w0, #1                          // n == 1: nothing to do
        b       .Lfft_ret

.Lfft_split:
        mov     x19, x0
        mov     w20, w1
        mov     w21, w2
        mov     w22, w3

        adrp    x9, temp
        add     x9, x9, :lo12:temp              // x9 = temp
        add     w10, w21, w21, lsr #31
        asr     w10, w10, #1                    // w10 = n / 2 (truncating)
        mov     w11, wzr                        // i = 0
        b       .Lfft_split_check

.Lfft_split_body:
        add     w12, w11, w20
        ldr     w13, [x19, w12, sxtw #2]        // w13 = arr[i + begin_pos]
        add     w14, w11, w11, lsr #31
        asr     w14, w14, #1                    // w14 = i / 2
        tbz     w11, #0, .Lfft_split_store      // i % 2 == 0 exactly when bit 0 is clear
        add     w14, w10, w14                   // odd i: destination is n/2 + i/2
.Lfft_split_store:
        str     w13, [x9, w14, sxtw #2]
        add     w11, w11, #1
.Lfft_split_check:
        cmp     w11, w21
        b.lt    .Lfft_split_body

        mov     x0, x19                         // memmove(arr, begin_pos, temp, n)
        mov     w1, w20
        mov     x2, x9
        mov     w3, w21
        bl      memmove

        add     w21, w21, w21, lsr #31
        asr     w21, w21, #1                    // from here on w21 = n / 2

        mov     w0, w22
        mov     w1, w22
        bl      multiply
        mov     w23, w0                         // w23 = multiply(w, w), shared by both halves

        mov     x0, x19                         // fft(arr, begin_pos, n/2, w*w)
        mov     w1, w20
        mov     w2, w21
        mov     w3, w23
        bl      fft

        mov     x0, x19                         // fft(arr, begin_pos + n/2, n/2, w*w)
        add     w1, w20, w21
        mov     w2, w21
        mov     w3, w23
        bl      fft

        mov     w23, #1                         // wn = 1
        mov     w24, wzr                        // i = 0
        b       .Lfft_combine_check

.Lfft_combine_body:
        add     w8, w20, w24
        add     w8, w8, w21                     // hi = begin_pos + i + n/2
        ldr     w1, [x19, w8, sxtw #2]          // y = arr[hi]
        mov     w0, w23
        bl      multiply                        // w0 = t = multiply(wn, y)

        add     w9, w20, w24                    // lo = begin_pos + i
        add     w10, w9, w21                    // hi = lo + n/2
        ldr     w11, [x19, w9, sxtw #2]         // x = arr[lo]; not yet overwritten
        movz    w12, #1
        movk    w12, #15232, lsl #16            // w12 = 998244353

        add     w13, w11, w0
        sdiv    w14, w13, w12
        msub    w13, w14, w12, w13
        str     w13, [x19, w9, sxtw #2]         // arr[lo] = (x + t) % mod

        sub     w13, w11, w0
        add     w13, w13, w12
        sdiv    w14, w13, w12
        msub    w13, w14, w12, w13
        str     w13, [x19, w10, sxtw #2]        // arr[hi] = (x - t + mod) % mod

        mov     w0, w23
        mov     w1, w22
        bl      multiply
        mov     w23, w0                         // wn = multiply(wn, w)
        add     w24, w24, #1

.Lfft_combine_check:
        cmp     w24, w21
        b.lt    .Lfft_combine_body

        mov     w0, wzr

.Lfft_ret:
        ldp     x23, x24, [sp, #48]
        ldp     x21, x22, [sp, #32]
        ldp     x19, x20, [sp, #16]
        ldp     x29, x30, [sp], #64
        ret
// int main(void)
// ABI:   AAPCS64 -- result in w0 (always 0).
// Uses:  getarray, putarray, _sysy_starttime, _sysy_stoptime (external),
//        power, multiply, fft, and the globals a, b, d.
//
// Steps:
//   1. n = getarray(a), m = getarray(b); _sysy_starttime(60).
//   2. d = 1, doubled while d < n + m - 1; the final value is stored to d.
//   3. r = power(5, (mod-1)/d); fft(a, 0, d, r); fft(b, 0, d, r).
//   4. a[i] = multiply(a[i], b[i]) for i < d.
//   5. fft(a, 0, d, power(5, (mod-1) - (mod-1)/d)).
//   6. s = power(d, mod-2); a[i] = multiply(a[i], s) for i < d.
//   7. _sysy_stoptime(79); putarray(n + m - 1, a); return 0.
// mod is 998244353 throughout.
//
// power and multiply only touch registers and their own stack frames, and
// nothing between the uses changes d, so r (step 3) and s (step 6) are each
// computed once instead of once per use / per loop iteration. d is always
// at least 1, so the step-6 loop runs at least once and hoisting s adds no
// call that would otherwise not have happened.
//
// NOTE(review): 60 and 79 look like source line numbers passed to the timing
// hooks -- confirm against the runtime library.
//
// Register roles (callee-saved):
//   w19 n   w20 m   w21 d   x23 &a
//   w22 r in step 3, loop index afterwards
//   x24 &b through step 4, s in step 6

        .text
        .globl  main
        .p2align 2
main:
        stp     x29, x30, [sp, #-64]!
        mov     x29, sp
        stp     x19, x20, [sp, #16]
        stp     x21, x22, [sp, #32]
        stp     x23, x24, [sp, #48]

        adrp    x23, a
        add     x23, x23, :lo12:a
        adrp    x24, b
        add     x24, x24, :lo12:b

        mov     x0, x23
        bl      getarray
        mov     w19, w0                         // n
        mov     x0, x24
        bl      getarray
        mov     w20, w0                         // m

        mov     w0, #60
        bl      _sysy_starttime

        add     w8, w19, w20
        sub     w8, w8, #1                      // w8 = n + m - 1
        mov     w21, #1                         // d = 1
        b       .Lmain_grow_check
.Lmain_grow:
        lsl     w21, w21, #1                    // d = d * 2
.Lmain_grow_check:
        cmp     w21, w8
        b.lt    .Lmain_grow
        adrp    x9, d
        str     w21, [x9, #:lo12:d]             // publish the final d

        movz    w8, #15232, lsl #16             // w8 = mod - 1 = 998244352
        sdiv    w1, w8, w21                     // (mod - 1) / d
        mov     w0, #5
        bl      power
        mov     w22, w0                         // r, used for both forward transforms

        mov     x0, x23                         // fft(a, 0, d, r)
        mov     w1, wzr
        mov     w2, w21
        mov     w3, w22
        bl      fft

        mov     x0, x24                         // fft(b, 0, d, r)
        mov     w1, wzr
        mov     w2, w21
        mov     w3, w22
        bl      fft

        mov     w22, wzr                        // i = 0
        b       .Lmain_pointwise_check
.Lmain_pointwise:
        ldr     w0, [x23, w22, sxtw #2]
        ldr     w1, [x24, w22, sxtw #2]
        bl      multiply
        str     w0, [x23, w22, sxtw #2]         // a[i] = multiply(a[i], b[i])
        add     w22, w22, #1
.Lmain_pointwise_check:
        cmp     w22, w21
        b.lt    .Lmain_pointwise

        movz    w8, #15232, lsl #16             // mod - 1
        sdiv    w9, w8, w21
        sub     w1, w8, w9                      // (mod - 1) - (mod - 1) / d
        mov     w0, #5
        bl      power
        mov     w3, w0
        mov     x0, x23                         // fft(a, 0, d, that power)
        mov     w1, wzr
        mov     w2, w21
        bl      fft

        mov     w0, w21
        movz    w1, #65535
        movk    w1, #15231, lsl #16             // w1 = mod - 2 = 998244351
        bl      power
        mov     w24, w0                         // s = power(d, mod - 2), loop-invariant

        mov     w22, wzr                        // i = 0
        b       .Lmain_scale_check
.Lmain_scale:
        ldr     w0, [x23, w22, sxtw #2]
        mov     w1, w24
        bl      multiply
        str     w0, [x23, w22, sxtw #2]         // a[i] = multiply(a[i], s)
        add     w22, w22, #1
.Lmain_scale_check:
        cmp     w22, w21
        b.lt    .Lmain_scale

        mov     w0, #79
        bl      _sysy_stoptime

        add     w0, w19, w20
        sub     w0, w0, #1                      // n + m - 1
        mov     x1, x23
        bl      putarray

        mov     w0, wzr
        ldp     x23, x24, [sp, #48]
        ldp     x21, x22, [sp, #32]
        ldp     x19, x20, [sp, #16]
        ldp     x29, x30, [sp], #64
        ret