// You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

// 648 lines
// 11 KiB

// ---------------------------------------------------------------------
// Global data (AArch64, ELF, GNU as syntax).
//
// Globals with a non-zero initial value stay in .data.  Globals whose
// initial value is all zeroes are placed in .bss so that the object file
// and the executable do not carry 32 MiB of literal zero bytes; the
// symbols, sizes, alignment and initial contents seen by the code are
// unchanged.
// ---------------------------------------------------------------------
.data
.globl mod
.p2align 2
mod:
    .word 998244353                     // the modulus; the code below uses the same value as an immediate
.globl maxlen
.p2align 2
maxlen:
    .word 2097152                       // 8388608 / 4: number of 32-bit words in each array below

.bss
.globl d
.p2align 2
d:
    .zero 4                             // transform length; written by main before it is read
.globl temp
.p2align 2
temp:
    .zero 8388608                       // scratch array used by fft while reordering
.globl a
.p2align 2
a:
    .zero 8388608                       // first input array; also receives the result
.globl b
.p2align 2
b:
    .zero 8388608                       // second input array
.globl c
.p2align 2
c:
    .zero 8388608                       // not referenced by any code visible in this file
.text
.globl multiply
.p2align 2
// ---------------------------------------------------------------------
// int multiply(int a, int b)
//
// Modular product by recursive doubling, using only 32-bit additions
// and remainders with M = 998244353:
//     b == 0 -> 0
//     b == 1 -> a % M
//     else   -> r = (2 * multiply(a, b / 2)) % M
//               return (b % 2 == 1) ? (r + a) % M : r
// "/" and "%" follow C semantics (truncate toward zero).
//
// In:    w0 = a, w1 = b
// Out:   w0 = result
// Saved: x19 (a), x20 (b), x29, x30
// ---------------------------------------------------------------------
multiply:
    stp     x29, x30, [sp, #-32]!
    mov     x29, sp
    stp     x19, x20, [sp, #16]
    mov     w19, w0                     // w19 = a
    mov     w20, w1                     // w20 = b
    cbnz    w20, .L.multiply.nonzero
    mov     w0, wzr                     // b == 0: product is 0
    b       .L.multiply.leave
.L.multiply.nonzero:
    cmp     w20, #1
    b.ne    .L.multiply.split
    movz    w9, #1
    movk    w9, #15232, lsl #16         // w9 = M = 998244353
    sdiv    w10, w19, w9
    msub    w0, w10, w9, w19            // b == 1: a % M
    b       .L.multiply.leave
.L.multiply.split:
    add     w1, w20, w20, lsr #31       // add the sign bit so the shift truncates toward zero
    asr     w1, w1, #1                  // w1 = b / 2
    mov     w0, w19
    bl      multiply                    // w0 = multiply(a, b / 2)
    add     w11, w0, w0
    movz    w9, #1
    movk    w9, #15232, lsl #16         // reload M: w9 is not preserved across the call
    sdiv    w10, w11, w9
    msub    w0, w10, w9, w11            // w0 = (2 * partial) % M
    add     w10, w20, w20, lsr #31
    asr     w10, w10, #1
    sub     w10, w20, w10, lsl #1       // w10 = b % 2
    cmp     w10, #1
    b.ne    .L.multiply.leave           // remainder is not 1: doubled value is the answer
    add     w11, w0, w19
    sdiv    w10, w11, w9
    msub    w0, w10, w9, w11            // remainder is 1: add one more a, reduce again
.L.multiply.leave:
    ldp     x19, x20, [sp, #16]
    ldp     x29, x30, [sp], #32
    ret
.text
.globl power
.p2align 2
// ---------------------------------------------------------------------
// int power(int a, int b)
//
// Exponentiation by recursive squaring; every product goes through
// multiply():
//     b == 0 -> 1
//     else   -> s = power(a, b / 2); s = multiply(s, s)
//               return (b % 2 == 1) ? multiply(s, a) : s
// "/" and "%" follow C semantics (truncate toward zero).
//
// In:    w0 = a, w1 = b
// Out:   w0 = result
// Saved: x19 (a), x20 (b), x29, x30
// ---------------------------------------------------------------------
power:
    stp     x29, x30, [sp, #-32]!
    mov     x29, sp
    stp     x19, x20, [sp, #16]
    mov     w19, w0                     // w19 = a
    mov     w20, w1                     // w20 = b
    cbnz    w20, .L.power.recurse
    mov     w0, #1                      // b == 0: result is 1
    b       .L.power.leave
.L.power.recurse:
    add     w1, w20, w20, lsr #31       // add the sign bit so the shift truncates toward zero
    asr     w1, w1, #1                  // w1 = b / 2
    mov     w0, w19
    bl      power                       // w0 = power(a, b / 2)
    mov     w1, w0
    bl      multiply                    // w0 = square of the half power
    add     w8, w20, w20, lsr #31
    asr     w8, w8, #1
    sub     w8, w20, w8, lsl #1         // w8 = b % 2
    cmp     w8, #1
    b.ne    .L.power.leave              // remainder is not 1: the square is the answer
    mov     w1, w19
    bl      multiply                    // remainder is 1: one extra factor of a
.L.power.leave:
    ldp     x19, x20, [sp, #16]
    ldp     x29, x30, [sp], #32
    ret
.text
.globl memmove
.p2align 2
// ---------------------------------------------------------------------
// int memmove(int dst[], int dst_pos, int src[], int len)
//
// Copies len 32-bit words, in ascending index order, from src[0..len-1]
// to dst[dst_pos .. dst_pos+len-1].  Returns the number of words copied
// (0 when len <= 0).
//
// NOTE(review): this exported symbol has the same name as the C library's
// memmove but a different signature — confirm how the final link resolves it.
//
// In:    x0 = dst, w1 = dst_pos, x2 = src, w3 = len
// Out:   w0 = words copied
// Leaf function: uses only x8-x10 as scratch, no stack frame needed.
// ---------------------------------------------------------------------
memmove:
    mov     w8, #0                      // w8 = i
.L.memmove.copy:
    cmp     w8, w3
    b.ge    .L.memmove.finish           // signed compare: stop once i >= len
    ldr     w9, [x2, w8, sxtw #2]       // w9 = src[i]
    add     w10, w1, w8                 // w10 = dst_pos + i
    str     w9, [x0, w10, sxtw #2]      // dst[dst_pos + i] = src[i]
    add     w8, w8, #1
    b       .L.memmove.copy
.L.memmove.finish:
    mov     w0, w8
    ret
.text
.globl fft
.p2align 2
// ---------------------------------------------------------------------
// int fft(int arr[], int begin_pos, int n, int w)
//
// Recursive in-place transform of arr[begin_pos .. begin_pos+n-1] with
// arithmetic modulo M = 998244353:
//   1. n == 1: nothing to do, return 1.
//   2. Reorder through the global scratch array `temp`: elements at even
//      offsets go to temp[0 .. n/2-1], elements at odd offsets to
//      temp[n/2 ..]; then copy temp back with memmove().
//   3. Recurse on both halves with w*w (mod M) as the new w.
//   4. Combine, with wn starting at 1 and multiplied by w each step:
//        x = arr[lo], y = arr[lo + n/2], t = multiply(wn, y)
//        arr[lo]       = (x + t) % M
//        arr[lo + n/2] = (x - t + M) % M
//   Returns 0.
//
// multiply() touches only registers and its own stack frame, so w*w and
// t are each computed once and reused rather than being recomputed with
// identical arguments.
//
// In:    x0 = arr, w1 = begin_pos, w2 = n, w3 = w
// Out:   w0 = 1 if n == 1, otherwise 0
// Callee-saved register roles:
//   w19 = w        w20 = begin_pos   x21 = arr      w22 = n
//   w23 = w*w, later the low index of the current butterfly
//   w24 = wn       w25 = n / 2       w26 = butterfly counter
//   w27 = x        w28 = M
// ---------------------------------------------------------------------
fft:
    stp     x29, x30, [sp, #-96]!
    mov     x29, sp
    stp     x19, x20, [sp, #16]
    stp     x21, x22, [sp, #32]
    stp     x23, x24, [sp, #48]
    stp     x25, x26, [sp, #64]
    stp     x27, x28, [sp, #80]
    mov     x21, x0
    mov     w20, w1
    mov     w22, w2
    mov     w19, w3
    cmp     w22, #1
    b.ne    .L.fft.split
    mov     w0, #1                      // single element: already transformed
    b       .L.fft.leave

.L.fft.split:
    add     w25, w22, w22, lsr #31      // add the sign bit so the shift truncates toward zero
    asr     w25, w25, #1                // w25 = n / 2
    adrp    x13, temp
    add     x13, x13, :lo12:temp        // x13 = temp (no calls inside the loop below)
    mov     w12, #0                     // w12 = i
.L.fft.split_loop:
    cmp     w12, w22
    b.ge    .L.fft.split_done
    add     w8, w12, w12, lsr #31
    asr     w8, w8, #1                  // w8 = i / 2
    sub     w9, w12, w8, lsl #1         // w9 = i % 2
    add     w11, w25, w8                // w11 = n/2 + i/2 (slot for odd i)
    cmp     w9, #0
    csel    w11, w8, w11, eq            // even i uses slot i/2 instead
    add     w10, w12, w20
    ldr     w10, [x21, w10, sxtw #2]    // w10 = arr[begin_pos + i]
    str     w10, [x13, w11, sxtw #2]    // temp[slot] = w10
    add     w12, w12, #1
    b       .L.fft.split_loop

.L.fft.split_done:
    mov     x0, x21
    mov     w1, w20
    adrp    x2, temp
    add     x2, x2, :lo12:temp
    mov     w3, w22
    bl      memmove                     // arr[begin_pos ..] = temp[0 .. n-1]

    mov     w0, w19
    mov     w1, w19
    bl      multiply
    mov     w23, w0                     // w23 = w*w mod M, shared by both halves

    mov     x0, x21
    mov     w1, w20
    mov     w2, w25
    mov     w3, w23
    bl      fft                         // first half

    mov     x0, x21
    add     w1, w20, w25
    mov     w2, w25
    mov     w3, w23
    bl      fft                         // second half

    movz    w28, #1
    movk    w28, #15232, lsl #16        // w28 = M = 998244353
    mov     w24, #1                     // wn = 1
    mov     w26, #0                     // butterfly counter
.L.fft.butterfly:
    cmp     w26, w25
    b.ge    .L.fft.combined
    add     w23, w20, w26               // w23 = lo = begin_pos + i
    ldr     w27, [x21, w23, sxtw #2]    // w27 = x = arr[lo]
    add     w8, w23, w25
    ldr     w1, [x21, w8, sxtw #2]      // w1 = y = arr[lo + n/2]
    mov     w0, w24
    bl      multiply                    // w0 = t = wn * y mod M
    add     w8, w27, w0
    sdiv    w9, w8, w28
    msub    w8, w9, w28, w8
    str     w8, [x21, w23, sxtw #2]     // arr[lo] = (x + t) % M
    sub     w8, w27, w0
    add     w8, w8, w28                 // + M before taking the remainder
    sdiv    w9, w8, w28
    msub    w8, w9, w28, w8
    add     w9, w23, w25
    str     w8, [x21, w9, sxtw #2]      // arr[lo + n/2] = (x - t + M) % M
    mov     w0, w24
    mov     w1, w19
    bl      multiply
    mov     w24, w0                     // wn = wn * w mod M
    add     w26, w26, #1
    b       .L.fft.butterfly

.L.fft.combined:
    mov     w0, #0
.L.fft.leave:
    ldp     x27, x28, [sp, #80]
    ldp     x25, x26, [sp, #64]
    ldp     x23, x24, [sp, #48]
    ldp     x21, x22, [sp, #32]
    ldp     x19, x20, [sp, #16]
    ldp     x29, x30, [sp], #96
    ret
.text
.globl main
.p2align 2
// ---------------------------------------------------------------------
// int main(void)
//
// With M = 998244353:
//   1. n = getarray(a), m = getarray(b).
//   2. d = 1, doubled until d >= n + m - 1; stored to the global `d`.
//   3. r = power(5, (M-1)/d); fft(a, 0, d, r); fft(b, 0, d, r).
//   4. a[i] = multiply(a[i], b[i]) for 0 <= i < d.
//   5. fft(a, 0, d, power(5, (M-1) - (M-1)/d)).
//   6. s = power(d, M-2); a[i] = multiply(a[i], s) for 0 <= i < d.
//   7. putarray(n + m - 1, a); return 0.
// Steps 2-6 sit between _sysy_starttime(60) and _sysy_stoptime(79).
//
// NOTE(review): getarray, putarray, _sysy_starttime and _sysy_stoptime
// are external (presumably the SysY runtime); the value returned by
// getarray is used here as an element count — confirm against the runtime.
//
// power() and multiply() touch only registers and their own stack, and
// nothing called here writes `d`, so d lives in a register and the two
// invariant power() results (r in step 3, s in step 6) are computed once
// instead of being recomputed with identical arguments.
//
// Callee-saved register roles:
//   w19 = d      w20 = m      w21 = n      w22 = r, later loop index
//   x23 = &a     x24 = &b, later s
// ---------------------------------------------------------------------
main:
    stp     x29, x30, [sp, #-64]!
    mov     x29, sp
    stp     x19, x20, [sp, #16]
    stp     x21, x22, [sp, #32]
    stp     x23, x24, [sp, #48]

    adrp    x0, a
    add     x0, x0, :lo12:a
    bl      getarray
    mov     w21, w0                     // w21 = n
    adrp    x0, b
    add     x0, x0, :lo12:b
    bl      getarray
    mov     w20, w0                     // w20 = m

    mov     w0, #60
    bl      _sysy_starttime

    add     w10, w21, w20
    sub     w10, w10, #1                // w10 = n + m - 1
    mov     w19, #1
.L.main.grow:
    cmp     w19, w10
    b.ge    .L.main.sized
    lsl     w19, w19, #1                // d *= 2
    b       .L.main.grow
.L.main.sized:
    adrp    x13, d
    str     w19, [x13, #:lo12:d]        // publish d in the global

    movz    w8, #15232, lsl #16         // w8 = M - 1 = 998244352
    sdiv    w1, w8, w19                 // w1 = (M - 1) / d
    mov     w0, #5
    bl      power
    mov     w22, w0                     // w22 = r, used for both forward transforms

    adrp    x23, a
    add     x23, x23, :lo12:a
    adrp    x24, b
    add     x24, x24, :lo12:b

    mov     x0, x23
    mov     w1, #0
    mov     w2, w19
    mov     w3, w22
    bl      fft                         // forward transform of a

    mov     x0, x24
    mov     w1, #0
    mov     w2, w19
    mov     w3, w22
    bl      fft                         // forward transform of b

    mov     w22, #0                     // i = 0
.L.main.pointwise:
    cmp     w22, w19
    b.ge    .L.main.pointwise_done
    ldr     w0, [x23, w22, sxtw #2]
    ldr     w1, [x24, w22, sxtw #2]
    bl      multiply
    str     w0, [x23, w22, sxtw #2]     // a[i] = a[i] * b[i] mod M
    add     w22, w22, #1
    b       .L.main.pointwise
.L.main.pointwise_done:

    movz    w9, #15232, lsl #16         // w9 = M - 1
    sdiv    w8, w9, w19
    sub     w1, w9, w8                  // w1 = (M - 1) - (M - 1) / d
    mov     w0, #5
    bl      power
    mov     w3, w0
    mov     x0, x23
    mov     w1, #0
    mov     w2, w19
    bl      fft                         // transform a with the step-5 root

    mov     w0, w19
    movz    w1, #65535
    movk    w1, #15231, lsl #16         // w1 = M - 2 = 998244351
    bl      power
    mov     w24, w0                     // w24 = s = power(d, M - 2), hoisted out of the loop

    mov     w22, #0                     // i = 0
.L.main.scale:
    cmp     w22, w19
    b.ge    .L.main.scale_done
    ldr     w0, [x23, w22, sxtw #2]
    mov     w1, w24
    bl      multiply
    str     w0, [x23, w22, sxtw #2]     // a[i] = a[i] * s mod M
    add     w22, w22, #1
    b       .L.main.scale
.L.main.scale_done:

    mov     w0, #79
    bl      _sysy_stoptime

    add     w0, w21, w20
    sub     w0, w0, #1                  // w0 = n + m - 1
    mov     x1, x23
    bl      putarray

    mov     w0, #0
    ldp     x23, x24, [sp, #48]
    ldp     x21, x22, [sp, #32]
    ldp     x19, x20, [sp, #16]
    ldp     x29, x30, [sp], #64
    ret