From 87b7ea523ba1e76fda1d3a52ea14984fa6d27dc9 Mon Sep 17 00:00:00 2001
From: WangRunji <wangrunji0408@163.com>
Date: Tue, 10 Jul 2018 01:32:05 +0800
Subject: [PATCH] Experimental patch for core::sync::atomic on RISCV32I

---
 Makefile                      |  4 +++
 docs/RISCV.md                 | 30 ++++++++++++++------
 riscv32-blog_os.json          |  2 +-
 src/arch/riscv32/atomic.patch | 52 +++++++++++++++++++++++++++++++++++
 4 files changed, 78 insertions(+), 10 deletions(-)
 create mode 100644 src/arch/riscv32/atomic.patch

diff --git a/Makefile b/Makefile
index 0089434..083d413 100644
--- a/Makefile
+++ b/Makefile
@@ -144,6 +144,10 @@ build/user/%.o: user/%.img
 	@mkdir -p $(shell dirname $@)
 	@$(ld) -r -b binary $< -o $@
 
+# Patch the toolchain's libcore sync/atomic.rs with src/arch/riscv32/atomic.patch (RISCV32I atomics)
+patch-core:
+	@patch -p0 /rust/rust-riscv-rust-1.26.0-1-dev/src/libcore/sync/atomic.rs src/arch/riscv32/atomic.patch
+
 # used by docker_* targets
 docker_image ?= blog_os
 tag ?= 0.1
diff --git a/docs/RISCV.md b/docs/RISCV.md
index 04110db..78570dc 100644
--- a/docs/RISCV.md
+++ b/docs/RISCV.md
@@ -9,27 +9,39 @@
 
 ## Rust-RISCV
 
-### 目标指令集：RISCV32IMA
+### 目标指令集：RISCV32IM
 
-target: riscv32ima_unknown_none
+target: riscv32im_unknown_none
 
 由于工具链二进制版本尚未内置此target，因此需提供配置文件：`riscv32-blog_os.json`。
 
 理想情况下，目标指令集应为RISCV32G，即使用全部扩展。但考虑到要把它跑在我们自己实现的CPU上，指令集应该尽量精简，即最好是RISCV32I。此外：
 
-* 为什么用原子指令扩展？
+* 为什么用乘除指令扩展？
 
-  RustOS依赖的库中，大部分都使用了Rust核心库的原子操作（core::sync::atomic）。
+  Rust核心库中fmt模块会使用乘除运算，若不使用乘除指令，则会依赖LLVM提供的内置函数进行软计算，导致链接错误。这一问题理论上可以通过在xargo中设置依赖compiler-builtin解决。但如此操作后，仍有一个函数`__mulsi3`缺失（32×32）。经查，compiler-builtin中实现了类似的`__muldi3`函数（64×64），所以理论上可以用它手动实现前者。但如此操作后，还是不对，实验表明`__muldi3`本身也是不正确的。
 
-  如果目标指令集不支持原子操作，会导致无法编译。
+  总之，没有成功配置不使用M扩展的编译环境，不过日后解决这一问题并不困难。
+  
+### 原子操作支持
 
-  然而LLVM后端尚不完全支持原子指令扩展，因此这条路可能走不通，需要魔改Rust标准库。
+配置文件中与原子操作相关的有两处：
 
-* 为什么用乘除指令扩展？
+* `features`中`+a`：使用A指令扩展
+* `max-atomic-width`：决定能否使用core中的atomic模块，设为0不可以，设为32可以
 
-  Rust核心库中fmt模块会使用乘除运算，若不使用乘除指令，则会依赖LLVM提供的内置函数进行软计算，导致链接错误。这一问题理论上可以通过在xargo中设置依赖compiler-builtin解决。但如此操作后，仍有一个函数`__mulsi3`缺失（32×32）。经查，compiler-builtin中实现了类似的`__muldi3`函数（64×64)，所以理论上可以用它手动实现前者。但如此操作后，还是不对，实验表明`__muldi3`本身也是不正确的。
+二者是否相关，还不能确定。
 
-  总之，没有成功配置不使用M扩展的编译环境，不过日后解决这一问题并不困难。
+* 一方面，`riscv-rust/rust`官方配置中，二者是相关的。
+* 另一方面，即使不使用A指令扩展，设置`max-atomic-width=32`，也可以编译通过。经检查，生成的代码中包含了fence指令。这说明RISCV32I也可以实现基本同步操作（？）
+
+然而由于LLVM后端对RISCV原子操作支持不完善，无论是否`+a`，当使用Mutex时，它会调用core中的`atomic_compare_exchange`函数，LLVM会发生错误。
+
+鉴于更改上层实现（替换Mutex）工程难度较大，我尝试直接修改core代码，将上述问题函数手动实现。
+
+思路是在关中断环境下，用多条指令完成目标功能。这对于单核环境应该是正确的。
+
+我做了个[补丁](../src/arch/riscv32/atomic.patch)，在进入docker环境后，可运行`make patch-core`应用补丁，确保clean后，再build。
 
 ## BootLoader
 
diff --git a/riscv32-blog_os.json b/riscv32-blog_os.json
index 2f6236e..ab76d65 100644
--- a/riscv32-blog_os.json
+++ b/riscv32-blog_os.json
@@ -7,7 +7,7 @@
   "os": "none",
   "arch": "riscv",
   "cpu": "generic-rv32",
-  "features": "+m,+a",
+  "features": "+m",
   "max-atomic-width": "32",
   "linker": "ld.lld",
   "linker-flavor": "ld",
diff --git a/src/arch/riscv32/atomic.patch b/src/arch/riscv32/atomic.patch
new file mode 100644
index 0000000..4a824b6
--- /dev/null
+++ b/src/arch/riscv32/atomic.patch
@@ -0,0 +1,52 @@
+--- atomic_backup.rs	2018-07-10 00:29:48.000000000 +0800
++++ atomic.rs	2018-07-10 00:49:04.000000000 +0800
+@@ -1618,29 +1618,29 @@
+ }
+ 
+ #[inline]
+-unsafe fn atomic_compare_exchange<T>(dst: *mut T,
++#[cfg(target_arch = "riscv")]
++unsafe fn atomic_compare_exchange<T: PartialEq>(dst: *mut T,
+                                      old: T,
+                                      new: T,
+-                                     success: Ordering,
+-                                     failure: Ordering)
++                                     _success: Ordering,
++                                     _failure: Ordering)
+                                      -> Result<T, T> {
+-    let (val, ok) = match (success, failure) {
+-        (Acquire, Acquire) => intrinsics::atomic_cxchg_acq(dst, old, new),
+-        (Release, Relaxed) => intrinsics::atomic_cxchg_rel(dst, old, new),
+-        (AcqRel, Acquire) => intrinsics::atomic_cxchg_acqrel(dst, old, new),
+-        (Relaxed, Relaxed) => intrinsics::atomic_cxchg_relaxed(dst, old, new),
+-        (SeqCst, SeqCst) => intrinsics::atomic_cxchg(dst, old, new),
+-        (Acquire, Relaxed) => intrinsics::atomic_cxchg_acq_failrelaxed(dst, old, new),
+-        (AcqRel, Relaxed) => intrinsics::atomic_cxchg_acqrel_failrelaxed(dst, old, new),
+-        (SeqCst, Relaxed) => intrinsics::atomic_cxchg_failrelaxed(dst, old, new),
+-        (SeqCst, Acquire) => intrinsics::atomic_cxchg_failacq(dst, old, new),
+-        (__Nonexhaustive, _) => panic!("invalid memory ordering"),
+-        (_, __Nonexhaustive) => panic!("invalid memory ordering"),
+-        (_, AcqRel) => panic!("there is no such thing as an acquire/release failure ordering"),
+-        (_, Release) => panic!("there is no such thing as a release failure ordering"),
+-        _ => panic!("a failure ordering can't be stronger than a success ordering"),
+-    };
+-    if ok { Ok(val) } else { Err(val) }
++    // Single-core CAS emulation with interrupts masked. SIE is bit 1 of sstatus (0x100).
++    let sie_mask: usize = 1 << 1;
++    let sstatus: usize;
++    asm!("csrrs $0, 0x100, x0" : "=r"(sstatus) ::: "volatile");
++    // Disable interrupt: sstatus::clear_sie(). "memory" stops reordering out of the window.
++    asm!("csrrc x0, 0x100, $0" :: "r"(sie_mask) : "memory" : "volatile");
++    let ret = atomic_load(dst, Ordering::Relaxed);
++    let swapped = ret == old;
++    if swapped {
++        atomic_store(dst, new, Ordering::Relaxed);
++    }
++    // Restore interrupt only if it was enabled on entry: sstatus::set_sie()
++    if sstatus & sie_mask != 0 {
++        asm!("csrrs x0, 0x100, $0" :: "r"(sie_mask) : "memory" : "volatile");
++    }
++    if swapped { Ok(ret) } else { Err(ret) }
+ }
+ 
+ #[inline]