commit computer-architecture-homework-01

9 months ago · 4dcc3b8893
parent 4051d867a3
commit 4dcc3b8893
5 changed files with 260 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1 @@
+*/*.pdf
--- a/advanced-computer-architecture/homework/lecture01/lecture01-instruc-sim.py
+++ b/advanced-computer-architecture/homework/lecture01/lecture01-instruc-sim.py
@ -0,0 +1,136 @@
+# Memory
+# 32-bit address
+# 8-bit cell
+# Register File
+# 32 32-bit registers, with 2 read ports and 1 write port
+# Test bench
+# Add the numbers at memory addresses 0 and 1 and store the sum at address 3
+# Load r1, #0
+# Load r2,#1
+# Add r3, r1, r2
+# Store r3, #3
+import numpy as np
+import re
+import random
+class register_file:
+    
+    def __init__(self , size) -> None:
+        self.regs = np.array([0] * size , dtype=np.int32)
+        self.size = size
+    
+    def read(self , idx1 : int , idx2 : int) -> tuple:# 2 read port
+            if idx1 < 0 or idx1 >= self.size or idx2 < 0 or idx2 >= self.size:
+                raise IndexError(f"The index is out of range")
+            
+            return (self.regs[idx1] , self.regs[idx2])
+
+    def write(self , idx : int , value : np.int32) -> None:# 1 write port
+        if idx < 0 or idx >= self.size:
+            raise IndexError("The index is out of range")
+        
+        if value < -2**31 or value > 2**31 - 1:
+            raise ValueError("The value is out of range")
+        
+        self.regs[idx] = value
+
+    def info(self) -> None:
+        print(f"register file : {self.regs}")
+
+class memory:
+    def __init__(self , size):
+        self.size = size
+        self.mem = {key : np.int8(0) for key in range(10)}#8-bit cell
+        # use dictionary to simulate 2**32 byte-memory
+
+    def reset(self):
+        self.mem = {key : np.int8(0) for key in self.mem}
+    
+    def info(self) -> None:
+        print(f"memory : {self.mem}")
+    
+    def read(self , idx : int) -> np.int8:
+        if idx < 0 or idx >= self.size:
+            raise IndexError("The index is out of range")
+        
+        if idx not in self.mem:
+            self.mem[idx] = np.int8(0)
+        return self.mem[idx]
+    
+    def write(self , idx , value : np.int8) -> None:
+        if idx < 0 or idx >= self.size:
+            raise IndexError("The index is out of range")
+        
+        if(value < -128 or value > 127):
+            raise ValueError("The value is out of range")
+        
+        self.mem[idx] = value
+
+
+class instruction:
+
+    @staticmethod
+    def Load(ins : list , immediate: bool = False) -> None:
+        regfile.write(ins[1] , mem.read(ins[2]))
+
+    @staticmethod
+    def Loadi(ins : list):#load immediate to register , just for test , style of instruction : Loadi r1 , i100
+        regfile.write(ins[1] , ins[2]) #to fix : merge into Load instruction by adding flag immediate , maybe
+
+    @staticmethod
+    def Store(ins : list) -> None:
+        mem.write(ins[2] , regfile.read(ins[1] , 0)[0])#just use one read port of regfile
+    
+    @staticmethod
+    def Add(ins : list) -> None:
+        r1 , r2 = regfile.read(ins[2] , ins[3])
+        regfile.write(ins[1] , r1 + r2)
+
+    def exec_ins(self , ins : list) -> None:
+        target_ins = getattr(type(self) , ins[0])
+        if target_ins is None:
+            raise RuntimeError("the error instruction {}".format(ins[0]))
+        target_ins(ins)
+        
+    @staticmethod
+    def parse_instruction(instruc : str) -> list:
+        #parse the instruction
+        _ins = re.split(r'[ ,]+' , instruc)# split the string by space and comma
+        ins = [_ins[0]]
+        for elem in _ins:
+            if elem[0] == 'r' or elem[0] == '#' or elem[0] == 'i':#get the bias of address
+                ins.append(int(elem[1:]))
+
+        return ins
+
+class testbench:
+
+    @staticmethod
+    def memory_random_flip():
+        mem.mem = {key : random.randint(-10,10) for key in mem.mem}
+    
+    @staticmethod
+    def test():
+        testbench.memory_random_flip()
+        mem.info()
+        test_instr = ['Load r1, #0' , 'Load r2,#1' , 'Add r3, r1, r2' , 'Store r3, #3']
+        for ins in test_instr:
+            _ins = instruction.parse_instruction(ins)
+            instruc.exec_ins(_ins)
+
+        mem.info()
+
+
+
+if __name__ == '__main__':
+    
+    global regfile , mem , instruc
+    regfile = register_file(32)#32 32-bit registers
+    mem = memory(2 ** 32)# 32-bit address memory
+    instruc = instruction()
+
+    testbench.test()
+    while True:
+        _inst = input('>')
+        inst  = instruc.parse_instruction(_inst)
+        instruc.exec_ins(inst)
+        mem.info()
+        regfile.info()
--- a/advanced-computer-architecture/homework/lecture01/review.md
+++ b/advanced-computer-architecture/homework/lecture01/review.md
@ -0,0 +1,51 @@
+**第一章 量化设计与分析基础**
+
+RISC体系结构性能优化技术：
+
+* 指令级并行(最初通过流水线，后通过多发射)
+* 缓存
+
+Intel也在向RISC体系结构靠拢，在内部将部分x86指令转换成类似于RISC的指令，以便使用RISC相关的性能优化技术。在手机等低端领域，体系结构抛弃了功耗大、硅片面积成本高的x86体系结构,ARM逐渐成为主流。
+
+CPU性能提升在经历了17年(1986-2003)平均增长率52%的疯狂增长后，由于功耗大、指令级并行有限这两大瓶颈，单核性能增长放缓。2004年，Intel取消了高性能单核项目，转而研究多核。这标志着处理器性能提升的视角从ILP向DLP、TLP、RLP的转变。
+
+SISD、SIMD、MISD、MIMD
+
+处理器的瓶颈更在于带宽，经验公式表明，带宽增长带来的性能提升至少是延迟改进带来的性能提升的平方。
+
+**TDP(热设计功耗)**是指硬件在正常运行（通常指最高负载的状态）时产生的最大热量或功率。这一值通常会在硬件的规格说明书中列出。TDP 不是设备的实际功耗，而是一个设计参数，提供给系统设计者和生产商用于选择合适的冷却解决方案（如风扇、散热器等）。
+
+MTTF : Mean Time To Failure
+
+MTTR : Mean Time To Repair
+
+MTBF : Mean Time Between Failures
+
+MTBF = MTTR ＋ MTTF
+
+Amdahl定律：
+
+$新执行时间 = 原执行时间 × ((1 - 升级比例) + \frac{升级比例}{升级加速比})$
+
+$总加速比 = \frac{1}{(1 - 升级比例) + \frac{升级比例}{升级加速比}}$
+
+
+
+
+
+
+
+**Intel Shows Its Tiger Lake CPU Die, Details What’s New & What’s The Same on It’s 11th Gen Mobility Lineup**
+
+Tiger Lake是酷睿11代的产品，主要用于笔记本、游戏本等移动设备：
+
+* 基于10nm superFin工艺
+* 采用Willow Cove核心架构
+* 对Sunny Cove架构进行了改进，实现了更大的吞吐量
+* 采用Xe LP显卡和媒体引擎、12 MB的L3缓存，支持8K显示的新显示引擎、具有6个摄像头传感器的IPU6以及对LPDDR5内存的支持
+
+Tiger Lake CPU系列分为三个版本：Y系列、U系列、H系列
+
+* Tiger Lake-Y : TDP(thermal design power热设计功耗)为4.5-9W，具有4核8线程，并支持LPDDR4X内存
+* Tiger Lake-U : TDP为15-28W,4核8线程,主频可达4.5GHz
+* Tiger Lake-H : TDP为35W-45W/65W，8核16线程，34MB缓存
--- a/advanced-computer-architecture/homework/lecture01/review.pdf
+++ b/advanced-computer-architecture/homework/lecture01/review.pdf
--- a/microarchitecture-and-design/note.md
+++ b/microarchitecture-and-design/note.md
@ -0,0 +1,72 @@
+ppa : performance,power,area
+
+处理器设计流程：微架构定义(指令集定义)、逻辑实现、物理实现(后端)
+
+计算机硬件：控制器、运算器、存储器、输入输出设备
+
+体系结构可以笼统概括为Interface between HW and SW
+
+SW:programming language , algorithm , application
+
+ISA(instruction set architecture)
+
+HW:microarchitecture,RTL,circuit,physics
+
+
+
+如何在体系结构发展中同时支持软件正常工作？
+
+兼容
+
+向前兼容(forward)：开发的
+
+向后兼容(backward)
+
+向后兼容是软件兼容的根本特性，也是系列机的根本特性
+
+
+
+**CISC**
+
+* 指令不定长
+* 除load,store外，其他指令也可以访问存储器
+* 单条指令可以完成复杂的操作
+
+**RISC**
+
+* 指令定长
+* 只有load/store可以访问存储器
+* 单条指令完成简单的操作
+
+例如实现两数相乘：
+
+CISC ： MUL [0x100c] [0x1004] [0x1008]
+
+RISC :  Load Ra 0x1004
+
+Load Rb 0x1008
+
+Mul Rc Ra Rb
+
+Store Rc 0x100c
+
+
+
+CISC单指令复杂的历史原因：
+
+* 存储器昂贵、速度慢，希望不定长的指令提供更大的代码密度
+* 编译器不发达
+
+CISC为向后兼容，指令只增不减，但只有20%的指令反复使用，剩下80%的指令使用频率低
+
+CISC代表：x86
+
+RISC(reduced instruction set computer)代表：power,mips,arm,risc-v
+
+ARM架构：A系列高性能，R系列实时处理，M系列工控
+
+现代体系结构挑战：
+
+* 单核性能增长缓慢：①Dennard scaling失效(晶体管尺寸减小，功耗并不会明显降低，甚至因为漏电流导致功耗增加(ARM因为注重低功耗，主频只有3GHz))、②有限的指令级并行
+* moore's law 变缓
+* 安全问题：幽灵漏洞、熔断漏洞