# Instruction-level simulator for a tiny load/store machine.
#
# Memory
#   32-bit address space, 8-bit cells
# Register file
#   32 32-bit registers, with 2 read ports and 1 write port
# Test bench
#   Add the numbers at memory addresses 0 and 1 and store the sum at address 3:
#     Load r1, #0
#     Load r2,#1
#     Add r3, r1, r2
#     Store r3, #3
import random
import re

import numpy as np


def _wrap_int32(value: int) -> int:
    """Fold *value* into the signed 32-bit range (two's-complement wrap)."""
    return (value + 2 ** 31) % 2 ** 32 - 2 ** 31


class register_file:
    """Register file backed by an int32 array: two read ports, one write port."""

    def __init__(self, size) -> None:
        self.regs = np.zeros(size, dtype=np.int32)
        self.size = size

    def _check_index(self, idx: int) -> None:
        """Raise IndexError unless *idx* names an existing register."""
        if idx < 0 or idx >= self.size:
            raise IndexError("The index is out of range")

    def read(self, idx1: int, idx2: int) -> tuple:
        """Return the contents of registers *idx1* and *idx2* (2 read ports).

        Raises:
            IndexError: if either index is outside ``[0, size)``.
        """
        self._check_index(idx1)
        self._check_index(idx2)
        return (self.regs[idx1], self.regs[idx2])

    def write(self, idx: int, value: np.int32) -> None:
        """Store *value* in register *idx* (1 write port).

        Raises:
            IndexError: if *idx* is outside ``[0, size)``.
            ValueError: if *value* does not fit in a signed 32-bit integer.
        """
        self._check_index(idx)
        # Compare as a plain Python int so narrow NumPy scalars (e.g. int8
        # values coming from memory) are never compared against bounds that
        # do not fit their own dtype.
        value = int(value)
        if value < -2 ** 31 or value > 2 ** 31 - 1:
            raise ValueError("The value is out of range")
        self.regs[idx] = value

    def info(self) -> None:
        """Print the whole register array."""
        print(f"register file : {self.regs}")


class memory:
    """Sparse byte-addressed memory; cells are signed 8-bit values.

    A dictionary stands in for the full address space so that a 2**32-byte
    memory does not have to be allocated.
    """

    def __init__(self, size):
        self.size = size
        # Pre-create the first few cells so info() has something to show,
        # but never create cells beyond the end of a small memory.
        self.mem = {key: np.int8(0) for key in range(min(size, 10))}

    def _check_index(self, idx: int) -> None:
        """Raise IndexError unless *idx* is a valid address."""
        if idx < 0 or idx >= self.size:
            raise IndexError("The index is out of range")

    def reset(self):
        """Zero every cell that has been touched so far."""
        self.mem = {key: np.int8(0) for key in self.mem}

    def info(self) -> None:
        """Print the populated cells."""
        print(f"memory : {self.mem}")

    def read(self, idx: int) -> np.int8:
        """Return the cell at address *idx*; untouched cells read as 0.

        Reading an untouched address creates the cell, so it shows up in
        info() afterwards.

        Raises:
            IndexError: if *idx* is outside ``[0, size)``.
        """
        self._check_index(idx)
        return self.mem.setdefault(idx, np.int8(0))

    def write(self, idx, value: np.int8) -> None:
        """Store *value* at address *idx* as a signed 8-bit cell.

        Raises:
            IndexError: if *idx* is outside ``[0, size)``.
            ValueError: if *value* does not fit in a signed 8-bit integer.
        """
        self._check_index(idx)
        value = int(value)
        if value < -128 or value > 127:
            raise ValueError("The value is out of range")
        # Always keep the cell type 8-bit, whatever width the caller passed.
        self.mem[idx] = np.int8(value)


class instruction:
    """Instruction set of the simulator.

    A parsed instruction is a list ``[opcode, operand, ...]`` whose operands
    are plain integers (register numbers, addresses or immediates).  The
    handlers operate on the module-level ``regfile`` and ``mem`` objects.
    """

    # Only these names may be dispatched by exec_ins(); without the whitelist
    # helper methods such as parse_instruction would be reachable as opcodes.
    _OPCODES = ('Load', 'Loadi', 'Store', 'Add')

    @staticmethod
    def Load(ins: list, immediate: bool = False) -> None:
        """``Load rD, #addr``: copy the memory cell at *addr* into rD.

        With ``immediate=True`` the second operand is written to rD as a
        literal value instead of being used as an address.
        """
        value = ins[2] if immediate else mem.read(ins[2])
        regfile.write(ins[1], value)

    @staticmethod
    def Loadi(ins: list):
        """``Loadi rD, iVALUE``: load an immediate into rD (test helper)."""
        instruction.Load(ins, immediate=True)

    @staticmethod
    def Store(ins: list) -> None:
        """``Store rS, #addr``: write rS to the memory cell at *addr*.

        Raises ValueError (from memory.write) when rS does not fit in 8 bits.
        """
        # Only one read port is needed; the second one reads r0 and is ignored.
        mem.write(ins[2], regfile.read(ins[1], 0)[0])

    @staticmethod
    def Add(ins: list) -> None:
        """``Add rD, rA, rB``: rD = rA + rB with 32-bit wrap-around."""
        lhs, rhs = regfile.read(ins[2], ins[3])
        # Add as Python ints and wrap explicitly: adding two np.int32 scalars
        # overflows with a RuntimeWarning instead of a well-defined result.
        regfile.write(ins[1], _wrap_int32(int(lhs) + int(rhs)))

    def exec_ins(self, ins: list) -> None:
        """Execute one parsed instruction.

        Raises:
            RuntimeError: if the instruction is empty or its opcode is unknown.
        """
        if not ins or ins[0] not in self._OPCODES:
            raise RuntimeError(
                "the error instruction {}".format(ins[0] if ins else ins))
        getattr(type(self), ins[0])(ins)

    @staticmethod
    def parse_instruction(instruc: str) -> list:
        """Turn assembly text such as ``'Add r3, r1, r2'`` into ``['Add', 3, 1, 2]``.

        Operands are separated by spaces and/or commas.  Tokens starting with
        ``r`` (register), ``#`` (address) or ``i`` (immediate) contribute the
        integer that follows the prefix; other operand tokens are ignored.

        Raises:
            ValueError: if the text holds no opcode or an operand has no
                valid integer after its prefix.
        """
        # Leading/trailing separators make re.split emit empty strings.
        tokens = [tok for tok in re.split(r'[ ,]+', instruc) if tok]
        if not tokens:
            raise ValueError("empty instruction")

        ins = [tokens[0]]
        # Skip the opcode itself so that an opcode beginning with r/#/i is
        # not mistaken for an operand.
        for tok in tokens[1:]:
            if tok[0] in ('r', '#', 'i'):
                try:
                    ins.append(int(tok[1:]))
                except ValueError:
                    raise ValueError(
                        "bad operand {!r}".format(tok)) from None
        return ins


class testbench:
    """Smoke test: add the cells at addresses 0 and 1 into address 3."""

    @staticmethod
    def memory_random_flip():
        """Fill every populated cell with a small random 8-bit value."""
        mem.mem = {key: np.int8(random.randint(-10, 10)) for key in mem.mem}

    @staticmethod
    def test():
        """Randomise memory, run the demo program, print memory before/after."""
        testbench.memory_random_flip()
        mem.info()
        program = ['Load r1, #0', 'Load r2,#1', 'Add r3, r1, r2', 'Store r3, #3']
        for line in program:
            instruc.exec_ins(instruction.parse_instruction(line))

        mem.info()


# Machine state shared by `instruction` and `testbench`.  It is built at
# import time so the classes also work when this file is imported rather
# than run as a script.
regfile = register_file(32)  # 32 32-bit registers
mem = memory(2 ** 32)  # 32-bit address memory
instruc = instruction()


if __name__ == '__main__':
    # A script run starts from a freshly built machine.
    regfile = register_file(32)
    mem = memory(2 ** 32)
    instruc = instruction()
instruction() + + testbench.test() + while True: + _inst = input('>') + inst = instruc.parse_instruction(_inst) + instruc.exec_ins(inst) + mem.info() + regfile.info() \ No newline at end of file diff --git a/advanced-computer-architecture/homework/lecture01/review.md b/advanced-computer-architecture/homework/lecture01/review.md new file mode 100644 index 0000000..275e8fb --- /dev/null +++ b/advanced-computer-architecture/homework/lecture01/review.md @@ -0,0 +1,51 @@ +**第一章 量化设计与分析基础** + +RISC体系结构性能优化技术: + +* 指令级并行(最初通过流水线,后通过多发射) +* 缓存 + +Intel也在向RISC体系结构靠拢,在内部将部分x86指令转换成类似于RISC的指令,以便使用RISC相关的性能优化技术。在手机等低端领域,体系结构抛弃了功耗大、硅片面积成本高的x86体系结构,ARM逐渐成为主流。 + +CPU性能提升在经历了17年(1986-2003)平均增长率52%的疯狂增长后,由于功耗大、指令级并行有限这两大瓶颈,单核性能增长放缓。2004年,Intel取消了高性能单核项目,转而研究多核。这标志着处理器性能提升的视角从ILP向DLP、TLP、RLP的转变。 + +SISD、SIMD、MISD、MIMD + +处理器的瓶颈更在于带宽,经验法则表明,带宽的提升幅度至少是延迟改进幅度的平方。 + +**TDP(热设计功耗)**是指硬件在正常运行(通常指最高负载的状态)时产生的最大热量或功率。这一值通常会在硬件的规格说明书中列出。TDP 不是设备的实际功耗,而是一个设计参数,提供给系统设计者和生产商用于选择合适的冷却解决方案(如风扇、散热器等)。 + +MTTF : Mean Time To Failure + +MTTR : Mean Time To Repair + +MTBF : Mean Time Between Failures + +MTBF = MTTR + MTTF + +Amdahl定律: + +$新执行时间 = 原执行时间 × ((1 - 升级比例) + \frac{升级比例}{升级加速比})$ + +$总加速比 = \frac{1}{(1 - 升级比例) + \frac{升级比例}{升级加速比}}$ + + + + + + + +**Intel Shows Its Tiger Lake CPU Die, Details What’s New & What’s The Same on It’s 11th Gen Mobility Lineup** + +Tiger Lake是酷睿11代的产品,主要用于笔记本、游戏本等移动设备: + +* 基于10nm SuperFin工艺 +* 采用Willow Cove核心架构 +* 对Sunny Cove架构进行了改进,实现了更大的吞吐量 +* 采用Xe LP显卡和媒体引擎、12 MB的L3缓存,支持8K显示的新显示引擎、具有6个摄像头传感器的IPU6以及对LPDDR5内存的支持 + +Tiger Lake CPU系列分为三个版本:Y系列、U系列、H系列 + +* Tiger Lake-Y : TDP(thermal design power热设计功耗)为4.5-9W,具有4核8线程,并支持LPDDR4X内存 +* Tiger Lake-U : TDP为15-28W,4核8线程,主频可达4.5GHz +* Tiger Lake-H : TDP为35W-45W/65W,8核16线程,34MB缓存 \ No newline at end of file diff --git a/advanced-computer-architecture/homework/lecture01/review.pdf b/advanced-computer-architecture/homework/lecture01/review.pdf new file mode 100644 index 0000000..3dce568 Binary files /dev/null and
b/advanced-computer-architecture/homework/lecture01/review.pdf differ diff --git a/microarchitecture-and-design/note.md b/microarchitecture-and-design/note.md new file mode 100644 index 0000000..3d4c258 --- /dev/null +++ b/microarchitecture-and-design/note.md @@ -0,0 +1,72 @@ +PPA : performance,power,area + +处理器设计流程:微架构定义(指令集定义)、逻辑实现、物理实现(后端) + +计算机硬件:控制器、运算器、存储器、输入输出设备 + +体系结构可以笼统概括为Interface between HW and SW + +SW:programming language , algorithm , application + +ISA(instruction set architecture) + +HW:microarchitecture,RTL,circuit,physics + + + +如何在体系结构发展中同时支持软件正常工作? + +兼容 + +向前兼容(forward):为某一时期的机器开发的程序,不加修改就能运行于在它之前推出的机器 + +向后兼容(backward) + +向后兼容是软件兼容的根本特性,也是系列机的根本特性 + + + +**CISC** + +* 指令不定长 +* 除load,store外,其他指令也可以访问存储器 +* 单条指令可以完成复杂的操作 + +**RISC** + +* 指令定长 +* 只有load/store可以访问存储器 +* 单条指令完成简单的操作 + +例如实现两数相乘: + +CISC : MUL [0x100c] [0x1004] [0x1008] + +RISC : Load Ra 0x1004 + +Load Rb 0x1008 + +Mul Rc Ra Rb + +Store Rc 0x100c + + + +CISC单指令复杂的历史原因: + +* 存储器昂贵、速度慢,希望不定长的指令提供更大的代码密度 +* 编译器不发达 + +CISC为向后兼容,指令只增不减,但只有20%的指令反复使用,剩下80%的指令使用频率低 + +CISC代表:x86 + +RISC(reduced instruction set computer)代表:Power,MIPS,ARM,RISC-V + +ARM架构:A系列高性能,R系列实时处理,M系列工控 + +现代体系结构挑战: + +* 单核性能增长缓慢:①Dennard scaling失效(晶体管尺寸减小,功耗并不会明显降低,甚至因为漏电流导致功耗增加(ARM因为注重低功耗,主频只有3GHz))、②有限的指令级并行 +* Moore's law 变缓 +* 安全问题:幽灵漏洞、熔断漏洞 \ No newline at end of file