RTL设计和HLS高层次设计
一、RTL设计和HLS高层次设计
1. RTL 设计需要关注微架构的决策；高层次设计不需要制定微架构决策，关注的是宏框架设计。
2. FSM 状态机的创建、数据的路径、寄存器流水线，这些细节留给 HLS 工具编译器来处理。
3. 高层次综合通过提供的约束来生成优化的 RTL。
4. 高层次综合在宏框架上做出解决方案，在性能和面积之间做权衡和取舍。

二、任务级别的并行度
1. 可以使用 dataflow 编译指令，或者使用 hls::task 对象来显式创建并行度。
2. 存储器架构：访问全局存储器会产生更高的时延成本，可耗时大量周期；而访问本地存储器通常十分快速，只需一个或多个周期即可。

三、函数级别并行设计
1. 在函数级别实现任务级并行度。为实现任务级并行度，需将循环推送到多个独立的函数中。原始 compute() 函数拆分为多个子函数。根据经验法则，顺序函数可并发执行，顺序循环则可采用流水打拍。
2. 没有优化之前的代码：

void compute(data_t in[totalNumWords], data_t out[totalNumWords]) {
    data_t tmp1[totalNumWords], tmp2[totalNumWords];
    A: for (int i = 0; i < totalNumWords; ++i) {
        tmp1[i] = in[i] * 3;
        tmp2[i] = in[i] * 3;
    }
    B: for (int i = 0; i < totalNumWords; ++i) {
        tmp1[i] = tmp1[i] + 25;
    }
    C: for (int i = 0; i < totalNumWords; ++i) {
        tmp2[i] = tmp2[i] * 2;
    }
    D: for (int i = 0; i < totalNumWords; ++i) {
        out[i] = tmp1[i] + tmp2[i] * 2;
    }
}

3. 优化设计：

#include "diamond.h"
#define NUM_WORDS 16

extern "C" {
void diamond(vecOf16Words* vecIn, vecOf16Words* vecOut, int size)
{
    hls::stream<vecOf16Words> c0, c1, c2, c3, c4, c5;
    assert(size % 16 == 0);

#pragma HLS dataflow
    load(vecIn, c0, size);
    compute_A(c0, c1, c2, size);
    compute_B(c1, c3, size);
    compute_C(c2, c4, size);
    compute_D(c3, c4, c5, size);
    store(c5, vecOut, size);
}
}

void load(vecOf16Words* in, hls::stream<vecOf16Words>& out, int size)
{
Loop0:
    for (int i = 0; i < size; i++)
    {
#pragma HLS PERFORMANCE target_ti=32
#pragma HLS LOOP_TRIPCOUNT max=32
        out.write(in[i]);
    }
}

void compute_A(hls::stream<vecOf16Words>& in, hls::stream<vecOf16Words>& out1, hls::stream<vecOf16Words>& out2, int size)
{
Loop0:
    for (int i = 0; i < size; i++)
    {
#pragma HLS PERFORMANCE target_ti=32
#pragma HLS LOOP_TRIPCOUNT max=32
        vecOf16Words t = in.read();
        out1.write(t * 3);
        out2.write(t * 3);
    }
}

void compute_B(hls::stream<vecOf16Words>& in, hls::stream<vecOf16Words>& out, int size)
{
Loop0:
    for (int i = 0; i < size; i++)
    {
#pragma HLS PERFORMANCE target_ti=32
#pragma HLS LOOP_TRIPCOUNT max=32
        out.write(in.read() + 25);
    }
}

void compute_C(hls::stream<vecOf16Words>& in, hls::stream<vecOf16Words>& out, int size)
{
Loop0:
    for (data_t i = 0; i < size; i++)
    {
#pragma HLS PERFORMANCE target_ti=32
#pragma HLS LOOP_TRIPCOUNT max=32
        out.write(in.read() * 2);
    }
}

void
compute_D(hls::stream<vecOf16Words>& in1, hls::stream<vecOf16Words>& in2, hls::stream<vecOf16Words>& out, int size)
{
Loop0:
    for (data_t i = 0; i < size; i++)
    {
#pragma HLS PERFORMANCE target_ti=32
#pragma HLS LOOP_TRIPCOUNT max=32
        out.write(in1.read() + in2.read());
    }
}

void store(hls::stream<vecOf16Words>& in, vecOf16Words* out, int size)
{
Loop0:
    for (int i = 0; i < size; i++)
    {
#pragma HLS PERFORMANCE target_ti=32
#pragma HLS LOOP_TRIPCOUNT max=32
        out[i] = in.read();
    }
}
本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明出处:http://www.coloradmin.cn/o/2561680.html
如若内容造成侵权/违法违规/事实不符,请联系多彩编程网进行投诉反馈,一经查实,立即删除!