% Journal article: CCF Transactions on High Performance Computing, 2025.
% Typed as an article because the venue is given in the journal field.
@article{Dai2025CoMP,
  author  = {Dai, Wenhao and Jia, Ziyi and Bai, Yuesi and Sun, Qingxiao},
  title   = {Convergence-aware operator-wise mixed-precision training},
  journal = {CCF Transactions on High Performance Computing},
  year    = {2025},
  doi     = {10.1007/s42514-024-00208-9},
}
% Conference paper: SC 2024.
@inproceedings{Lu2024AmgT,
  author       = {Lu, Yuechen and Zeng, Lijie and Wang, Tengcheng and Fu, Xu and Li, Wenxuan and Cheng, Helin and Yang, Dechuang and Jin, Zhou and Casas, Marc and Liu, Weifeng},
  title        = {AmgT: Algebraic Multigrid Solver on Tensor Cores},
  booktitle    = {37th ACM/IEEE International Conference for High Performance Computing, Networking, Storage, and Analysis (SC)},
  year         = {2024},
  pages        = {823--838},
  organization = {ACM/IEEE},
}
% Conference paper: SC 2024.
@inproceedings{Yang2024Millefeuille,
  author       = {Yang, Dechuang and Zhao, Yuxuan and Niu, Yiduo and Jia, Weile and Shao, En and Liu, Weifeng and Tan, Guangming and Jin, Zhou},
  title        = {Mille-feuille: A Tile-Grained Mixed Precision Single-Kernel Conjugate Gradient Solver on GPUs},
  booktitle    = {37th ACM/IEEE International Conference for High Performance Computing, Networking, Storage, and Analysis (SC)},
  year         = {2024},
  pages        = {916--931},
  organization = {ACM/IEEE},
}
% Conference paper: ICCAD 2024.
@inproceedings{2024-niu-ISLU,
  author    = {Niu, Dan and Tao, Yiyang and Jin, Zhou and Dong, Yichao and Wang, Chao and Sun, Changyin},
  title     = {ISLU: Indexing-Efficient Sparse LU Factorization for Circuit Simulation on GPUs},
  booktitle = {43rd ACM/IEEE International Conference on Computer-Aided Design (ICCAD)},
  year      = {2024},
  pages     = {1--6},
}
% Conference paper: ICCAD 2024.
@inproceedings{zhao2024CSP,
  author    = {Zhao, Yuxuan and Yang, Xiaoyu and Bai, Yinuo and Zeng, Lijie and Niu, Dan and Liu, Weifeng and Jin, Zhou},
  title     = {CSP: Comprehensively-Sparsified Preconditioner for Efficient Nonlinear Circuit Simulation},
  booktitle = {43rd ACM/IEEE International Conference on Computer-Aided Design (ICCAD)},
  year      = {2024},
  pages     = {1--6},
}
% Conference paper: ICCAD 2024.
% NOTE(review): the key prefix "li" does not match the first listed author (Yi);
% the key is kept unchanged so existing citations still resolve.
@inproceedings{li2024Leda,
  author    = {Yi, Enxin and Bai, Jiarui and Nie, Yijie and Niu, Dan and Jin, Zhou and Liu, Weifeng},
  title     = {Leda: Leveraging Tiling Dataflow to Accelerate SpMM on HBM-Equipped FPGAs for GNNs},
  booktitle = {43rd ACM/IEEE International Conference on Computer-Aided Design (ICCAD)},
  year      = {2024},
  pages     = {1--6},
}
% Conference paper: ICCAD 2024.
@inproceedings{sun2024SodaPTA,
  author    = {Sun, Jiatai and Zha, Xiaru and Wang, Chao and Wu, Xiao and Niu, Dan and Xing, Wei and Jin, Zhou},
  title     = {Pseudo Adjoint Optimization: Harnessing the Solution Curve for SPICE Acceleration},
  booktitle = {43rd ACM/IEEE International Conference on Computer-Aided Design (ICCAD)},
  year      = {2024},
  pages     = {1--9},
}
% Conference paper: DATE 2024.
@inproceedings{guo2024heterogeneous,
  author    = {Guo, Zizheng and Huang, Tsung-Wei and Jin, Zhou and Zhou, Cheng and Lin, Yibo and Wang, Runsheng and Huang, Ru},
  title     = {Heterogeneous Static Timing Analysis with Advanced Delay Calculator},
  booktitle = {2024 21st Design, Automation and Test in Europe Conference (DATE)},
  year      = {2024},
  pages     = {1--6},
}
% Conference paper: MLCAD 2024.
@inproceedings{jiang2024aparallel,
  author    = {Jiang, Yangfan and Song, Jianfei and Yin, Xunzhao and Dong, Xiao and Sun, Songyu and Lin, Yibo and Jin, Zhou and Yang, Xiaoyu and Zhuo, Cheng},
  title     = {A Parallel Simulation Framework Incorporating Machine Learning-Based Hotspot Detection for Accelerated Power Grid Analysis},
  booktitle = {3rd ACM/IEEE International Symposium on Machine Learning for CAD (MLCAD)},
  year      = {2024},
}
% Conference paper: DATE 2024.
% NOTE(review): the spelling "Noval" is kept as found; confirm against the
% published record before changing it.
@inproceedings{dong2024isptnet,
  author    = {Dong, Yichao and Niu, Dan and Jin, Zhou and Zhang, Chuan and Sun, Changyin and Zhou, Zhenya},
  title     = {ISPT-Net: A Noval Transient Backward-stepping Reduction Policy by Irregular Sequential Prediction Transformer},
  booktitle = {2024 21st Design, Automation and Test in Europe Conference (DATE)},
  year      = {2024},
  pages     = {1--6},
}
% Conference paper: DATE 2024.
@inproceedings{bai2024efficient,
  author    = {Bai, Yinuo and Yang, Xiaoyu and Lu, Yicheng and Niu, Dan and Zhou, Cheng and Jin, Zhou and Liu, Weifeng},
  title     = {Efficient Spectral-Aware Power Supply Noise Analysis for Low-Power Design Verification},
  booktitle = {2024 21st Design, Automation and Test in Europe Conference (DATE)},
  year      = {2024},
  pages     = {1--6},
}
% Conference paper: DATE 2024.
@inproceedings{jin2024msh,
  author    = {Jin, Zhou and Feng, Tian and Wu, Xiao and Niu, Dan and Zhou, Zhenya and Zhou, Cheng},
  title     = {MSH: A Multi-Stage HiZ-Aware Homotopy Framework for Nonlinear DC Analysis},
  booktitle = {2024 21st Design, Automation and Test in Europe Conference (DATE)},
  year      = {2024},
  pages     = {1--6},
}
% Conference paper: DATE 2024.
@inproceedings{chen2024tsapicer,
  author    = {Chen, Pengju and Niu, Dan and Jin, Zhou and Sun, Changyin and Li, Qi and Yan, Hao},
  title     = {TSA-TICER: A Two-Stage TICER Acceleration Framework for Model Order Reduction},
  booktitle = {2024 21st Design, Automation and Test in Europe Conference (DATE)},
  year      = {2024},
  pages     = {1--6},
}
% Conference paper: DATE 2024.
@inproceedings{yi2024cuper,
  author    = {Yi, Enxin and Duan, Yiru and Bai, Yinuo and Zhao, Kang and Jin, Zhou and Liu, Weifeng},
  title     = {Cuper: Customized Dataflow and Perceptual Decoding for Sparse Matrix-Vector Multiplication on HBM-Equipped FPGAs},
  booktitle = {2024 21st Design, Automation and Test in Europe Conference (DATE)},
  year      = {2024},
  pages     = {1--6},
}
% Journal article: CCF Transactions on High Performance Computing, 2024.
% Typed as an article because the venue is given in the journal field.
@article{Yang2024thSORT,
  author  = {Yang, Mouzhi and Zhang, Peng and Fang, Jianbin and Liu, Weifeng and Huang, Chun},
  title   = {thSORT: An Efficient Parallel Sorting Algorithm on Multi-core DSPs},
  journal = {CCF Transactions on High Performance Computing},
  year    = {2024},
  doi     = {10.1007/s42514-023-00175-7},
}
% Conference paper: IEEE CLUSTER 2023.
@inproceedings{li2023haspmv,
  author       = {Li, Wenxuan and Cheng, Helin and Lu, Zhengyang and Lu, Yuechen and Liu, Weifeng},
  title        = {HASpMV: Heterogeneity-Aware Sparse Matrix-Vector Multiplication on Modern Asymmetric Multicore Processors},
  booktitle    = {2023 IEEE International Conference on Cluster Computing (CLUSTER)},
  year         = {2023},
  pages        = {1--12},
  organization = {IEEE},
}
% Conference paper: IPDPS 2023.
@inproceedings{liao2023exploiting,
  author       = {Liao, Jianjin and Li, Mingzhen and Yang, Hailong and Sun, Qingxiao and Sun, Biao and Hao, Jiwei and Feng, Tianyu and Yu, Fengwei and Chen, Shengdong and Tao, Ye and Zhang, Zicheng and Luan, Zhongzhi and Qian, Depei},
  title        = {Exploiting Input Tensor Dynamics in Activation Checkpointing for Efficient Training on GPU},
  booktitle    = {2023 IEEE International Parallel and Distributed Processing Symposium (IPDPS)},
  year         = {2023},
  pages        = {1--15},
  organization = {IEEE},
}
% Journal article: IEEE Transactions on Parallel and Distributed Systems, 2023.
% NOTE(review): volume, number and pages are carried over as found; verify
% them against the DOI record.
@article{sun2023Adaptive,
  author  = {Sun, Qingxiao and Liu, Yi and Yang, Hailong and Jiang, Zhonghui and Luan, Zhongzhi and Qian, Depei},
  title   = {Adaptive Auto-tuning Framework for Global Exploration of Stencil Optimization on GPUs},
  journal = {IEEE Transactions on Parallel and Distributed Systems},
  year    = {2023},
  volume  = {33},
  number  = {1},
  pages   = {1--15},
  doi     = {10.1109/TPDS.2023.3325630},
}
% Conference paper: SC 2023.
@inproceedings{Lu2023DASP,
  author       = {Lu, Yuechen and Liu, Weifeng},
  title        = {DASP: Specific Dense Matrix Multiply-Accumulate Units Accelerated General Sparse Matrix-Vector Multiplication},
  booktitle    = {36th ACM/IEEE International Conference for High Performance Computing, Networking, Storage, and Analysis (SC)},
  year         = {2023},
  organization = {ACM/IEEE},
}
% Conference paper: SC 2023.
@inproceedings{Fu2023PanguLU,
  author       = {Fu, Xu and Zhang, Bingbin and Wang, Tengcheng and Li, Wenhao and Lu, Yuechen and Yi, Enxin and Zhao, Jianqi and Geng, Xiaohan and Li, Fangying and Zhang, Jingwen and Jin, Zhou and Liu, Weifeng},
  title        = {PanguLU: A Scalable Regular Two-Dimensional Block-Cyclic Sparse Direct Solver on Distributed Heterogeneous Systems},
  booktitle    = {36th ACM/IEEE International Conference for High Performance Computing, Networking, Storage, and Analysis (SC)},
  year         = {2023},
  organization = {ACM/IEEE},
}
% Journal article: CCF Transactions on High Performance Computing, 2023.
% Typed as an article because the entry carries journal, volume and number.
@article{Lu2023TileSpTRSV,
  author  = {Lu, Zhengyang and Liu, Weifeng},
  title   = {TileSpTRSV: a tiled algorithm for parallel sparse triangular solve on GPUs},
  journal = {CCF Transactions on High Performance Computing},
  year    = {2023},
  volume  = {5},
  number  = {2},
  pages   = {129--143},
  doi     = {10.1007/s42514-023-00151-1},
}
% Conference paper: ICPP 2023.
@inproceedings{Cheng2023HASpGEMM,
  author    = {Cheng, Helin and Li, Wenxuan and Lu, Yuechen and Liu, Weifeng},
  title     = {HASpGEMM: Heterogeneity-Aware Sparse General Matrix-Matrix Multiplication on Modern Asymmetric Multicore Processors},
  booktitle = {2023 Proceedings of the 52nd International Conference on Parallel Processing (ICPP)},
  year      = {2023},
  pages     = {807--817},
}
% Conference paper: DAC 2024.
@inproceedings{fan2024ReCG,
  author       = {Fan, Mingjia and Cheng, Xiaoming and Yang, Dechuang and Jin, Zhou and Liu, Weifeng},
  title        = {ReCG: ReRAM-Accelerated Sparse Conjugate Gradient},
  booktitle    = {2024 61st ACM/IEEE Design Automation Conference (DAC)},
  year         = {2024},
  pages        = {1--6},
  organization = {ACM/IEEE},
}
% Conference paper: DAC 2024.
@inproceedings{bai2024unleashing,
  author       = {Bai, Yinuo and Yi, Enxin and Xing, Wei and Yu, Bei and Jin, Zhou},
  title        = {Unleashing the Potential of AQFP Logic Placement via Entanglement Entropy and Projection},
  booktitle    = {2024 61st ACM/IEEE Design Automation Conference (DAC)},
  year         = {2024},
  pages        = {1--6},
  organization = {ACM/IEEE},
}
% Conference paper: DAC 2024.
@inproceedings{li2024MASC,
  author       = {Li, Chenxi and Zhang, Boyuan and Duan, Yongqiang and Li, Yang and Ye, Zuochang and Liu, Weifeng and Tao, Dingwen and Jin, Zhou},
  title        = {MASC: A Memory-Efficient Adjoint Sensitivity Analysis through Compression Using Novel Spatiotemporal Prediction},
  booktitle    = {2024 61st ACM/IEEE Design Automation Conference (DAC)},
  year         = {2024},
  pages        = {1--6},
  organization = {ACM/IEEE},
}
% Conference paper: DAC 2024.
@inproceedings{wang2024MAUnet,
  author       = {Wang, Mingyue and Cheng, Yuanqing and Lin, Yage and Peng, Kelin and Yang, Shunchuan and Jin, Zhou and Xing, Wei},
  title        = {MAUnet: Multiscale Attention U-Net for Effective IR Drop Prediction},
  booktitle    = {2024 61st ACM/IEEE Design Automation Conference (DAC)},
  year         = {2024},
  pages        = {1--6},
  organization = {ACM/IEEE},
}
% Conference paper: DAC 2023.
@inproceedings{wang2023Accelerating,
  author       = {Wang, Tengcheng and Li, Wenhao and Pei, Haojie and Sun, Yuying and Jin, Zhou and Liu, Weifeng},
  title        = {Accelerating Sparse LU Factorization with Density-Aware Adaptive Matrix Multiplication for Circuit Simulation},
  booktitle    = {2023 60th ACM/IEEE Design Automation Conference (DAC)},
  year         = {2023},
  pages        = {1--6},
  organization = {ACM/IEEE},
}
% Conference paper: DAC 2023.
@inproceedings{fan2023AmgR,
  author       = {Fan, Mingjia and Tian, Xiaotian and He, Yintao and Li, Junxian and Duan, Yiru and Hu, Xiaozhe and Wang, Ying and Jin, Zhou and Liu, Weifeng},
  title        = {AmgR: Algebraic Multigrid Accelerated on ReRAM},
  booktitle    = {2023 60th ACM/IEEE Design Automation Conference (DAC)},
  year         = {2023},
  pages        = {1--6},
  organization = {ACM/IEEE},
}
% Conference paper: CCGrid 2023.
@inproceedings{mi2023balancing,
  author       = {Mi, Hongli and Yu, Xiangrui and Yu, Xiaosong and Wu, Shuangyuan and Liu, Weifeng},
  title        = {Balancing Computation and Communication in Distributed Sparse Matrix-Vector Multiplication},
  booktitle    = {2023 IEEE/ACM 23rd International Symposium on Cluster, Cloud and Internet Computing (CCGrid)},
  year         = {2023},
  pages        = {535--544},
  organization = {IEEE},
}
% Conference paper: ISEDA 2024.
@inproceedings{li2024EMGA,
  author       = {Li, Yang and Duan, Yongqiang and Zhang, Hao and Niu, Dan and Wu, Xiao and Jin, Zhou},
  title        = {EMGA: An Evolutionary Memory Grouping Algorithm for MBIST},
  booktitle    = {2024 International Symposium of Electronics Design Automation (ISEDA)},
  year         = {2024},
  organization = {IEEE},
}
% Conference paper: ISEDA 2024.
% NOTE(review): "Static Time Analysis" may be intended as "Static Timing
% Analysis"; confirm against the published title.
@inproceedings{deng2024sdssta,
  author       = {Deng, Fuxing and Feng, Yihang and Niu, Dan and Wu, Xiao and Jin, Zhou},
  title        = {SD-SSTA: Statistical Static Time Analysis Algorithm Considering Skewed Distribution},
  booktitle    = {2024 International Symposium of Electronics Design Automation (ISEDA)},
  year         = {2024},
  organization = {IEEE},
}
% Conference paper: ISEDA 2024.
@inproceedings{qi2024UnetPro,
  author       = {Qi, Zhengfei and Wang, Wanchao and Yu, Chengxuan and Niu, Dan and Wu, Xiao and Jin, Zhou},
  title        = {UnetPro: Combining Attention with Skip Connection in Unet for Efficient IR Drop Prediction},
  booktitle    = {2024 International Symposium of Electronics Design Automation (ISEDA)},
  year         = {2024},
  organization = {IEEE},
}
% Conference paper: ISEDA 2024.
% NOTE(review): last author spelled "Changyin" to agree with the other
% entries in this file that list Sun, Changyin; confirm against the paper.
@inproceedings{zhang2024pticer,
  author       = {Zhang, Zijia and Niu, Dan and Jin, Zhou and Chen, Pengju and Zhou, Zhenya and Sun, Changyin},
  title        = {P-TICER: An Effective Parallel TICER Acceleration Method for Model Order Reduction},
  booktitle    = {2024 International Symposium of Electronics Design Automation (ISEDA)},
  year         = {2024},
  organization = {IEEE},
}
% Conference paper: ISEDA 2023.
@inproceedings{zha2023Deeplearning,
  author       = {Zha, Xiaru and Pei, Haojie and Niu, Dan and Wu, Xiao and Jin, Zhou},
  title        = {Deep Learning Enhanced Time-Step Control in Pseudo Transient Analysis for Efficient Nonlinear DC Simulation},
  booktitle    = {2023 International Symposium of Electronics Design Automation (ISEDA)},
  year         = {2023},
  pages        = {23--28},
  organization = {IEEE},
}
% Journal article: ACM TACO, 2018.
@article{yu2018improving,
  author    = {Yu, Chao and Bai, Yuebin and Sun, Qingxiao and Yang, Hailong},
  title     = {Improving thread-level parallelism in GPUs through expanding register file to scratchpad memory},
  journal   = {ACM Transactions on Architecture and Code Optimization (TACO)},
  year      = {2018},
  volume    = {15},
  number    = {4},
  pages     = {1--24},
  publisher = {ACM New York, NY, USA},
}
% Conference paper: International Conference on Algorithms and Architectures
% for Parallel Processing, 2020.
@inproceedings{dun2020accelerating,
  author       = {Dun, Ming and Li, Yunchun and You, Xin and Sun, Qingxiao and Luan, Zerong and Yang, Hailong},
  title        = {Accelerating De Novo Assembler WTDBG2 on Commodity Servers},
  booktitle    = {International Conference on Algorithms and Architectures for Parallel Processing},
  year         = {2020},
  pages        = {232--246},
  organization = {Springer},
}
% Journal article: Information Sciences, 2021.
@article{dun2021towards,
  author    = {Dun, Ming and Li, Yunchun and Sun, Qingxiao and Yang, Hailong and Li, Wei and Luan, Zhongzhi and Gan, Lin and Yang, Guangwen and Qian, Depei},
  title     = {Towards efficient canonical polyadic decomposition on sunway many-core processor},
  journal   = {Information Sciences},
  year      = {2021},
  volume    = {549},
  pages     = {221--248},
  publisher = {Elsevier},
}
% Journal article: IEEE Transactions on Parallel and Distributed Systems.
@article{li2020deep,
  author    = {Li, Mingzhen and Liu, Yi and Liu, Xiaoyan and Sun, Qingxiao and You, Xin and Yang, Hailong and Luan, Zhongzhi and Gan, Lin and Yang, Guangwen and Qian, Depei},
  title     = {The deep learning compiler: A comprehensive survey},
  journal   = {IEEE Transactions on Parallel and Distributed Systems},
  year      = {2020},
  volume    = {32},
  number    = {3},
  pages     = {708--727},
  publisher = {IEEE},
}
% Journal article: Future Generation Computer Systems, 2021.
@article{xiao2021highly,
  author    = {Xiao, Zhiyong and Liu, Xu and Xu, Jingheng and Sun, Qingxiao and Gan, Lin},
  title     = {Highly scalable parallel genetic algorithm on sunway many-core processors},
  journal   = {Future Generation Computer Systems},
  year      = {2021},
  volume    = {114},
  pages     = {679--691},
  publisher = {Elsevier},
}
% Conference paper: 50th International Conference on Parallel Processing, 2021.
@inproceedings{li2021automatic,
  author    = {Li, Mingzhen and Liu, Yi and Yang, Hailong and Hu, Yongmin and Sun, Qingxiao and Chen, Bangduo and You, Xin and Liu, Xiaoyan and Luan, Zhongzhi and Qian, Depei},
  title     = {Automatic code generation and optimization of large-scale stencil computation on many-core processors},
  booktitle = {Proceedings of the 50th International Conference on Parallel Processing},
  year      = {2021},
  pages     = {1--12},
}
% Conference paper: HPCC/DSS/SmartCity/DependSys 2022.
@inproceedings{hao2022towards,
  author       = {Hao, Jiwei and Yang, Hailong and Sun, Qingxiao and Zhang, Huaitao and Luan, Zhongzhi and Qian, Depei},
  title        = {Towards Optimized Streaming Tensor Completion on multiple GPUs},
  booktitle    = {2022 IEEE 24th Int Conf on High Performance Computing \& Communications; 8th Int Conf on Data Science \& Systems; 20th Int Conf on Smart City; 8th Int Conf on Dependability in Sensor, Cloud \& Big Data Systems \& Application (HPCC/DSS/SmartCity/DependSys)},
  year         = {2022},
  pages        = {1123--1128},
  organization = {IEEE},
}
% Conference paper: ACM International Conference on Supercomputing, 2021.
@inproceedings{dun2021optimized,
  author    = {Dun, Ming and Li, Yunchun and Yang, Hailong and Sun, Qingxiao and Luan, Zhongzhi and Qian, Depei},
  title     = {An optimized tensor completion library for multiple GPUs},
  booktitle = {Proceedings of the ACM International Conference on Supercomputing},
  year      = {2021},
  pages     = {417--430},
}
% Conference paper: IEEE CLUSTER 2021.
@inproceedings{sun2021cstuner,
  author       = {Sun, Qingxiao and Liu, Yi and Yang, Hailong and Jiang, Zhonghui and Liu, Xiaoyan and Dun, Ming and Luan, Zhongzhi and Qian, Depei},
  title        = {cstuner: Scalable auto-tuning framework for complex stencil computation on gpus},
  booktitle    = {2021 IEEE International Conference on Cluster Computing (CLUSTER)},
  year         = {2021},
  pages        = {192--203},
  organization = {IEEE},
}
% Conference paper: SC22. The author list is truncated with "and others".
@inproceedings{sun2022cognn,
  author       = {Sun, Qingxiao and Liu, Yi and Yang, Hailong and Zhang, Ruizhe and Dun, Ming and Li, Mingzhen and Liu, Xiaoyan and Xiao, Wencong and Li, Yong and Luan, Zhongzhi and others},
  title        = {CoGNN: efficient scheduling for concurrent GNN training on GPUs},
  booktitle    = {SC22: International Conference for High Performance Computing, Networking, Storage and Analysis},
  year         = {2022},
  pages        = {1--15},
  organization = {IEEE},
}
% Conference paper: IPDPS 2022.
@inproceedings{sun2022stencilmart,
  author       = {Sun, Qingxiao and Liu, Yi and Yang, Hailong and Jiang, Zhonghui and Luan, Zhongzhi and Qian, Depei},
  title        = {Stencilmart: Predicting optimization selection for stencil computations across gpus},
  booktitle    = {2022 IEEE International Parallel and Distributed Processing Symposium (IPDPS)},
  year         = {2022},
  pages        = {875--885},
  organization = {IEEE},
}
% Journal article: Parallel Computing, 2022.
% Second author written as "Liu, Yi" to agree with the sibling entries that
% share this author list.
@article{sun2022qos,
  author    = {Sun, Qingxiao and Liu, Yi and Yang, Hailong and Li, Mingzhen and Luan, Zhongzhi and Qian, Depei},
  title     = {QoS-aware dynamic resource allocation with improved utilization and energy efficiency on GPU},
  journal   = {Parallel Computing},
  year      = {2022},
  volume    = {113},
  pages     = {102958},
  publisher = {Elsevier},
}
% Conference paper: SC20.
@inproceedings{sun2020sptfs,
  author       = {Sun, Qingxiao and Liu, Yi and Dun, Ming and Yang, Hailong and Luan, Zhongzhi and Gan, Lin and Yang, Guangwen and Qian, Depei},
  title        = {Sptfs: Sparse tensor format selection for mttkrp via deep learning},
  booktitle    = {SC20: International Conference for High Performance Computing, Networking, Storage and Analysis},
  year         = {2020},
  pages        = {1--14},
  organization = {IEEE},
}
% Journal article: IEEE Transactions on Computers.
@article{sun2021input,
  author    = {Sun, Qingxiao and Liu, Yi and Yang, Hailong and Dun, Ming and Luan, Zhongzhi and Gan, Lin and Yang, Guangwen and Qian, Depei},
  title     = {Input-aware sparse tensor storage format selection for optimizing mttkrp},
  journal   = {IEEE Transactions on Computers},
  year      = {2021},
  volume    = {71},
  number    = {8},
  pages     = {1968--1981},
  publisher = {IEEE},
}
% Conference paper: IEEE CLUSTER 2019.
@inproceedings{sun2019smqos,
  author       = {Sun, Qingxiao and Liu, Yi and Yang, Hailong and Luan, Zhongzhi and Qian, Depei},
  title        = {Smqos: Improving utilization and energy efficiency with qos awareness on gpus},
  booktitle    = {2019 IEEE International Conference on Cluster Computing (CLUSTER)},
  year         = {2019},
  pages        = {1--5},
  organization = {IEEE},
}
% Conference paper (invited): ASP-DAC 2024.
@inproceedings{jin2024machine,
  author       = {Jin, Zhou and Li, Wenhao and Bai, Yinuo and Wang, Tengcheng and Lu, Yicheng and Liu, Weifeng},
  title        = {Machine Learning and GPU Accelerated Sparse Linear Solvers for Transistor-Level Circuit Simulation: A Perspective Survey (Invited Paper)},
  booktitle    = {2024 29th Asia and South Pacific Design Automation Conference (ASP-DAC)},
  year         = {2024},
  pages        = {96--101},
  organization = {IEEE},
}
% Conference paper: ASP-DAC 2022.
@inproceedings{chen2022application,
  author       = {Chen, Yufei and Pei, Haojie and Dong, Xiao and Jin, Zhou and Zhuo, Cheng},
  title        = {Application of deep learning in back-end simulation: Challenges and opportunities},
  booktitle    = {2022 27th Asia and South Pacific Design Automation Conference (ASP-DAC)},
  year         = {2022},
  pages        = {641--646},
  organization = {IEEE},
}
% Conference paper: ICPP '22.
% Year set to 2022 to agree with the series "ICPP '22" and the citation key.
@inproceedings{2022-ji-tilespmspv,
  author    = {Ji, Haonan and Song, Huimin and Lu, Shibo and Jin, Zhou and Tan, Guangming and Liu, Weifeng},
  title     = {TileSpMSpV: A Tiled Algorithm for Sparse Matrix-Sparse Vector Multiplication on GPUs},
  booktitle = {Proceedings of the 51st International Conference on Parallel Processing},
  series    = {ICPP '22},
  year      = {2022},
  numpages  = {11},
  publisher = {Association for Computing Machinery},
  doi       = {10.1145/3545008.3545028},
}
% Journal article: IEEE TCAD, 2023.
% NOTE(review): the key names "jin" while the first listed author is Niu;
% the key is kept unchanged so existing citations still resolve.
@article{2023-jin-ossp,
  author  = {Niu, Dan and Dong, Yichao and Jin, Zhou and Zhang, Chuan and Li, Qi and Sun, Changyin},
  title   = {OSSP-PTA: An Online Stochastic Stepping Policy for PTA on Reinforcement Learning},
  journal = {IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems},
  year    = {2023},
  doi     = {10.1109/TCAD.2023.3251731},
}
% Journal article: IEEE Transactions on Circuits and Systems II: Express Briefs.
% Typed as an article because the venue is given in the journal field.
@article{2022-dong-PTA,
  author  = {Dong, Yichao and Niu, Dan and Jin, Zhou and Zhang, Chuan and Li, Qi and Sun, Changyin},
  title   = {Adaptive Stepping PTA for DC Analysis Based on Reinforcement Learning},
  journal = {IEEE Transactions on Circuits and Systems II: Express Briefs},
  year    = {2023},
  volume  = {70},
  doi     = {10.1109/TCSII.2022.3207356},
}
% Journal article: ACM Trans. Des. Autom. Electron. Syst., 2022.
% Typed as an article because the venue is given in the journal field.
% NOTE(review): the title article was changed from "An" to "A"; confirm the
% exact published title (including the punctuation after "BoA-PTA").
@article{2022-xing-BoA-PTA,
  author    = {Xing, Wei W. and Jin, Xiang and Feng, Tian and Niu, Dan and Zhao, Weishen and Jin, Zhou},
  title     = {BoA-PTA, A Bayesian Optimization Accelerated PTA Solver for SPICE Simulation},
  journal   = {ACM Trans. Des. Autom. Electron. Syst.},
  year      = {2022},
  month     = jul,
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  issn      = {1084-4309},
  doi       = {10.1145/3555805},
  url       = {https://doi.org/10.1145/3555805},
}
% Conference paper: DAC '22.
% Typed as inproceedings because the venue is a proceedings booktitle.
@inproceedings{2022-jin-RLPTA,
  author    = {Jin, Zhou and Pei, Haojie and Dong, Yichao and Jin, Xiang and Wu, Xiao and Xing, Wei W. and Niu, Dan},
  title     = {Accelerating Nonlinear DC Circuit Simulation with Reinforcement Learning},
  booktitle = {Proceedings of the 59th ACM/IEEE Design Automation Conference},
  series    = {DAC '22},
  year      = {2022},
  pages     = {619--624},
  numpages  = {6},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  doi       = {10.1145/3489517.3530512},
  url       = {https://doi.org/10.1145/3489517.3530512},
}
% Conference paper: Great Lakes Symposium on VLSI, 2021.
% Typed as inproceedings because the venue is a proceedings booktitle.
% The doi field repeats the identifier already present in the url field.
@inproceedings{2021-jin-PALBBD,
  author    = {Jin, Zhou and Feng, Tian and Duan, Yiru and Wu, Xiao and Cheng, Minghou and Zhou, Zhenya and Liu, Weifeng},
  title     = {PALBBD: A Parallel ArcLength Method Using Bordered Block Diagonal Form for DC Analysis},
  booktitle = {Proceedings of the 2021 on Great Lakes Symposium on VLSI},
  year      = {2021},
  pages     = {327--332},
  numpages  = {6},
  isbn      = {9781450383936},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  doi       = {10.1145/3453688.3461526},
  url       = {https://doi.org/10.1145/3453688.3461526},
}
% Conference paper: PPoPP '22.
% The doi field repeats the identifier already present in the url field.
@inproceedings{2022-niu-tilespgemm,
  author    = {Niu, Yuyao and Lu, Zhengyang and Ji, Haonan and Song, Shuhui and Jin, Zhou and Liu, Weifeng},
  title     = {TileSpGEMM: A Tiled Algorithm for Parallel Sparse General Matrix-Matrix Multiplication on GPUs},
  booktitle = {Proceedings of the 27th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming},
  series    = {PPoPP '22},
  year      = {2022},
  pages     = {90--106},
  numpages  = {17},
  isbn      = {9781450392044},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  doi       = {10.1145/3503221.3508431},
  url       = {https://doi.org/10.1145/3503221.3508431},
}
% Journal article: CCF Transactions on High Performance Computing, 2021.
@article{lu2021implementing,
  author  = {Lu, Yuechen and Luo, Yuchen and Lian, Haocheng and Jin, Zhou and Liu, Weifeng},
  title   = {Implementing LU and Cholesky factorizations on artificial intelligence accelerators},
  journal = {CCF Transactions on High Performance Computing},
  year    = {2021},
  volume  = {3},
  number  = {3},
  pages   = {286--297},
  doi     = {10.1007/s42514-021-00075-8},
}
% Conference paper: DAC 2021.
@inproceedings{9586141,
  author    = {Zhao, Jianqi and Wen, Yao and Luo, Yuchen and Jin, Zhou and Liu, Weifeng and Zhou, Zhenya},
  title     = {SFLU: Synchronization-Free Sparse LU Factorization for Fast Circuit Simulation on GPUs},
  booktitle = {2021 58th ACM/IEEE Design Automation Conference (DAC)},
  year      = {2021},
  pages     = {37--42},
  doi       = {10.1109/DAC18074.2021.9586141},
}
% Journal article: IEEE Transactions on Parallel and Distributed Systems, 2022.
@article{9459513,
  author  = {Xie, Zhen and Tan, Guangming and Liu, Weifeng and Sun, Ninghui},
  title   = {A Pattern-Based SpGEMM Library for Multi-Core and Many-Core Architectures},
  journal = {IEEE Transactions on Parallel and Distributed Systems},
  year    = {2022},
  volume  = {33},
  number  = {1},
  pages   = {159--175},
  doi     = {10.1109/TPDS.2021.3090328},
}
% Journal article: IEEE Transactions on Parallel and Distributed Systems, 2021.
@article{9380961,
  author  = {Zhang, Feng and Su, Jiya and Liu, Weifeng and He, Bingsheng and Wu, Ruofan and Du, Xiaoyong and Wang, Rujia},
  title   = {YuenyeungSpTRSV: A Thread-Level and Warp-Level Fusion Synchronization-Free Sparse Triangular Solve},
  journal = {IEEE Transactions on Parallel and Distributed Systems},
  year    = {2021},
  volume  = {32},
  number  = {9},
  pages   = {2321--2337},
  doi     = {10.1109/TPDS.2021.3066635},
}
% Journal article: IEEE Transactions on Parallel and Distributed Systems, 2021.
@article{9373912,
  author  = {Chen, Jing and Fang, Jianbin and Liu, Weifeng and Yang, Canqun},
  title   = {BALS: Blocked Alternating Least Squares for Parallel Sparse Matrix Factorization},
  journal = {IEEE Transactions on Parallel and Distributed Systems},
  year    = {2021},
  volume  = {32},
  number  = {9},
  pages   = {2291--2302},
  doi     = {10.1109/TPDS.2021.3064942},
}
% Journal article: International Journal of Parallel Programming, 2021.
% NOTE(review): volume, number and pages were empty placeholders and have been
% dropped; fill them in from the publisher record when available.
@article{Ji2021,
  author  = {Ji, Haonan and Lu, Shibo and Hou, Kaixi and Wang, Hao and Jin, Zhou and Liu, Weifeng and Vinter, Brian},
  title   = {Segmented Merge: A New Primitive for Parallel Sparse Matrix Computations},
  journal = {International Journal of Parallel Programming},
  year    = {2021},
  month   = mar,
  day     = {26},
  issn    = {1573-7640},
}
% Conference paper: IPDPS 2021.
@inproceedings{9460505,
  author    = {Niu, Yuyao and Lu, Zhengyang and Dong, Meichen and Jin, Zhou and Liu, Weifeng and Tan, Guangming},
  title     = {TileSpMV: A Tiled Algorithm for Sparse Matrix-Vector Multiplication on GPUs},
  booktitle = {2021 IEEE International Parallel and Distributed Processing Symposium (IPDPS)},
  year      = {2021},
  pages     = {68--78},
  doi       = {10.1109/IPDPS49936.2021.00016},
}
% Conference paper: ICPP '20.
@inproceedings{10.1145/3404397.3404413,
  author    = {Lu, Zhengyang and Niu, Yuyao and Liu, Weifeng},
  title     = {Efficient Block Algorithms for Parallel Sparse Triangular Solve},
  booktitle = {49th International Conference on Parallel Processing - ICPP},
  series    = {ICPP '20},
  location  = {Edmonton, AB, Canada},
  year      = {2020},
  articleno = {63},
  numpages  = {11},
  keywords  = {block algorithm, sparse triangular solve, GPU, sparse matrix},
  isbn      = {9781450388160},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  doi       = {10.1145/3404397.3404413},
  url       = {https://doi.org/10.1145/3404397.3404413},
}
% Conference paper: ICPP '20.
@inproceedings{10.1145/3404397.3404400,
  author    = {Su, Jiya and Zhang, Feng and Liu, Weifeng and He, Bingsheng and Wu, Ruofan and Du, Xiaoyong and Wang, Rujia},
  title     = {CapelliniSpTRSV: A Thread-Level Synchronization-Free Sparse Triangular Solve on GPUs},
  booktitle = {49th International Conference on Parallel Processing - ICPP},
  series    = {ICPP '20},
  location  = {Edmonton, AB, Canada},
  year      = {2020},
  articleno = {2},
  numpages  = {11},
  isbn      = {9781450388160},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  doi       = {10.1145/3404397.3404400},
  url       = {https://doi.org/10.1145/3404397.3404400},
}
% Conference paper in the volume "Network and Parallel Computing", 2021.
@inproceedings{10.1007/978-3-030-79478-1_20,
  author    = {Yu, Xiaosong and Ma, Huihui and Qu, Zhengyu and Fang, Jianbin and Liu, Weifeng},
  editor    = {He, Xin and Shao, En and Tan, Guangming},
  title     = {NUMA-Aware Optimization of Sparse Matrix-Vector Multiplication on ARMv8-Based Many-Core Architectures},
  booktitle = {Network and Parallel Computing},
  year      = {2021},
  pages     = {231--242},
  publisher = {Springer International Publishing},
  address   = {Cham},
  isbn      = {978-3-030-79478-1},
}
% Conference paper: ICS '19.
% NOTE(review): isbn, location, pages, url, doi, acmid and address were empty
% placeholders and have been dropped; fill them in when the data is available.
@inproceedings{xie2019spgemm,
  author    = {Xie, Zhen and Tan, Guangming and Liu, Weifeng and Sun, Ninghui},
  title     = {IA-SpGEMM: An Input-aware Auto-tuning Framework for Parallel Sparse Matrix-Matrix Multiplication},
  booktitle = {Proceedings of the 2019 International Conference on Supercomputing},
  series    = {ICS '19},
  year      = {2019},
  numpages  = {12},
}
% Journal article: CCF Transactions on High Performance Computing, 2019.
% NOTE(review): month, day, volume, number and pages were empty placeholders
% and have been dropped; fill them in when the data is available.
@article{zhang2019apu,
  author  = {Zhang, Feng and Liu, Weifeng and Feng, Ningxuan and Zhai, Jidong and Du, Xiaoyong},
  title   = {Performance Evaluation and Analysis of Sparse Matrix and Graph Kernels on Heterogeneous Processors},
  journal = {CCF Transactions on High Performance Computing},
  year    = {2019},
  issn    = {2524-4922},
}
% Journal article: International Journal of Parallel Programming, 2019.
% NOTE(review): month, day, volume, number and pages were empty placeholders
% and have been dropped; fill them in when the data is available.
@article{liu2019spgemm,
  author  = {Liu, Junhong and He, Xin and Liu, Weifeng and Tan, Guangming},
  title   = {Register-Aware Optimizations for Parallel Sparse Matrix-Matrix Multiplication},
  journal = {International Journal of Parallel Programming},
  year    = {2019},
  issn    = {1573-7640},
}
% Journal article: Future Generation Computer Systems, 2018.
% NOTE(review): volume, number and pages were empty placeholders and have been
% dropped; fill them in when the data is available.
@article{chen2018clmf,
  author  = {Chen, Jing and Fang, Jianbin and Liu, Weifeng and Tang, Tao and Yang, Canqun},
  title   = {clMF: A Fine-Grained and Portable Alternating Least Squares Algorithm for Parallel Matrix Factorization},
  journal = {Future Generation Computer Systems},
  year    = {2018},
}
% Conference paper: ICS '18.
@inproceedings{li2018warp,
  author    = {Li, Ang and Liu, Weifeng and Wang, Linnan and Barker, Kevin and Song, Shuaiwen Leon},
  title     = {Warp-Consolidation: A Novel Execution Model for GPUs},
  booktitle = {Proceedings of the 2018 International Conference on Supercomputing},
  series    = {ICS '18},
  location  = {Beijing, China},
  year      = {2018},
  pages     = {53--64},
  numpages  = {12},
  isbn      = {978-1-4503-5783-8},
  address   = {New York, NY, USA},
  acmid     = {3205294},
  doi       = {10.1145/3205289.3205294},
  url       = {http://doi.acm.org/10.1145/3205289.3205294},
}
% Conference paper: PPoPP '18.
@inproceedings{wang2018sptrsv,
  author    = {Wang, Xinliang and Liu, Weifeng and Xue, Wei and Wu, Li},
  title     = {swSpTRSV: A Fast Sparse Triangular Solve with Sparse Level Tile Layout on Sunway Architectures},
  booktitle = {Proceedings of the 23rd ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming},
  series    = {PPoPP '18},
  location  = {Vienna, Austria},
  year      = {2018},
  pages     = {338--353},
  numpages  = {16},
  isbn      = {978-1-4503-4982-6},
  publisher = {ACM},
  address   = {New York, NY, USA},
  acmid     = {3178513},
  doi       = {10.1145/3178487.3178513},
  url       = {http://doi.acm.org/10.1145/3178487.3178513},
}
% Conference paper: PPoPP '18.
@inproceedings{liu2018spgemm,
  author    = {Liu, Junhong and He, Xin and Liu, Weifeng and Tan, Guangming},
  title     = {Register-based Implementation of the Sparse General Matrix-matrix Multiplication on GPUs},
  booktitle = {Proceedings of the 23rd ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming},
  series    = {PPoPP '18},
  location  = {Vienna, Austria},
  year      = {2018},
  pages     = {407--408},
  numpages  = {2},
  isbn      = {978-1-4503-4982-6},
  publisher = {ACM},
  address   = {New York, NY, USA},
  acmid     = {3178529},
  doi       = {10.1145/3178487.3178529},
  url       = {http://doi.acm.org/10.1145/3178487.3178529},
}
% Conference paper: SC '17.
@inproceedings{li2017hbm,
  author    = {Li, Ang and Liu, Weifeng and Kristensen, Mads R. B. and Vinter, Brian and Wang, Hao and Hou, Kaixi and Marquez, Andres and Song, Shuaiwen Leon},
  title     = {Exploring and Analyzing the Real Impact of Modern On-package Memory on HPC Scientific Kernels},
  booktitle = {Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis},
  series    = {SC '17},
  location  = {Denver, Colorado},
  year      = {2017},
  pages     = {26:1--26:14},
  articleno = {26},
  numpages  = {14},
  isbn      = {978-1-4503-5114-0},
  publisher = {ACM},
  address   = {New York, NY, USA},
  acmid     = {3126931},
  doi       = {10.1145/3126908.3126931},
  url       = {http://doi.acm.org/10.1145/3126908.3126931},
}
% Journal article: Concurrency and Computation: Practice and Experience, 2017.
% The pages field holds the article identifier only.
@article{liu2017sptrsv,
  author  = {Liu, Weifeng and Li, Ang and Hogg, Jonathan D. and Duff, Iain S. and Vinter, Brian},
  title   = {Fast Synchronization-Free Algorithms for Parallel Sparse Triangular Solves with Multiple Right-Hand Sides},
  journal = {Concurrency and Computation: Practice and Experience},
  year    = {2017},
  volume  = {29},
  number  = {21},
  pages   = {e4244},
  issn    = {1532-0634},
  doi     = {10.1002/cpe.4244},
  url     = {http://dx.doi.org/10.1002/cpe.4244},
}
% Conference paper: ICS '17.
@inproceedings{hou2017segsort,
  author    = {Hou, Kaixi and Liu, Weifeng and Wang, Hao and Feng, Wu-chun},
  title     = {Fast Segmented Sort on GPUs},
  booktitle = {Proceedings of the International Conference on Supercomputing},
  series    = {ICS '17},
  location  = {Chicago, Illinois},
  year      = {2017},
  pages     = {12:1--12:10},
  articleno = {12},
  numpages  = {10},
  isbn      = {978-1-4503-5020-4},
  publisher = {ACM},
  acmid     = {3079105},
  doi       = {10.1145/3079079.3079105},
  url       = {http://doi.acm.org/10.1145/3079079.3079105},
}
% Conference paper: ASPLOS '17.
@inproceedings{li2017cta,
  author    = {Li, Ang and Song, Shuaiwen Leon and Liu, Weifeng and Liu, Xu and Kumar, Akash and Corporaal, Henk},
  title     = {Locality-Aware CTA Clustering for Modern GPUs},
  booktitle = {Proceedings of the Twenty-Second International Conference on Architectural Support for Programming Languages and Operating Systems},
  series    = {ASPLOS '17},
  location  = {Xi'an, China},
  year      = {2017},
  pages     = {297--311},
  numpages  = {15},
  isbn      = {978-1-4503-4465-4},
  publisher = {ACM},
  acmid     = {3037709},
  doi       = {10.1145/3037697.3037709},
  url       = {http://doi.acm.org/10.1145/3037697.3037709},
}
% Workshop paper: Parlearning '17.
@inproceedings{chen2017als,
  author    = {Jing Chen and Jianbin Fang and Weifeng Liu and Tao Tang and Xuhao Chen and Canqun Yang},
  title     = {Efficient and Portable ALS Matrix Factorization for Recommender Systems},
  booktitle = {Proceedings of the 6th International Workshop on Parallel and Distributed Computing for Large Scale Machine Learning and Big Data Analytics},
  series    = {Parlearning '17},
  pages     = {409--418},
  year      = {2017},
}
% Euro-Par 2016 paper. The proceedings volume number lives in "volume"/"series"
% rather than inside the booktitle text.
@inproceedings{liu2016sptrsv,
  author    = {Liu, Weifeng and Li, Ang and Hogg, Jonathan and Duff, Iain S. and Vinter, Brian},
  title     = {A Synchronization-Free Algorithm for Parallel Sparse Triangular Solves},
  booktitle = {Proceedings of the 22nd International Conference on Euro-Par 2016: Parallel Processing},
  series    = {Lecture Notes in Computer Science},
  volume    = {9833},
  year      = {2016},
  publisher = {Springer-Verlag New York, Inc.},
  isbn      = {978-3-319-43658-6},
  pages     = {617--630},
  numpages  = {14},
  doi       = {10.1007/978-3-319-43659-3_45},
  url       = {http://dx.doi.org/10.1007/978-3-319-43659-3_45},
  acmid     = {2990990},
}
% ICS '16 paper. "pages" uses the article-number form (33:1--33:13, 13 pages).
@inproceedings{wang2016sptrans,
  author    = {Wang, Hao and Liu, Weifeng and Hou, Kaixi and Feng, Wu-chun},
  title     = {Parallel Transposition of Sparse Data Structures},
  booktitle = {Proceedings of the 2016 International Conference on Supercomputing},
  series    = {ICS '16},
  location  = {Istanbul, Turkey},
  publisher = {ACM},
  year      = {2016},
  isbn      = {978-1-4503-4361-9},
  pages     = {33:1--33:13},
  numpages  = {13},
  doi       = {10.1145/2925426.2926291},
  url       = {http://doi.acm.org/10.1145/2925426.2926291},
}
% ICS '15 paper. The format name in the title is braced so sentence-casing styles keep it uppercase.
@inproceedings{liu2015csr5,
  author    = {Liu, Weifeng and Vinter, Brian},
  title     = {{CSR5}: An Efficient Storage Format for Cross-Platform Sparse Matrix-Vector Multiplication},
  booktitle = {Proceedings of the 29th ACM International Conference on Supercomputing},
  series    = {ICS '15},
  year      = {2015},
  location  = {Newport Beach, California, USA},
  publisher = {ACM},
  isbn      = {978-1-4503-3559-1},
  pages     = {339--350},
  numpages  = {12},
  doi       = {10.1145/2751205.2751209},
  url       = {http://doi.acm.org/10.1145/2751205.2751209},
}
% Journal article cited by volume only (volume 49, November 2015); no issue number is recorded.
@article{liu2015spmv,
  author     = {Liu, Weifeng and Vinter, Brian},
  title      = {Speculative Segmented Sum for Sparse Matrix-vector Multiplication on Heterogeneous Processors},
  journal    = {Parallel Computing},
  issue_date = {November 2015},
  volume     = {49},
  month      = nov,
  year       = {2015},
  issn       = {0167-8191},
  pages      = {179--193},
  numpages   = {15},
  doi        = {10.1016/j.parco.2015.04.004},
  url        = {https://doi.org/10.1016/j.parco.2015.04.004},
}
% Journal article cited by volume only (volume 85); no issue number is recorded.
% The acronym in the title is braced against sentence-casing styles.
@article{liu2015spgemm,
  author   = {Liu, Weifeng and Vinter, Brian},
  title    = {A Framework for General Sparse Matrix-Matrix Multiplication on {GPUs} and Heterogeneous Processors},
  journal  = {Journal of Parallel and Distributed Computing},
  volume   = {85},
  month    = nov,
  year     = {2015},
  issn     = {0743-7315},
  pages    = {47--61},
  numpages = {15},
  doi      = {10.1016/j.jpdc.2015.06.010},
  url      = {http://dx.doi.org/10.1016/j.jpdc.2015.06.010},
}
% IPDPS '14 paper. The acronym in the title is braced so sentence-casing styles keep it uppercase.
@inproceedings{liu2014spgemm,
  author    = {Liu, Weifeng and Vinter, Brian},
  title     = {An Efficient {GPU} General Sparse Matrix-Matrix Multiplication for Irregular Data},
  booktitle = {Proceedings of the 2014 IEEE 28th International Parallel and Distributed Processing Symposium},
  series    = {IPDPS '14},
  year      = {2014},
  publisher = {IEEE Computer Society},
  isbn      = {978-1-4799-3800-1},
  pages     = {370--381},
  numpages  = {12},
  doi       = {10.1109/IPDPS.2014.47},
  url       = {http://dx.doi.org/10.1109/IPDPS.2014.47},
}
% GPGPU-7 workshop paper; a plain ten-page range, consistent with numpages.
@inproceedings{liu2014adheap,
  author    = {Liu, Weifeng and Vinter, Brian},
  title     = {Ad-heap: An Efficient Heap Data Structure for Asymmetric Multicore Processors},
  booktitle = {Proceedings of Workshop on General Purpose Processing Using GPUs},
  series    = {GPGPU-7},
  year      = {2014},
  publisher = {ACM},
  isbn      = {978-1-4503-2766-4},
  pages     = {54--63},
  numpages  = {10},
  doi       = {10.1145/2576779.2576786},
  url       = {http://doi.acm.org/10.1145/2576779.2576786},
}
% PhD thesis entry (school: University of Copenhagen).
@phdthesis{phdthesisliu,
  author = {Liu, Weifeng},
  title  = {Parallel and Scalable Sparse Basic Linear Algebra Subprograms},
  school = {University of Copenhagen},
  year   = {2015},
}
% Journal article. Accented letters in author names are written as ASCII LaTeX
% escapes ({\o}, {\'o}) so the entry survives any file encoding.
@article{ren2018trans,
  author  = {Ren, H. and Kanhabua, N. and M{\o}gelmose, A. and Liu, W. and Kulkarni, K. and Escalera, S. and Bar{\'o}, X. and Moeslund, T. B.},
  title   = {Back-Dropout Transfer Learning for Action Recognition},
  journal = {IET Computer Vision},
  year    = {2018},
  volume  = {12},
  number  = {4},
  pages   = {484--491},
  doi     = {10.1049/iet-cvi.2016.0309},
}
% BMVC 2015 paper (article 28, 13 pages). Names are separated with " and " and
% accented letters use ASCII LaTeX escapes; month uses the standard macro.
@inproceedings{ren2015dl,
  author    = {Ren, Huamin and Liu, Weifeng and Olsen, S{\o}ren Ingvor and Escalera, Sergio and Moeslund, Thomas B.},
  title     = {Unsupervised Behavior-Specific Dictionary Learning for Abnormal Event Detection},
  booktitle = {Proceedings of the British Machine Vision Conference (BMVC)},
  editor    = {Xie, Xianghua and Jones, Mark W. and Tam, Gary K. L.},
  publisher = {BMVA Press},
  year      = {2015},
  month     = sep,
  pages     = {28.1--28.13},
  articleno = {28},
  numpages  = {13},
  isbn      = {1-901725-53-7},
  doi       = {10.5244/C.29.28},
  url       = {https://dx.doi.org/10.5244/C.29.28},
}