@inproceedings{li2026trojanhorse,
  author    = {Li, Yida and Zhang, Siwei and Niu, Yiduo and Du, Yang and Sun, Qingxiao and Jin, Zhou and Liu, Weifeng},
  title     = {Trojan Horse: Aggregate-and-Batch for Scaling Up Sparse Direct Solvers on GPU Clusters},
  booktitle = {Proceedings of the 31st ACM SIGPLAN Annual Symposium on Principles and Practice of Parallel Programming (PPoPP)},
  year      = {2026},
}

@inproceedings{lu2026cubie,
  author    = {Lu, Yuechen and Zeng, Hongwei and Casas, Marc and Liu, Weifeng},
  title     = {Characterizing Matrix Multiplication Units across General Parallel Patterns in Scientific Computing},
  booktitle = {Proceedings of the 31st ACM SIGPLAN Annual Symposium on Principles and Practice of Parallel Programming (PPoPP)},
  year      = {2026},
}

@inproceedings{niu2026diggerbees,
  author    = {Niu, Yuyao and Lu, Yuechen and Liu, Weifeng and Casas, Marc},
  title     = {DiggerBees: Depth First Search Leveraging Hierarchical Block-Level Stealing on GPUs},
  booktitle = {Proceedings of the 31st ACM SIGPLAN Annual Symposium on Principles and Practice of Parallel Programming (PPoPP)},
  year      = {2026},
}

@inproceedings{lian2026UniSTC,
  author    = {Lian, Haocheng and Zhang, Qiyue and Zhao, Xinran and Dong, Meichen and Nie, Yijie and Zhao, Zhengyi and Shen, Junzhong and Guo, Wei and Huang, Chun and Sui, Bingcai and Liu, Weifeng},
  title     = {Uni-STC: Unified Sparse Tensor Core},
  booktitle = {IEEE Symposium on High-Performance Computer Architecture (HPCA)},
  year      = {2026},
}

@inproceedings{wang2025kami,
  author       = {Wang, Hemeng and Du, Yang and Li, Sidu and Tian, Xiaowen and Sun, Qingxiao and Liu, Weifeng},
  title        = {KAMI: Communication-Avoiding General Matrix Multiplication within a Single GPU},
  booktitle    = {38th ACM/IEEE International Conference for High Performance Computing, Networking, Storage, and Analysis (SC)},
  year         = {2025},
  organization = {ACM/IEEE},
}

@inproceedings{li2025GSpNN,
  author       = {Zeng, Lijie and Sun, Jiatai and Wu, Xiao and Niu, Dan and Wang, Tianshi and Lin, Yibo and Ye, Zuochang and Jin, Zhou},
  title        = {G-SpNN: GPU-Accelerated Passivity Enforcement for S-Parameter Modeling with Neural Networks},
  booktitle    = {2025 62nd ACM/IEEE Design Automation Conference (DAC)},
  pages        = {1--6},
  year         = {2025},
  organization = {ACM/IEEE},
}

@inproceedings{li2025MemSens,
  author       = {Li, Chenxi and Feng, Yihang and Deng, Fuxing and Tao, Dingwen and Liu, Weifeng and Jin, Zhou},
  title        = {MemSens: Significantly Reducing Memory Overhead in Adjoint Sensitivity Analysis Using Novel Error-Bounded Lossy Compression},
  booktitle    = {2025 62nd ACM/IEEE Design Automation Conference (DAC)},
  pages        = {1--6},
  year         = {2025},
  organization = {ACM/IEEE},
}

@inproceedings{Li2025PiSPICE,
  author       = {Jin, Zhou and Li, Jing and Xin, Jian and Zhou, Tianjia and Wu, Xiao and Niu, Dan and Ye, Zuochang},
  title        = {PiSPICE: Accelerating Post-Layout SPICE Simulation via Essential Parasitic Identification},
  booktitle    = {2025 62nd ACM/IEEE Design Automation Conference (DAC)},
  pages        = {1--6},
  year         = {2025},
  organization = {ACM/IEEE},
}

@article{Li2025MLPTA,
  author  = {Jin, Zhou and Li, Wenhao and Pei, Haojie and Zha, Xiaru and Dong, Yichao and Jin, Xiang and Wu, Xiao and Niu, Dan and Xing, Wei W.},
  title   = {ML-PTA: A Two-Stage ML-Enhanced Framework for Accelerating Nonlinear DC Circuit Simulation With Pseudo-Transient Analysis},
  journal = {IEEE Transactions on Computers},
  pages   = {3319--3331},
  year    = {2025},
}

@inproceedings{Yu2025LaRED,
  author    = {Yu, Chengxuan and Teng, Yanshuang and Dai, Wenhao and Li, Yongjiang and Xing, Wei and Wu, Xiao and Niu, Dan and Jin, Zhou},
  title     = {LaRED: Efficient IR Drop Predictor with Layout-Preserving Rebuilder-Encoder-Decoder Architecture},
  booktitle = {2025 Design, Automation and Test in Europe Conference (DATE)},
  pages     = {1--7},
  year      = {2025},
  doi       = {10.23919/DATE64628.2025.10993121},
}

@inproceedings{zhang2025ANovel,
  author    = {Zhang, Dekang and Niu, Dan and Jin, Zhou and Dong, Yichao and Tan, Jingweijia and Sun, Changyin},
  title     = {A Novel Frequency-Spatial Domain Aware Network for Fast Thermal Prediction in 2.5D ICs},
  booktitle = {2025 Design, Automation and Test in Europe Conference (DATE)},
  pages     = {1--7},
  year      = {2025},
  doi       = {10.23919/DATE64628.2025.10993272},
}

@inproceedings{Jiang2025Boosting,
  author    = {Jiang, Jiqing and Duan, Yongqiang and Jin, Zhou},
  title     = {Boosting the Performance of Transistor-Level Circuit Simulation with GNN},
  booktitle = {30th ACM/IEEE Asia and South Pacific Design Automation Conference (ASP-DAC '25)},
  pages     = {114--120},
  year      = {2025},
  doi       = {10.1145/3658617.3703149},
}

@inproceedings{geng2025ReRAM,
  author    = {Geng, Boyu and Fan, Mingjia and Jin, Zhou and Liu, Weifeng},
  title     = {ReRAM-Based Process-In-Memory Accelerator for Iterative Solvers: A Systematic Survey},
  booktitle = {2025 IEEE International Symposium on Circuits and Systems (ISCAS)},
  pages     = {1--5},
  year      = {2025},
  doi       = {10.1109/ISCAS56072.2025.11043455},
}

@article{Wang2025vGNN,
  author  = {Wang, Hemeng and Lin, Wenqing and Sun, Qingxiao and Liu, Weifeng},
  title   = {vGNN: Non-Uniformly partitioned full-graph GNN training on mixed GPUs},
  journal = {CCF Transactions on High Performance Computing},
  year    = {2025},
  doi     = {10.1007/s42514-025-00224-3},
}

@article{Dai2025CoMP,
  author  = {Dai, Wenhao and Jia, Ziyi and Bai, Yuesi and Sun, Qingxiao},
  title   = {Convergence-aware operator-wise mixed-precision training},
  journal = {CCF Transactions on High Performance Computing},
  year    = {2025},
  doi     = {10.1007/s42514-024-00208-9},
}

@inproceedings{Lu2024AmgT,
  author       = {Lu, Yuechen and Zeng, Lijie and Wang, Tengcheng and Fu, Xu and Li, Wenxuan and Cheng, Helin and Yang, Dechuang and Jin, Zhou and Casas, Marc and Liu, Weifeng},
  title        = {AmgT: Algebraic Multigrid Solver on Tensor Cores},
  booktitle    = {37th ACM/IEEE International Conference for High Performance Computing, Networking, Storage, and Analysis (SC)},
  pages        = {823--838},
  year         = {2024},
  organization = {ACM/IEEE},
}

@inproceedings{Yang2024Millefeuille,
  author       = {Yang, Dechuang and Zhao, Yuxuan and Niu, Yiduo and Jia, Weile and Shao, En and Liu, Weifeng and Tan, Guangming and Jin, Zhou},
  title        = {Mille-feuille: A Tile-Grained Mixed Precision Single-Kernel Conjugate Gradient Solver on GPUs},
  booktitle    = {37th ACM/IEEE International Conference for High Performance Computing, Networking, Storage, and Analysis (SC)},
  pages        = {916--931},
  year         = {2024},
  organization = {ACM/IEEE},
}

@inproceedings{2024-niu-ISLU,
  author    = {Niu, Dan and Tao, Yiyang and Jin, Zhou and Dong, Yichao and Wang, Chao and Sun, Changyin},
  title     = {ISLU: Indexing-Efficient Sparse LU Factorization for Circuit Simulation on GPUs},
  booktitle = {43rd ACM/IEEE International Conference on Computer-Aided Design (ICCAD)},
  pages     = {1--6},
  year      = {2024},
}

@inproceedings{zhao2024CSP,
  author    = {Zhao, Yuxuan and Yang, Xiaoyu and Bai, Yinuo and Zeng, Lijie and Niu, Dan and Liu, Weifeng and Jin, Zhou},
  title     = {CSP: Comprehensively-Sparsified Preconditioner for Efficient Nonlinear Circuit Simulation},
  booktitle = {43rd ACM/IEEE International Conference on Computer-Aided Design (ICCAD)},
  pages     = {1--6},
  year      = {2024},
}

@inproceedings{ling2024ScalFrag,
  author       = {Lin, Wenqing and Wang, Hemeng and Deng, Haodong and Sun, Qingxiao},
  title        = {ScalFrag: Efficient Tiled-MTTKRP with Adaptive Launching on GPUs},
  booktitle    = {2024 IEEE International Conference on Cluster Computing (CLUSTER)},
  pages        = {335--345},
  year         = {2024},
  organization = {IEEE},
}

@inproceedings{li2024Leda,
  author    = {Yi, Enxin and Bai, Jiarui and Nie, Yijie and Niu, Dan and Jin, Zhou and Liu, Weifeng},
  title     = {Leda: Leveraging Tiling Dataflow to Accelerate SpMM on HBM-Equipped FPGAs for GNNs},
  booktitle = {43rd ACM/IEEE International Conference on Computer-Aided Design (ICCAD)},
  pages     = {1--6},
  year      = {2024},
}

@inproceedings{sun2024SodaPTA,
  author    = {Sun, Jiatai and Zha, Xiaru and Wang, Chao and Wu, Xiao and Niu, Dan and Xing, Wei and Jin, Zhou},
  title     = {Pseudo Adjoint Optimization: Harnessing the Solution Curve for SPICE Acceleration},
  booktitle = {43rd ACM/IEEE International Conference on Computer-Aided Design (ICCAD)},
  pages     = {1--9},
  year      = {2024},
}

@inproceedings{guo2024heterogeneous,
  author    = {Guo, Zizheng and Huang, Tsung-Wei and Jin, Zhou and Zhou, Cheng and Lin, Yibo and Wang, Runsheng and Huang, Ru},
  title     = {Heterogeneous Static Timing Analysis with Advanced Delay Calculator},
  booktitle = {2024 21st Design, Automation and Test in Europe Conference (DATE)},
  pages     = {1--6},
  year      = {2024},
}

@inproceedings{jiang2024aparallel,
  author    = {Jiang, Yangfan and Song, Jianfei and Yin, Xunzhao and Dong, Xiao and Sun, Songyu and Lin, Yibo and Jin, Zhou and Yang, Xiaoyu and Zhuo, Cheng},
  title     = {A Parallel Simulation Framework Incorporating Machine Learning-Based Hotspot Detection for Accelerated Power Grid Analysis},
  booktitle = {3rd ACM/IEEE International Symposium on Machine Learning for CAD (MLCAD)},
  year      = {2024},
}

@inproceedings{dong2024isptnet,
  author    = {Dong, Yichao and Niu, Dan and Jin, Zhou and Zhang, Chuan and Sun, Changyin and Zhou, Zhenya},
  title     = {ISPT-Net: A Noval Transient Backward-stepping Reduction Policy by Irregular Sequential Prediction Transformer},
  booktitle = {2024 21st Design, Automation and Test in Europe Conference (DATE)},
  pages     = {1--6},
  year      = {2024},
}

@inproceedings{bai2024efficient,
  author    = {Bai, Yinuo and Yang, Xiaoyu and Lu, Yicheng and Niu, Dan and Zhou, Cheng and Jin, Zhou and Liu, Weifeng},
  title     = {Efficient Spectral-Aware Power Supply Noise Analysis for Low-Power Design Verification},
  booktitle = {2024 21st Design, Automation and Test in Europe Conference (DATE)},
  pages     = {1--6},
  year      = {2024},
}

@inproceedings{jin2024msh,
  author    = {Jin, Zhou and Feng, Tian and Wu, Xiao and Niu, Dan and Zhou, Zhenya and Zhou, Cheng},
  title     = {MSH: A Multi-Stage HiZ-Aware Homotopy Framework for Nonlinear DC Analysis},
  booktitle = {2024 21st Design, Automation and Test in Europe Conference (DATE)},
  pages     = {1--6},
  year      = {2024},
}

@inproceedings{chen2024tsapicer,
  author    = {Chen, Pengju and Niu, Dan and Jin, Zhou and Sun, Changyin and Li, Qi and Yan, Hao},
  title     = {TSA-TICER: A Two-Stage TICER Acceleration Framework for Model Order Reduction},
  booktitle = {2024 21st Design, Automation and Test in Europe Conference (DATE)},
  pages     = {1--6},
  year      = {2024},
}

@inproceedings{yi2024cuper,
  author    = {Yi, Enxin and Duan, Yiru and Bai, Yinuo and Zhao, Kang and Jin, Zhou and Liu, Weifeng},
  title     = {Cuper: Customized Dataflow and Perceptual Decoding for Sparse Matrix-Vector Multiplication on HBM-Equipped FPGAs},
  booktitle = {2024 21st Design, Automation and Test in Europe Conference (DATE)},
  pages     = {1--6},
  year      = {2024},
}

@article{Yang2024thSORT,
  author  = {Yang, Mouzhi and Zhang, Peng and Fang, Jianbin and Liu, Weifeng and Huang, Chun},
  title   = {thSORT: An Efficient Parallel Sorting Algorithm on Multi-core DSPs},
  journal = {CCF Transactions on High Performance Computing},
  year    = {2024},
  doi     = {10.1007/s42514-023-00175-7},
}

@inproceedings{feng2024ALSS,
  author    = {Feng, Guofeng and Wang, Hongyu and Guo, Zhuoqiang and Li, Mingzhen and Zhao, Tong and Jin, Zhou and Jia, Weile and Tan, Guangming and Sun, Ninghui},
  title     = {Accelerating Large-Scale Sparse LU Factorization for RF Circuit Simulation},
  booktitle = {Euro-Par 2024: Parallel Processing},
  pages     = {182--195},
  year      = {2024},
}

@inproceedings{li2023haspmv,
  author       = {Li, Wenxuan and Cheng, Helin and Lu, Zhengyang and Lu, Yuechen and Liu, Weifeng},
  title        = {HASpMV: Heterogeneity-Aware Sparse Matrix-Vector Multiplication on Modern Asymmetric Multicore Processors},
  booktitle    = {2023 IEEE International Conference on Cluster Computing (CLUSTER)},
  pages        = {1--12},
  year         = {2023},
  organization = {IEEE},
}

@inproceedings{liao2023exploiting,
  author       = {Liao, Jianjin and Li, Mingzhen and Yang, Hailong and Sun, Qingxiao and Sun, Biao and Hao, Jiwei and Feng, Tianyu and Yu, Fengwei and Chen, Shengdong and Tao, Ye and Zhang, Zicheng and Luan, Zhongzhi and Qian, Depei},
  title        = {Exploiting Input Tensor Dynamics in Activation Checkpointing for Efficient Training on GPU},
  booktitle    = {2023 IEEE International Parallel and Distributed Processing Symposium (IPDPS)},
  pages        = {1--15},
  year         = {2023},
  organization = {IEEE},
}

@inproceedings{Lu2023DASP,
  author       = {Lu, Yuechen and Liu, Weifeng},
  title        = {DASP: Specific Dense Matrix Multiply-Accumulate Units Accelerated General Sparse Matrix-Vector Multiplication},
  booktitle    = {36th ACM/IEEE International Conference for High Performance Computing, Networking, Storage, and Analysis (SC)},
  year         = {2023},
  organization = {ACM/IEEE},
}

@inproceedings{Fu2023PanguLU,
  author       = {Fu, Xu and Zhang, Bingbin and Wang, Tengcheng and Li, Wenhao and Lu, Yuechen and Yi, Enxin and Zhao, Jianqi and Geng, Xiaohan and Li, Fangying and Zhang, Jingwen and Jin, Zhou and Liu, Weifeng},
  title        = {PanguLU: A Scalable Regular Two-Dimensional Block-Cyclic Sparse Direct Solver on Distributed Heterogeneous Systems},
  booktitle    = {36th ACM/IEEE International Conference for High Performance Computing, Networking, Storage, and Analysis (SC)},
  year         = {2023},
  organization = {ACM/IEEE},
}

@article{Lu2023TileSpTRSV,
  author  = {Lu, Zhengyang and Liu, Weifeng},
  title   = {TileSpTRSV: a tiled algorithm for parallel sparse triangular solve on GPUs},
  journal = {CCF Transactions on High Performance Computing},
  volume  = {5},
  number  = {2},
  pages   = {129--143},
  year    = {2023},
  doi     = {10.1007/s42514-023-00151-1},
}

@inproceedings{Cheng2023HASpGEMM,
  author    = {Cheng, Helin and Li, Wenxuan and Lu, Yuechen and Liu, Weifeng},
  title     = {HASpGEMM: Heterogeneity-Aware Sparse General Matrix-Matrix Multiplication on Modern Asymmetric Multicore Processors},
  booktitle = {2023 Proceedings of the 52nd International Conference on Parallel Processing (ICPP)},
  pages     = {807--817},
  year      = {2023},
}

@inproceedings{fan2024ReCG,
  author       = {Fan, Mingjia and Cheng, Xiaoming and Yang, Dechuang and Jin, Zhou and Liu, Weifeng},
  title        = {ReCG: ReRAM-Accelerated Sparse Conjugate Gradient},
  booktitle    = {2024 61st ACM/IEEE Design Automation Conference (DAC)},
  pages        = {1--6},
  year         = {2024},
  organization = {ACM/IEEE},
}

@inproceedings{bai2024unleashing,
  author       = {Bai, Yinuo and Yi, Enxin and Xing, Wei and Yu, Bei and Jin, Zhou},
  title        = {Unleashing the Potential of AQFP Logic Placement via Entanglement Entropy and Projection},
  booktitle    = {2024 61st ACM/IEEE Design Automation Conference (DAC)},
  pages        = {1--6},
  year         = {2024},
  organization = {ACM/IEEE},
}

@inproceedings{li2024MASC,
  author       = {Li, Chenxi and Zhang, Boyuan and Duan, Yongqiang and Li, Yang and Ye, Zuochang and Liu, Weifeng and Tao, Dingwen and Jin, Zhou},
  title        = {MASC: A Memory-Efficient Adjoint Sensitivity Analysis through Compression Using Novel Spatiotemporal Prediction},
  booktitle    = {2024 61st ACM/IEEE Design Automation Conference (DAC)},
  pages        = {1--6},
  year         = {2024},
  organization = {ACM/IEEE},
}

@inproceedings{wang2024MAUnet,
  author       = {Wang, Mingyue and Cheng, Yuanqing and Lin, Yage and Peng, Kelin and Yang, Shunchuan and Jin, Zhou and Xing, Wei},
  title        = {MAUnet: Multiscale Attention U-Net for Effective IR Drop Prediction},
  booktitle    = {2024 61st ACM/IEEE Design Automation Conference (DAC)},
  pages        = {1--6},
  year         = {2024},
  organization = {ACM/IEEE},
}

@inproceedings{wang2023Accelerating,
  author       = {Wang, Tengcheng and Li, Wenhao and Pei, Haojie and Sun, Yuying and Jin, Zhou and Liu, Weifeng},
  title        = {Accelerating Sparse LU Factorization with Density-Aware Adaptive Matrix Multiplication for Circuit Simulation},
  booktitle    = {2023 60th ACM/IEEE Design Automation Conference (DAC)},
  pages        = {1--6},
  year         = {2023},
  organization = {ACM/IEEE},
}

@inproceedings{fan2023AmgR,
  author       = {Fan, Mingjia and Tian, Xiaotian and He, Yintao and Li, Junxian and Duan, Yiru and Hu, Xiaozhe and Wang, Ying and Jin, Zhou and Liu, Weifeng},
  title        = {AmgR: Algebraic Multigrid Accelerated on ReRAM},
  booktitle    = {2023 60th ACM/IEEE Design Automation Conference (DAC)},
  pages        = {1--6},
  year         = {2023},
  organization = {ACM/IEEE},
}

@inproceedings{mi2023balancing,
  author       = {Mi, Hongli and Yu, Xiangrui and Yu, Xiaosong and Wu, Shuangyuan and Liu, Weifeng},
  title        = {Balancing Computation and Communication in Distributed Sparse Matrix-Vector Multiplication},
  booktitle    = {2023 IEEE/ACM 23rd International Symposium on Cluster, Cloud and Internet Computing (CCGrid)},
  pages        = {535--544},
  year         = {2023},
  organization = {IEEE},
}

@inproceedings{li2024EMGA,
  author       = {Li, Yang and Duan, Yongqiang and Zhang, Hao and Niu, Dan and Wu, Xiao and Jin, Zhou},
  title        = {EMGA: An Evolutionary Memory Grouping Algorithm for MBIST},
  booktitle    = {2024 International Symposium of Electronics Design Automation (ISEDA)},
  year         = {2024},
  organization = {IEEE},
}

@inproceedings{deng2024sdssta,
  author       = {Deng, Fuxing and Feng, Yihang and Niu, Dan and Wu, Xiao and Jin, Zhou},
  title        = {SD-SSTA: Statistical Static Time Analysis Algorithm Considering Skewed Distribution},
  booktitle    = {2024 International Symposium of Electronics Design Automation (ISEDA)},
  year         = {2024},
  organization = {IEEE},
}

@inproceedings{qi2024UnetPro,
  author       = {Qi, Zhengfei and Wang, Wanchao and Yu, Chengxuan and Niu, Dan and Wu, Xiao and Jin, Zhou},
  title        = {UnetPro: Combining Attention with Skip Connection in Unet for Efficient IR Drop Prediction},
  booktitle    = {2024 International Symposium of Electronics Design Automation (ISEDA)},
  year         = {2024},
  organization = {IEEE},
}

@inproceedings{zhang2024pticer,
  author       = {Zhang, Zijia and Niu, Dan and Jin, Zhou and Chen, Pengju and Zhou, Zhenya and Sun, Changyin},
  title        = {P-TICER: An Effective Parallel TICER Acceleration Method for Model Order Reduction},
  booktitle    = {2024 International Symposium of Electronics Design Automation (ISEDA)},
  year         = {2024},
  organization = {IEEE},
}

@inproceedings{zha2023Deeplearning,
  author       = {Zha, Xiaru and Pei, Haojie and Niu, Dan and Wu, Xiao and Jin, Zhou},
  title        = {Deep Learning Enhanced Time-Step Control in Pseudo Transient Analysis for Efficient Nonlinear DC Simulation},
  booktitle    = {2023 International Symposium of Electronics Design Automation (ISEDA)},
  pages        = {23--28},
  year         = {2023},
  organization = {IEEE},
}

@article{yu2018improving,
  author    = {Yu, Chao and Bai, Yuebin and Sun, Qingxiao and Yang, Hailong},
  title     = {Improving thread-level parallelism in GPUs through expanding register file to scratchpad memory},
  journal   = {ACM Transactions on Architecture and Code Optimization (TACO)},
  volume    = {15},
  number    = {4},
  pages     = {1--24},
  year      = {2018},
  publisher = {ACM New York, NY, USA},
}

@inproceedings{dun2020accelerating,
  author       = {Dun, Ming and Li, Yunchun and You, Xin and Sun, Qingxiao and Luan, Zerong and Yang, Hailong},
  title        = {Accelerating De Novo Assembler WTDBG2 on Commodity Servers},
  booktitle    = {International Conference on Algorithms and Architectures for Parallel Processing},
  pages        = {232--246},
  year         = {2020},
  organization = {Springer},
}

@inproceedings{jin2024machine,
  author       = {Jin, Zhou and Li, Wenhao and Bai, Yinuo and Wang, Tengcheng and Lu, Yicheng and Liu, Weifeng},
  title        = {Machine Learning and GPU Accelerated Sparse Linear Solvers for Transistor-Level Circuit Simulation: A Perspective Survey (Invited Paper)},
  booktitle    = {2024 29th Asia and South Pacific Design Automation Conference (ASP-DAC)},
  pages        = {96--101},
  year         = {2024},
  organization = {IEEE},
}

@inproceedings{chen2022application,
  author       = {Chen, Yufei and Pei, Haojie and Dong, Xiao and Jin, Zhou and Zhuo, Cheng},
  title        = {Application of deep learning in back-end simulation: Challenges and opportunities},
  booktitle    = {2022 27th Asia and South Pacific Design Automation Conference (ASP-DAC)},
  pages        = {641--646},
  year         = {2022},
  organization = {IEEE},
}

@inproceedings{2022-ji-tilespmspv,
  author    = {Ji, Haonan and Song, Huimin and Lu, Shibo and Jin, Zhou and Tan, Guangming and Liu, Weifeng},
  title     = {TileSpMSpV: A Tiled Algorithm for Sparse Matrix-Sparse Vector Multiplication on GPUs},
  booktitle = {Proceedings of the 51st International Conference on Parallel Processing},
  series    = {ICPP '22},
  year      = {2023},
  publisher = {Association for Computing Machinery},
  numpages  = {11},
  doi       = {10.1145/3545008.3545028},
}

@article{2023-jin-ossp,
  author  = {Niu, Dan and Dong, Yichao and Jin, Zhou and Zhang, Chuan and Li, Qi and Sun, Changyin},
  title   = {OSSP-PTA: An Online Stochastic Stepping Policy for PTA on Reinforcement Learning},
  journal = {IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems},
  year    = {2023},
  doi     = {10.1109/TCAD.2023.3251731},
}

@article{2022-dong-PTA,
  author  = {Dong, Yichao and Niu, Dan and Jin, Zhou and Zhang, Chuan and Li, Qi and Sun, Changyin},
  title   = {Adaptive Stepping PTA for DC Analysis Based on Reinforcement Learning},
  journal = {IEEE Transactions on Circuits and Systems II: Express Briefs},
  volume  = {70},
  year    = {2023},
  doi     = {10.1109/TCSII.2022.3207356},
}


@article{2022-xing-BoA-PTA,
  author    = {Xing, Wei W. and Jin, Xiang and Feng, Tian and Niu, Dan and Zhao, Weishen and Jin, Zhou},
  title     = {BoA-PTA, An Bayesian Optimization Accelerated PTA Solver for SPICE Simulation},
  journal   = {ACM Trans. Des. Autom. Electron. Syst.},
  year      = {2022},
  month     = jul,
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  issn      = {1084-4309},
  url       = {https://doi.org/10.1145/3555805},
  doi       = {10.1145/3555805},
}


@inproceedings{2022-jin-RLPTA,
  author    = {Jin, Zhou and Pei, Haojie and Dong, Yichao and Jin, Xiang and Wu, Xiao and Xing, Wei W. and Niu, Dan},
  title     = {Accelerating Nonlinear DC Circuit Simulation with Reinforcement Learning},
  booktitle = {Proceedings of the 59th ACM/IEEE Design Automation Conference},
  series    = {DAC '22},
  pages     = {619--624},
  numpages  = {6},
  year      = {2022},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3489517.3530512},
  doi       = {10.1145/3489517.3530512},
}


@inproceedings{2021-jin-PALBBD,
  author    = {Jin, Zhou and Feng, Tian and Duan, Yiru and Wu, Xiao and Cheng, Minghou and Zhou, Zhenya and Liu, Weifeng},
  title     = {PALBBD: A Parallel ArcLength Method Using Bordered Block Diagonal Form for DC Analysis},
  booktitle = {Proceedings of the 2021 on Great Lakes Symposium on VLSI},
  pages     = {327--332},
  numpages  = {6},
  year      = {2021},
  isbn      = {9781450383936},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3453688.3461526},
  doi       = {10.1145/3453688.3461526},
}


@inproceedings{2022-niu-tilespgemm,
  author    = {Niu, Yuyao and Lu, Zhengyang and Ji, Haonan and Song, Shuhui and Jin, Zhou and Liu, Weifeng},
  title     = {TileSpGEMM: A Tiled Algorithm for Parallel Sparse General Matrix-Matrix Multiplication on GPUs},
  booktitle = {Proceedings of the 27th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming},
  series    = {PPoPP '22},
  pages     = {90--106},
  numpages  = {17},
  year      = {2022},
  isbn      = {9781450392044},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3503221.3508431},
}


@article{lu2021implementing,
  author  = {Lu, Yuechen and Luo, Yuchen and Lian, Haocheng and Jin, Zhou and Liu, Weifeng},
  title   = {Implementing LU and Cholesky factorizations on artificial intelligence accelerators},
  journal = {CCF Transactions on High Performance Computing},
  volume  = {3},
  number  = {3},
  pages   = {286--297},
  year    = {2021},
  doi     = {10.1007/s42514-021-00075-8},
}


@inproceedings{9586141,
  author    = {Zhao, Jianqi and Wen, Yao and Luo, Yuchen and Jin, Zhou and Liu, Weifeng and Zhou, Zhenya},
  title     = {SFLU: Synchronization-Free Sparse LU Factorization for Fast Circuit Simulation on GPUs},
  booktitle = {2021 58th ACM/IEEE Design Automation Conference (DAC)},
  pages     = {37--42},
  year      = {2021},
  doi       = {10.1109/DAC18074.2021.9586141},
}


@article{9459513,
  author  = {Xie, Zhen and Tan, Guangming and Liu, Weifeng and Sun, Ninghui},
  title   = {A Pattern-Based SpGEMM Library for Multi-Core and Many-Core Architectures},
  journal = {IEEE Transactions on Parallel and Distributed Systems},
  volume  = {33},
  number  = {1},
  pages   = {159--175},
  year    = {2022},
  doi     = {10.1109/TPDS.2021.3090328},
}


@article{9380961,
  author  = {Zhang, Feng and Su, Jiya and Liu, Weifeng and He, Bingsheng and Wu, Ruofan and Du, Xiaoyong and Wang, Rujia},
  title   = {YuenyeungSpTRSV: A Thread-Level and Warp-Level Fusion Synchronization-Free Sparse Triangular Solve},
  journal = {IEEE Transactions on Parallel and Distributed Systems},
  volume  = {32},
  number  = {9},
  pages   = {2321--2337},
  year    = {2021},
  doi     = {10.1109/TPDS.2021.3066635},
}


@article{9373912,
  author  = {Chen, Jing and Fang, Jianbin and Liu, Weifeng and Yang, Canqun},
  title   = {BALS: Blocked Alternating Least Squares for Parallel Sparse Matrix Factorization},
  journal = {IEEE Transactions on Parallel and Distributed Systems},
  volume  = {32},
  number  = {9},
  pages   = {2291--2302},
  year    = {2021},
  doi     = {10.1109/TPDS.2021.3064942},
}


@article{Ji2021,
  author  = {Ji, Haonan and Lu, Shibo and Hou, Kaixi and Wang, Hao and Jin, Zhou and Liu, Weifeng and Vinter, Brian},
  title   = {Segmented Merge: A New Primitive for Parallel Sparse Matrix Computations},
  journal = {International Journal of Parallel Programming},
  year    = {2021},
  month   = mar,
  day     = {26},
  issn    = {1573-7640},
}


@inproceedings{9460505,
  author    = {Niu, Yuyao and Lu, Zhengyang and Dong, Meichen and Jin, Zhou and Liu, Weifeng and Tan, Guangming},
  title     = {TileSpMV: A Tiled Algorithm for Sparse Matrix-Vector Multiplication on GPUs},
  booktitle = {2021 IEEE International Parallel and Distributed Processing Symposium (IPDPS)},
  pages     = {68--78},
  year      = {2021},
  doi       = {10.1109/IPDPS49936.2021.00016},
}


@inproceedings{10.1145/3404397.3404413,
  author    = {Lu, Zhengyang and Niu, Yuyao and Liu, Weifeng},
  title     = {Efficient Block Algorithms for Parallel Sparse Triangular Solve},
  booktitle = {49th International Conference on Parallel Processing - ICPP},
  series    = {ICPP '20},
  location  = {Edmonton, AB, Canada},
  articleno = {63},
  numpages  = {11},
  year      = {2020},
  isbn      = {9781450388160},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  keywords  = {block algorithm, sparse triangular solve, GPU, sparse matrix},
  url       = {https://doi.org/10.1145/3404397.3404413},
  doi       = {10.1145/3404397.3404413},
}


@inproceedings{10.1145/3404397.3404400,
  author    = {Su, Jiya and Zhang, Feng and Liu, Weifeng and He, Bingsheng and Wu, Ruofan and Du, Xiaoyong and Wang, Rujia},
  title     = {CapelliniSpTRSV: A Thread-Level Synchronization-Free Sparse Triangular Solve on GPUs},
  booktitle = {49th International Conference on Parallel Processing - ICPP},
  series    = {ICPP '20},
  location  = {Edmonton, AB, Canada},
  articleno = {2},
  numpages  = {11},
  year      = {2020},
  isbn      = {9781450388160},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3404397.3404400},
  doi       = {10.1145/3404397.3404400},
}


@inproceedings{10.1007/978-3-030-79478-1_20,
  author    = {Yu, Xiaosong and Ma, Huihui and Qu, Zhengyu and Fang, Jianbin and Liu, Weifeng},
  editor    = {He, Xin and Shao, En and Tan, Guangming},
  title     = {NUMA-Aware Optimization of Sparse Matrix-Vector Multiplication on ARMv8-Based Many-Core Architectures},
  booktitle = {Network and Parallel Computing},
  pages     = {231--242},
  year      = {2021},
  publisher = {Springer International Publishing},
  address   = {Cham},
  isbn      = {978-3-030-79478-1},
}


@inproceedings{xie2019spgemm,
  author    = {Xie, Zhen and Tan, Guangming and Liu, Weifeng and Sun, Ninghui},
  title     = {IA-SpGEMM: An Input-aware Auto-tuning Framework for Parallel Sparse Matrix-Matrix Multiplication},
  booktitle = {Proceedings of the 2019 International Conference on Supercomputing},
  series    = {ICS '19},
  numpages  = {12},
  year      = {2019},
}


@article{zhang2019apu,
  author  = {Zhang, Feng and Liu, Weifeng and Feng, Ningxuan and Zhai, Jidong and Du, Xiaoyong},
  title   = {Performance Evaluation and Analysis of Sparse Matrix and Graph Kernels on Heterogeneous Processors},
  journal = {CCF Transactions on High Performance Computing},
  year    = {2019},
  issn    = {2524-4922},
}


@article{liu2019spgemm,
  author  = {Liu, Junhong and He, Xin and Liu, Weifeng and Tan, Guangming},
  title   = {Register-Aware Optimizations for Parallel Sparse Matrix-Matrix Multiplication},
  journal = {International Journal of Parallel Programming},
  year    = {2019},
  issn    = {1573-7640},
}


@article{chen2018clmf,
  author  = {Chen, Jing and Fang, Jianbin and Liu, Weifeng and Tang, Tao and Yang, Canqun},
  title   = {clMF: A Fine-Grained and Portable Alternating Least Squares Algorithm for Parallel Matrix Factorization},
  journal = {Future Generation Computer Systems},
  year    = {2018},
}


@inproceedings{li2018warp,
  author    = {Li, Ang and Liu, Weifeng and Wang, Linnan and Barker, Kevin and Song, Shuaiwen Leon},
  title     = {Warp-Consolidation: A Novel Execution Model for GPUs},
  booktitle = {Proceedings of the 2018 International Conference on Supercomputing},
  series    = {ICS '18},
  location  = {Beijing, China},
  pages     = {53--64},
  numpages  = {12},
  year      = {2018},
  isbn      = {978-1-4503-5783-8},
  address   = {New York, NY, USA},
  url       = {http://doi.acm.org/10.1145/3205289.3205294},
  doi       = {10.1145/3205289.3205294},
  acmid     = {3205294},
}


@inproceedings{wang2018sptrsv,
  author    = {Wang, Xinliang and Liu, Weifeng and Xue, Wei and Wu, Li},
  title     = {swSpTRSV: A Fast Sparse Triangular Solve with Sparse Level Tile Layout on Sunway Architectures},
  booktitle = {Proceedings of the 23rd ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming},
  series    = {PPoPP '18},
  location  = {Vienna, Austria},
  pages     = {338--353},
  numpages  = {16},
  year      = {2018},
  isbn      = {978-1-4503-4982-6},
  publisher = {ACM},
  address   = {New York, NY, USA},
  url       = {http://doi.acm.org/10.1145/3178487.3178513},
  doi       = {10.1145/3178487.3178513},
  acmid     = {3178513},
}


@inproceedings{liu2018spgemm,
  author    = {Liu, Junhong and He, Xin and Liu, Weifeng and Tan, Guangming},
  title     = {Register-based Implementation of the Sparse General Matrix-matrix Multiplication on GPUs},
  booktitle = {Proceedings of the 23rd ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming},
  series    = {PPoPP '18},
  location  = {Vienna, Austria},
  pages     = {407--408},
  numpages  = {2},
  year      = {2018},
  isbn      = {978-1-4503-4982-6},
  publisher = {ACM},
  address   = {New York, NY, USA},
  url       = {http://doi.acm.org/10.1145/3178487.3178529},
  doi       = {10.1145/3178487.3178529},
  acmid     = {3178529},
}


@inproceedings{li2017hbm,
  author    = {Li, Ang and Liu, Weifeng and Kristensen, Mads R. B. and Vinter, Brian and Wang, Hao and Hou, Kaixi and Marquez, Andres and Song, Shuaiwen Leon},
  title     = {Exploring and Analyzing the Real Impact of Modern On-package Memory on HPC Scientific Kernels},
  booktitle = {Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis},
  series    = {SC '17},
  location  = {Denver, Colorado},
  pages     = {26:1--26:14},
  articleno = {26},
  numpages  = {14},
  year      = {2017},
  isbn      = {978-1-4503-5114-0},
  publisher = {ACM},
  address   = {New York, NY, USA},
  url       = {http://doi.acm.org/10.1145/3126908.3126931},
  doi       = {10.1145/3126908.3126931},
  acmid     = {3126931},
}


@article{liu2017sptrsv,
  author  = {Liu, Weifeng and Li, Ang and Hogg, Jonathan D. and Duff, Iain S. and Vinter, Brian},
  title   = {Fast Synchronization-Free Algorithms for Parallel Sparse Triangular Solves with Multiple Right-Hand Sides},
  journal = {Concurrency and Computation: Practice and Experience},
  volume  = {29},
  number  = {21},
  pages   = {e4244},
  year    = {2017},
  issn    = {1532-0634},
  url     = {http://dx.doi.org/10.1002/cpe.4244},
  doi     = {10.1002/cpe.4244},
}


% Hou, Liu, Wang, Feng: segmented sort on GPUs, ICS '17, article 12.
% The acronym in the title is brace-protected so sentence-casing styles keep it;
% the url resolves the same DOI through https://doi.org.
@inproceedings{hou2017segsort,
  author = {Hou, Kaixi and Liu, Weifeng and Wang, Hao and Feng, Wu-chun},
  title = {Fast Segmented Sort on {GPUs}},
  booktitle = {Proceedings of the International Conference on Supercomputing},
  series = {ICS '17},
  year = {2017},
  isbn = {978-1-4503-5020-4},
  location = {Chicago, Illinois},
  pages = {12:1--12:10},
  articleno = {12},
  numpages = {10},
  url = {https://doi.org/10.1145/3079079.3079105},
  doi = {10.1145/3079079.3079105},
  acmid = {3079105},
  publisher = {ACM},
}


% Li et al.: locality-aware CTA clustering for GPUs, ASPLOS '17, pp. 297--311.
% Acronyms in the title are brace-protected so sentence-casing styles keep them;
% the url resolves the same DOI through https://doi.org.
@inproceedings{li2017cta,
  author = {Li, Ang and Song, Shuaiwen Leon and Liu, Weifeng and Liu, Xu and Kumar, Akash and Corporaal, Henk},
  title = {Locality-Aware {CTA} Clustering for Modern {GPUs}},
  booktitle = {Proceedings of the Twenty-Second International Conference on Architectural Support for Programming Languages and Operating Systems},
  series = {ASPLOS '17},
  year = {2017},
  isbn = {978-1-4503-4465-4},
  location = {Xi'an, China},
  pages = {297--311},
  numpages = {15},
  url = {https://doi.org/10.1145/3037697.3037709},
  doi = {10.1145/3037697.3037709},
  acmid = {3037709},
  publisher = {ACM},
}


% Chen et al.: portable ALS matrix factorization, Parlearning '17, pp. 409--418.
% Authors are written in the unambiguous "Last, First" form used by the
% neighbouring entries; the acronym in the title is brace-protected.
@inproceedings{chen2017als,
  author = {Chen, Jing and Fang, Jianbin and Liu, Weifeng and Tang, Tao and Chen, Xuhao and Yang, Canqun},
  title = {Efficient and Portable {ALS} Matrix Factorization for Recommender Systems},
  booktitle = {Proceedings of the 6th International Workshop on Parallel and Distributed Computing for Large Scale Machine Learning and Big Data Analytics},
  series = {Parlearning '17},
  year = {2017},
  pages = {409--418},
}


% Liu, Li, Hogg, Duff, Vinter: synchronization-free SpTRSV, Euro-Par 2016, pp. 617--630.
% The ordinal in booktitle is lower-cased ("22nd"), and the volume number that
% was embedded in the booktitle text now lives in the volume field.
% NOTE(review): series is assumed to be Lecture Notes in Computer Science,
% based on the Springer DOI prefix -- confirm against the publisher record.
@inproceedings{liu2016sptrsv,
  author = {Liu, Weifeng and Li, Ang and Hogg, Jonathan and Duff, Iain S. and Vinter, Brian},
  title = {A Synchronization-Free Algorithm for Parallel Sparse Triangular Solves},
  booktitle = {Proceedings of the 22nd International Conference on Euro-Par 2016: Parallel Processing},
  series = {Lecture Notes in Computer Science},
  volume = {9833},
  year = {2016},
  isbn = {978-3-319-43658-6},
  pages = {617--630},
  numpages = {14},
  url = {https://doi.org/10.1007/978-3-319-43659-3_45},
  doi = {10.1007/978-3-319-43659-3_45},
  acmid = {2990990},
  publisher = {Springer-Verlag New York, Inc.},
}


% Wang, Liu, Hou, Feng: parallel transposition of sparse data structures,
% ICS '16, article 33. The url resolves the DOI through https://doi.org.
@inproceedings{wang2016sptrans,
  author = {Wang, Hao and Liu, Weifeng and Hou, Kaixi and Feng, Wu-chun},
  title = {Parallel Transposition of Sparse Data Structures},
  booktitle = {Proceedings of the 2016 International Conference on Supercomputing},
  series = {ICS '16},
  year = {2016},
  isbn = {978-1-4503-4361-9},
  location = {Istanbul, Turkey},
  pages = {33:1--33:13},
  numpages = {13},
  url = {https://doi.org/10.1145/2925426.2926291},
  doi = {10.1145/2925426.2926291},
  publisher = {ACM},
}


% Liu, Vinter: the CSR5 storage format for SpMV, ICS '15, pp. 339--350.
% The format name in the title is brace-protected; without it, sentence-casing
% styles would lower-case everything after its first letter.
@inproceedings{liu2015csr5,
  author = {Liu, Weifeng and Vinter, Brian},
  title = {{CSR5}: An Efficient Storage Format for Cross-Platform Sparse Matrix-Vector Multiplication},
  booktitle = {Proceedings of the 29th ACM International Conference on Supercomputing},
  series = {ICS '15},
  year = {2015},
  isbn = {978-1-4503-3559-1},
  location = {Newport Beach, California, USA},
  pages = {339--350},
  numpages = {12},
  url = {https://doi.org/10.1145/2751205.2751209},
  doi = {10.1145/2751205.2751209},
  publisher = {ACM},
}


% Liu, Vinter: speculative segmented sum for SpMV on heterogeneous processors,
% Parallel Computing 49, Nov 2015, pp. 179--193.
% NOTE(review): the former number field held the letter "C", which looks like
% an export placeholder rather than an issue number, so it is omitted here --
% confirm against the publisher record.
@article{liu2015spmv,
  author = {Liu, Weifeng and Vinter, Brian},
  title = {Speculative Segmented Sum for Sparse Matrix-vector Multiplication on Heterogeneous Processors},
  journal = {Parallel Computing},
  issue_date = {November 2015},
  volume = {49},
  month = nov,
  year = {2015},
  issn = {0167-8191},
  pages = {179--193},
  numpages = {15},
  url = {https://doi.org/10.1016/j.parco.2015.04.004},
  doi = {10.1016/j.parco.2015.04.004},
}


% Liu, Vinter: SpGEMM framework for GPUs and heterogeneous processors,
% Journal of Parallel and Distributed Computing 85, Nov 2015, pp. 47--61.
% The acronym in the title is brace-protected so sentence-casing styles keep it.
% NOTE(review): the former number field held the letter "C", which looks like
% an export placeholder rather than an issue number, so it is omitted here --
% confirm against the publisher record.
@article{liu2015spgemm,
  author = {Liu, Weifeng and Vinter, Brian},
  title = {A Framework for General Sparse Matrix-Matrix Multiplication on {GPUs} and Heterogeneous Processors},
  journal = {Journal of Parallel and Distributed Computing},
  volume = {85},
  month = nov,
  year = {2015},
  issn = {0743-7315},
  pages = {47--61},
  numpages = {15},
  url = {https://doi.org/10.1016/j.jpdc.2015.06.010},
  doi = {10.1016/j.jpdc.2015.06.010},
}


% Liu, Vinter: GPU SpGEMM for irregular data, IPDPS '14, pp. 370--381.
% The acronym in the title is brace-protected so sentence-casing styles keep it;
% the url resolves the same DOI through https://doi.org.
@inproceedings{liu2014spgemm,
  author = {Liu, Weifeng and Vinter, Brian},
  title = {An Efficient {GPU} General Sparse Matrix-Matrix Multiplication for Irregular Data},
  booktitle = {Proceedings of the 2014 IEEE 28th International Parallel and Distributed Processing Symposium},
  series = {IPDPS '14},
  year = {2014},
  isbn = {978-1-4799-3800-1},
  pages = {370--381},
  numpages = {12},
  url = {https://doi.org/10.1109/IPDPS.2014.47},
  doi = {10.1109/IPDPS.2014.47},
  publisher = {IEEE Computer Society},
}


% Liu, Vinter: the ad-heap data structure for asymmetric multicore processors, GPGPU-7.
% pages is written as the plain range 54--63, which agrees with numpages = 10;
% the earlier "54:54--54:63" form repeated the first page as a bogus prefix.
@inproceedings{liu2014adheap,
  author = {Liu, Weifeng and Vinter, Brian},
  title = {Ad-heap: An Efficient Heap Data Structure for Asymmetric Multicore Processors},
  booktitle = {Proceedings of Workshop on General Purpose Processing Using GPUs},
  series = {GPGPU-7},
  year = {2014},
  isbn = {978-1-4503-2766-4},
  pages = {54--63},
  numpages = {10},
  url = {https://doi.org/10.1145/2576779.2576786},
  doi = {10.1145/2576779.2576786},
  publisher = {ACM},
}


% Weifeng Liu's PhD thesis, University of Copenhagen, 2015.
% The author is written in the "Last, First" form used by the other entries.
@phdthesis{phdthesisliu,
  author = {Liu, Weifeng},
  title = {Parallel and Scalable Sparse Basic Linear Algebra Subprograms},
  school = {University of Copenhagen},
  year = {2015},
}


% Ren et al.: back-dropout transfer learning for action recognition,
% IET Computer Vision 12(4), 2018, pp. 484--491.
% Two surnames contained an undecodable replacement character; they are now
% written with LaTeX escapes (o-slash in Mogelmose, acute o in Baro).
% NOTE(review): the restored letters are presumed from the usual spelling of
% these names -- verify against the DOI record.
@article{ren2018trans,
  author = {Ren, H. and Kanhabua, N. and M{\o}gelmose, A. and Liu, W. and Kulkarni, K. and Escalera, S. and Bar{\'o}, X. and Moeslund, T. B.},
  title = {Back-Dropout Transfer Learning for Action Recognition},
  journal = {IET Computer Vision},
  year = {2018},
  volume = {12},
  number = {4},
  pages = {484--491},
  doi = {10.1049/iet-cvi.2016.0309},
}


% Ren et al.: behavior-specific dictionary learning for abnormal event
% detection, BMVC 2015, paper 28.
% Author and editor names use "Last, First" joined by "and", since a
% comma-separated editor list is parsed as a single malformed name.
% month uses the standard sep macro.
% NOTE(review): the given name with the undecodable character is presumed to
% be Soren with o-slash -- verify against the DOI record.
@inproceedings{ren2015dl,
  author = {Ren, Huamin and Liu, Weifeng and Olsen, S{\o}ren Ingvor and Escalera, Sergio and Moeslund, Thomas B.},
  title = {Unsupervised Behavior-Specific Dictionary Learning for Abnormal Event Detection},
  booktitle = {Proceedings of the British Machine Vision Conference (BMVC)},
  editor = {Xie, Xianghua and Jones, Mark W. and Tam, Gary K. L.},
  year = {2015},
  month = sep,
  pages = {28.1--28.13},
  articleno = {28},
  numpages = {13},
  publisher = {BMVA Press},
  doi = {10.5244/C.29.28},
  isbn = {1-901725-53-7},
  url = {https://doi.org/10.5244/C.29.28},
}