{"id":18,"date":"2017-12-12T17:38:30","date_gmt":"2017-12-12T17:38:30","guid":{"rendered":"https:\/\/dong-zhen.com\/?page_id=18"},"modified":"2026-03-06T00:46:53","modified_gmt":"2026-03-06T00:46:53","slug":"home","status":"publish","type":"page","link":"https:\/\/dong-zhen.com\/","title":{"rendered":"Home"},"content":{"rendered":"<!--themify_builder_content-->\n<div id=\"themify_builder_content-18\" data-postid=\"18\" class=\"themify_builder_content themify_builder_content-18 themify_builder tf_clear\">\n                    <div  data-parallax-bg=\"desktop\" data-css_id=\"li23859\" data-lazy=\"1\" class=\"module_row themify_builder_row fullwidth_row_container tb_li23859 tb_first tf_w\">\n                        <div class=\"row_inner col_align_top tb_col_count_1 tf_box tf_rel\">\n                        <div  data-lazy=\"1\" class=\"module_column tb-column col-full tb_9nmd861 first\">\n                    <!-- module text -->\n<div  class=\"module module-text tb_0gqc861   \" data-lazy=\"1\">\n        <div  class=\"tb_text_wrap\">\n        <h1>\u00a0<\/h1>\n<h1>Homepage of Zhen Dong<\/h1>    <\/div>\n<\/div>\n<!-- \/module text --><!-- module text -->\n<div  class=\"module module-text tb_5uvo861   \" data-lazy=\"1\">\n        <div  class=\"tb_text_wrap\">\n        <h6>Assistant Professor at UCSB, NVIDIA AI Researcher<\/h6>\n<h6>Looking for Potential Postdoc, PhD, MS &amp; Interns<\/h6>    <\/div>\n<\/div>\n<!-- \/module text -->        <div  data-lazy=\"1\" class=\"module_subrow themify_builder_sub_row tf_w col_align_top tb_col_count_2 tb_o5ke861\">\n                <div  data-lazy=\"1\" class=\"module_column sub_column col4-2 tb_dutb862 first\">\n                    <!-- module buttons -->\n<div  class=\"module module-buttons tb_v96x862 buttons-horizontal outline   rounded\" data-lazy=\"1\">\n        <div class=\"module-buttons-item tf_in_flx\">\n                        <a href=\"http:\/\/dong-zhen.com\/wp-content\/uploads\/CV_ZhenDong_7.8.pdf\" class=\"ui builder_button tf_in_flx tb_default_color\" target=\"_blank\" rel=\"noopener\">\n                                                DOWNLOAD RESUME                                        <\/a>\n                <\/div>\n            <\/div>\n<!-- \/module buttons -->\n        <\/div>\n                    <div  data-lazy=\"1\" class=\"module_column sub_column col4-2 tb_ta6p862 last\">\n                    <!-- module buttons -->\n<div  class=\"module module-buttons tb_1psb862 buttons-horizontal outline   circle\" data-lazy=\"1\">\n        <div class=\"module-buttons-item tf_in_flx\">\n                        <a href=\"https:\/\/scholar.google.com\/citations?user=czxMUzcAAAAJ&#038;hl=en\" class=\"ui builder_button tf_in_flx tb_default_color\" target=\"_blank\" rel=\"noopener\">\n                                                Google Scholar                                        <\/a>\n                <\/div>\n            <\/div>\n<!-- \/module buttons -->\n        <\/div>\n                    <\/div>\n                <\/div>\n                        <\/div>\n        <\/div>\n                        <div  data-zooming-bg=\"desktop\" data-anchor=\"research-interests\" data-css_id=\"93yp859\" data-lazy=\"1\" class=\"module_row themify_builder_row fullwidth_row_container tb_has_section tb_section-research-interests tb_93yp859 tf_w\">\n                        <div class=\"row_inner col_align_top tb_col_count_1 tf_box tf_rel\">\n                        <div  data-lazy=\"1\" class=\"module_column tb-column col-full tb_t7qz863 first\">\n              
      <!-- module text -->\n<div  class=\"module module-text tb_e8fg863   \" data-lazy=\"1\">\n        <div  class=\"tb_text_wrap\">\n        <h2><strong>Research Interests<\/strong><\/h2>    <\/div>\n<\/div>\n<!-- \/module text --><!-- module text -->\n<div  class=\"module module-text tb_kkdm863   \" data-lazy=\"1\">\n        <div  class=\"tb_text_wrap\">\n        <h3>Efficient AI: Efficient MLLM, VideoGen &amp; Embodied AI<\/h3>\n<h3>LLM Post-Training, Model Compression, AI Systems<\/h3>\n<h3>Function-Calling Agents and Multi-Agent Systems<\/h3>\n<h3>Hardware-Software Co-Design and AI for Science<\/h3>\n<h3>Efficient Evaluation and Alignment of Foundation Models<\/h3>    <\/div>\n<\/div>\n<!-- \/module text -->        <\/div>\n                        <\/div>\n        <\/div>\n                        <div  data-anchor=\"education\" data-css_id=\"qixs859\" data-lazy=\"1\" class=\"module_row themify_builder_row fullwidth_row_container tb_has_section tb_section-education tb_qixs859 tf_w\">\n                        <div class=\"row_inner col_align_top tb_col_count_1 tf_box tf_rel\">\n                        <div  data-lazy=\"1\" class=\"module_column tb-column col-full tb_tuq2863 first\">\n                    <!-- module text -->\n<div  class=\"module module-text tb_7acl863   \" data-lazy=\"1\">\n        <div  class=\"tb_text_wrap\">\n        <h2><strong>Education<\/strong><\/h2>    <\/div>\n<\/div>\n<!-- \/module text --><!-- module text -->\n<div  class=\"module module-text tb_vx0u864   \" data-lazy=\"1\">\n        <div  class=\"tb_text_wrap\">\n        <h3><strong>Ph.D.\/Postdoc at University of California, Berkeley<\/strong><\/h3>\n<h3><strong>B.S. at Peking University: Rank 1\/327 in EECS<\/strong><\/h3>\n<h3>\u00a0<\/h3>    <\/div>\n<\/div>\n<!-- \/module text -->        <\/div>\n                        <\/div>\n        <\/div>\n                        <div  data-tablet_landscape-padding=\"1px\" data-parallax-bg=\"desktop\" data-anchor=\"awards\" data-css_id=\"vllx859\" data-lazy=\"1\" class=\"module_row themify_builder_row fullwidth_row_container tb_has_section tb_section-awards tb_vllx859 tf_w\">\n                        <div class=\"row_inner col_align_top tb_col_count_1 tf_box tf_rel\">\n                        <div  data-lazy=\"1\" class=\"module_column tb-column col-full tb_sjjy864 first\">\n                    <!-- module text -->\n<div  class=\"module module-text tb_bkqg864   \" data-lazy=\"1\">\n        <div  class=\"tb_text_wrap\">\n        <h2><strong>Awards<\/strong><\/h2>    <\/div>\n<\/div>\n<!-- \/module text --><!-- module text -->\n<div  class=\"module module-text tb_roig864   \" data-lazy=\"1\">\n        <div  class=\"tb_text_wrap\">\n        <ul>\n<li>\n<h3>Winner of 2018-2020 Berkeley Fellowship.<\/h3>\n<\/li>\n<li>\n<h3>Winner of PhD Forum (Second Place) at DAC 2024.<\/h3>\n<\/li>\n<li>\n<h3>Doctoral Consortium at CVPR 2024.<\/h3>\n<\/li>\n<li>\n<h3>Best Paper Award at LLM Evaluation Workshop at NeurIPS 2025.<\/h3>\n<\/li>\n<li>\n<h3>Best Paper Nomination at Practical DL Workshop at AAAI 2023.<\/h3>\n<\/li>\n<li>\n<h3>1st Place in EMCC 2020 Competition on both Classification and Object Detection tracks.<\/h3>\n<\/li>\n<li>\n<h3>2nd Place in Visual Wake Word Challenge at CVPR 2019.<\/h3>\n<\/li>\n<li>\n<h3>AWS Research Credits Award and Google Cloud Research Credits Award.<\/h3>\n<\/li>\n<li>\n<h3>1st Place Research Funding Proposal at Berkeley Deep Drive (BDD) 2019<\/h3>\n<\/li>\n<li>\n<h3>Winner of SenseTime Scholarship in 2018<\/h3>\n<\/li>\n<li>\n<h3>Winner of 
Tang Lixin Scholarship for outstanding students in China (top 0.5%).<\/h3>\n<\/li>\n<li>\n<h3>Winner of Tang Lixin 1st Prize Scholarship for graduate students studying abroad (top 0.05%).<\/h3>\n<\/li>\n<li>\n<h3>Winner of Fang Zheng Scholarship (top 1%).<\/h3>\n<\/li>\n<li>\n<h3>1st Prize in the Chinese Olympiad in Physics and the Chinese Physics Competition for college students.<\/h3>\n<\/li>\n<li>\n<h3>Princeton University Math (PUMac) Competition: Top three among all participants in geometry group.<\/h3>\n<\/li>\n<li>\n<h3>Top Ten Undergraduate Research Award at PKU EECS.<\/h3>\n<\/li>\n<li>\n<h3>Outstanding Graduates at Peking University and Outstanding Graduates in Beijing.<\/h3>\n<\/li>\n<\/ul>    <\/div>\n<\/div>\n<!-- \/module text -->        <\/div>\n                        <\/div>\n        <\/div>\n                        <div  data-anchor=\"publications\" data-css_id=\"5m3t859\" data-lazy=\"1\" class=\"module_row themify_builder_row fullwidth_row_container tb_has_section tb_section-publications tb_5m3t859 tf_w\">\n                        <div class=\"row_inner col_align_top tb_col_count_1 tf_box tf_rel\">\n                        <div  data-lazy=\"1\" class=\"module_column tb-column col-full tb_lfpi864 first\">\n                    <!-- module text -->\n<div  class=\"module module-text tb_98t3864   \" data-lazy=\"1\">\n        <div  class=\"tb_text_wrap\">\n        <h2><strong>Publications<\/strong><\/h2>    <\/div>\n<\/div>\n<!-- \/module text --><!-- module text -->\n<div  class=\"module module-text tb_mtf5864   \" data-lazy=\"1\">\n        <div  class=\"tb_text_wrap\">\n        <ul>\n<li>Zhikai Li, Jiatong Li, Xuewen Liu, Wangbo Zhao, Pan Du, Kaicheng Zhou, Qingyi Gu, Yang You, <strong>Zhen Dong<\/strong><span style=\"font-weight: bold;\">\u2709<\/span><strong>,<\/strong> Kurt Keutzer, <a href=\"https:\/\/openreview.net\/forum?id=rcdGXD2dfW\">&#8220;K-Sort Eval: Efficient Preference Evaluation for Visual Generation via Corrected VLM-as-a-Judge,<\/a>&#8221; ICLR 2026.<\/li>\n<li>Hongyu Zhang, Yufan Deng, Zilin Pan, Pengtao Jiang, Bo Li, Qibin Hou, Zhiyang Dou,\u00a0<span style=\"font-weight: bold;\">Zhen Dong<\/span>, Daquan Zhou,\u00a0<a href=\"https:\/\/openreview.net\/forum?id=QixNhagZ9t\">&#8220;TS-Attn: Temporal-wise Separable Attention for Multi-Event Video Generation,<\/a>&#8221; ICLR 2026.<\/li>\n<li><a href=\"https:\/\/research.nvidia.com\/labs\/nemotron\/files\/NVIDIA-Nemotron-3-Nano-Technical-Report.pdf\">&#8220;Nemotron 3 Nano: Open, Efficient Mixture-of-Experts Hybrid Mamba-Transformer Model for Agentic Reasoning,<\/a>&#8221; Tech Report 2025.<\/li>\n<li><a href=\"https:\/\/arxiv.org\/pdf\/2508.14444\">&#8220;NVIDIA Nemotron Nano2: Accurate and Efficient Hybrid Mamba-Transformer Reasoning Model,<\/a>&#8221; Tech Report 2025.<\/li>\n<li>Zhikai Li, Xuewen Liu, Dongrong Fu, Jianquan Li, Qingyi Gu, Kurt Keutzer, <span style=\"font-weight: bold;\">Zhen Dong\u2709.<\/span> <a href=\"https:\/\/arxiv.org\/abs\/2408.14468\">&#8220;K-Sort Arena: Efficient and reliable benchmarking for generative models via K-wise human preferences,<\/a>&#8221; CVPR 2025.<\/li>\n<li>Zhikai Li, Xiaoxuan Liu, Banghua Zhu, <span style=\"font-weight: bold;\">Zhen Dong,<\/span> Qingyi Gu, Kurt Keutzer, <a href=\"https:\/\/arxiv.org\/abs\/2310.07147\">&#8220;QFT: Quantized full-parameter tuning of LLMs with affordable resources,<\/a>&#8221; Scaling Post-training for LLMs Workshop at ICLR 2026.<\/li>\n<li>Zefan Cai, Wen Xiao, &#8230;\u00a0<strong>Zhen Dong<\/strong>, Anima Anandkumar, Abedelkadir 
Asi, Junjie Hu.\u00a0<a href=\"https:\/\/arxiv.org\/abs\/2505.24133\">&#8220;R-KV: Redundancy-aware KV Cache Compression for Reasoning Models,<\/a>&#8221; NeurIPS 2025.<\/li>\n<li><a href=\"https:\/\/openreview.net\/group?id=NeurIPS.cc\/2025\/Workshop\/LLM_Evaluation#tab-accept-oral\">&#8220;Measure of All Measures: Quantifying LLM Benchmark Quality,<\/a>&#8221; Jihan Yao, &#8230;\u00a0<strong>Zhen Dong<\/strong>, &#8230;, Best Paper Award at LLM Evaluation Workshop, NeurIPS 2025.<\/li>\n<li>Yinsheng Li, <span style=\"font-weight: bold;\">Zhen Dong\u2709,<\/span> Yi Shao\u2709. <a href=\"https:\/\/arxiv.org\/abs\/2507.11527\">&#8220;DrafterBench: Benchmarking Large Language Models for Tasks Automation in Civil Engineering,<\/a>&#8221; arXiv 2025.<\/li>\n<li>Jinbin Bai, Tian Ye, Wei Chow, Enxin Song, Qinguo Chen, Xiangtai Li, <strong>Zhen Dong,<\/strong> Lei Zhu, Shuicheng Yan. <a href=\"https:\/\/arxiv.org\/abs\/2410.08261\">&#8220;Meissonic: Revitalizing masked generative transformers for efficient high-resolution text-to-image synthesis,<\/a>&#8221; ICLR 2025.<\/li>\n<li>Aosong Feng, Weikang Qiu, Jinbin Bai, Kaicheng Zhou,\u00a0<strong>Zhen Dong,<\/strong>\u00a0Xiao Zhang, Rex Ying, Leandros Tassiulas.\u00a0<a style=\"color: #2341eb;\" href=\"https:\/\/arxiv.org\/abs\/2403.04880\">&#8220;An item is worth a prompt: versatile image editing with disentangled control,<\/a>&#8221; AAAI 2025.<\/li>\n<li>Lin Xu, Zhiyuan Hu, Daquan Zhou\u2709, Hongyu Ren,\u00a0<span style=\"font-weight: bold;\">Zhen Dong\u2709,<\/span>\u00a0Kurt Keutzer, See-Kiong Ng, Jiashi Feng.\u00a0<a href=\"https:\/\/openreview.net\/forum?id=VCS2ZPRg1m\">&#8220;MAgIC: Investigation of large language model powered multi-agent in cognition, adaptability, rationality and collaboration,<\/a>&#8221; EMNLP 2024.<\/li>\n<li>Chenyu Wang*,\u00a0<strong>Zhen Dong*\u2709,<\/strong>\u00a0Daquan Zhou*\u2709, Zhenhua Zhu, Yu Wang, Jiashi Feng, Kurt Keutzer.\u00a0<a href=\"https:\/\/arxiv.org\/abs\/2311.07620\">&#8220;EPIM: Efficient Processing-In-Memory Accelerators based on Epitome,<\/a>&#8221; DAC 2024.<\/li>\n<li>Jinbin Bai,\u00a0<span style=\"font-weight: bold;\">Zhen Dong,<\/span>\u00a0Aosong Feng, Xiao Zhang, Tian Ye, Kaicheng Zhou, Mike Zheng Shou.\u00a0<a href=\"https:\/\/arxiv.org\/abs\/2310.16002\">&#8220;Integrating View Conditions for Image Synthesis,<\/a>&#8221; IJCAI 2024.<\/li>\n<li>Javier Campos, <strong>Zhen Dong,<\/strong> Javier Duarte, Amir Gholami, Michael W. Mahoney, Jovan Mitrevski, Nhan Tran. <a href=\"https:\/\/arxiv.org\/abs\/2304.06745\">&#8220;End-to-end codesign of Hessian-aware quantized neural networks for FPGAs and ASICs,<\/a>&#8221; ACM Transactions on Reconfigurable Technology and Systems (TRETS) 2024.<\/li>\n<li>Junyi Yao, Yijiang Liu, <span style=\"font-weight: bold;\">Zhen Dong,<\/span> Mingfei Guo, Jiashi Feng, Kurt Keutzer, Li Du, Daquan Zhou, Shanghang Zhang. 
<a href=\"https:\/\/openreview.net\/forum?id=2pVogxJyDA\">&#8220;PromptCoT: Align prompt distribution via adapted chain of thought,<\/a>&#8221; CVPR 2024.<\/li>\n<li>Yuzhang Shang, Zhihang Yuan, Qiang Wu,\u00a0<span style=\"font-weight: bold;\">Zhen Dong<strong>\u2709<\/strong>.\u00a0<\/span><a href=\"https:\/\/arxiv.org\/abs\/2310.00034\">&#8220;PB-LLM: Partially Binarized Large Language Models,<\/a>&#8221; ICLR 2024.<\/li>\n<li>Lutfi Erdogan, VAR Kanakagiri, Kurt Keutzer, <span style=\"font-weight: bold;\">Zhen Dong<strong>\u2709<\/strong>.<\/span> <a href=\"https:\/\/practical-dl.github.io\/2024\/long_paper\/17\/CameraReady\/17.pdf\">&#8220;Stochastic Communication Avoidance for Recommendation Systems,<\/a>&#8221; IEEE CAI 2024.<\/li>\n<li>Ze Ma, Daquan Zhou\u2709, Chun-Hsiao Yeh, Xue-She Wang, Xiuyu Li, Huanrui Yang,\u00a0<strong>Zhen Dong<\/strong><strong>\u2709<\/strong><strong>,<\/strong>\u00a0Kurt Keutzer, Jiashi Feng.\u00a0<a href=\"https:\/\/arxiv.org\/abs\/2402.09368\">&#8220;Magic-Me: Identity-Specific Video Customized Diffusion,<\/a>&#8221; AI4VA Workshop at ECCV 2024.<\/li>\n<li>Rongyu Zhang, Yulin Luo, Huanrui Yang,\u00a0<span style=\"font-weight: bold;\">Zhen Dong,<\/span>\u00a0\u2026 &amp; Shanghang Zhang. <a href=\"https:\/\/ojs.aaai.org\/index.php\/AAAI\/article\/view\/29622\">&#8220;Efficient Deweahter Mixture-of-Experts with Uncertainty-Aware Feature-wise Linear Modulation,<\/a>&#8221; AAAI 2024.<\/li>\n<li>Sehoon Kim, Coleman Hooper, Amir Gholami,\u00a0<span style=\"font-weight: bold;\">Zhen Dong,<\/span>\u00a0Xiuyu Li, Sheng Shen, Michael W. Mahoney, Kurt Keutzer.\u00a0<a href=\"https:\/\/arxiv.org\/abs\/2306.07629\">&#8220;SqueezeLLM: Dense-and-Sparse Quantization,<\/a>&#8221; ICML 2024.<\/li>\n<li>Anthony Chen, Huanrui Yang, Yulu Gan, Denis A Gudovskiy,\u00a0<strong>Zhen Dong,<\/strong>\u00a0Haofan Wang, Tomoyuki Okuno, Yohei Nakata, Shanghang Zhang, Kurt Keutzer.\u00a0<a href=\"https:\/\/arxiv.org\/abs\/2312.09148\">&#8220;Split-Ensemble: Efficient OOD-aware ensemble via task and model splitting,<\/a>&#8221; ICML 2024.<\/li>\n<li>Zhihang Yuan, Yuzhang Shang, Yang Zhou,\u00a0<span style=\"font-weight: bold;\">Zhen Dong,<\/span>\u00a0Chenhao Xue, Bingzhe Wu, Zhikai Li, Qingyi Gu, Yong Jae Lee, Yan Yan, Beidi Chen, Guangyu Sun, Kurt Keutzer.\u00a0<a href=\"https:\/\/arxiv.org\/abs\/2402.16363\">&#8220;LLM Inference Unveiled: Survey and Roofline Model Insights,<\/a>&#8221; arXiv 2024.<\/li>\n<li>Huanrui Yang, Yafeng Huang, <span style=\"font-weight: bold;\">Zhen Dong,<\/span> Denis A Gudovskiy, Tomoyuki Okuno, Yohei Nakata, Yuan Du, Kurt Keutzer, Shanghang Zhang. <a href=\"https:\/\/arxiv.org\/abs\/2407.03442\">&#8220;Fisher-aware Quantization for DETR Detectors with Critical-category Objectives,<\/a>&#8221; WANT Workshop at ICML 2024.<\/li>\n<li>Yifan Zhang*,\u00a0<b>Zhen Dong*,<\/b> Huanrui Yang, Ming Lu, Cheng-Ching Tseng, Yandong Guo, Kurt Keutzer, Li Du, Shanghang Zhang. <a href=\"https:\/\/practical-dl.github.io\/2023\/extended_abstract\/22\/CameraReady\/22.pdf\">&#8220;QD-BEV: Quantization-aware View-guided Distillation for Multi-view\u00a03D Object Detection,<\/a>&#8221; ICCV 2023.<\/li>\n<li>Xiuyu Li, Yijiang Liu, Long Lian, Huanrui Yang, <span style=\"font-weight: bold;\">Zhen Dong,<\/span> Daniel Kang, Shanghang Zhang, Kurt Keutzer. 
<a href=\"https:\/\/arxiv.org\/abs\/2302.04304\">&#8220;Q-Diffusion: Quantizing Diffusion Models,<\/a>&#8221; ICCV 2023.<\/li>\n<li>Venkat Srinivasan, <span style=\"font-weight: bold;\">Zhen Dong,<\/span> Banghua Zhu, Brian Yu, Hanzi Mao, Damon Mosk-Aoyama, Kurt Keutzer, Jiantao Jiao, Jian Zhang. <a href=\"https:\/\/openreview.net\/pdf?id=5lcPe6DqfI\">&#8220;NexusRaven: A Commercially-Permissive Language Model for Function Calling,<\/a>&#8221; <a href=\"https:\/\/sites.google.com\/view\/fmdm-neurips23\">FMDM Workshop<\/a>\u00a0&amp; <a href=\"https:\/\/an-instructive-workshop.github.io\/\">Instruction Workshop<\/a> at NeurIPS 2023<\/li>\n<li><span dir=\"ltr\" style=\"box-sizing: border-box; color: transparent; cursor: text; position: absolute; transform-origin: 0px 0px; white-space: pre; left: 365.509px; top: 191.13px; font-size: 22.2694px; font-family: sans-serif; transform: scaleX(0.968593);\" role=\"presentation\">3D Object Detection<\/span>Yijiang Liu, Huanrui Yang, <b>Zhen Dong,<\/b> Kurt Keutzer, Li Du, Shanghang Zhang. &#8220;<a href=\"https:\/\/arxiv.org\/abs\/2211.16056\">NoisyQuant: Noisy Bias-Enhanced Post-Training Activation Quantization for Vision Transformers<\/a>,&#8221; CVPR 2023.<\/li>\n<li>Lirui Xiao, Huanrui Yang, <b>Zhen Dong,<\/b> Kurt Keutzer, Li Du, Shanghang Zhang. &#8220;<a href=\"https:\/\/arxiv.org\/abs\/2212.02770\">CSQ: Growing Mixed-Precision Quantization Scheme with Bi-level Continuous Sparsification<\/a>,&#8221; DAC 2023.<\/li>\n<li>Yifan Zhang*, <b>Zhen Dong*,<\/b>\u00a0Huanrui Yang, Ming Lu, Cheng-Ching Tseng, Yandong Guo, Kurt Keutzer, Li Du, Shanghang Zhang. &#8220;<a href=\"https:\/\/practical-dl.github.io\/2023\/extended_abstract\/22\/CameraReady\/22.pdf\">QD-BEV: Quantization-aware View-guided Distillation for 3D Object Detection<\/a>,&#8221; Best Paper Nomination, Practical DL Workshop at AAAI 2023.<\/li>\n<li>Javier Campos, <b>Zhen Dong,<\/b> Javier Duarte, Amir Gholami, Michael Mahoney, Jovan Mitrevski and Nhan Tran. &#8220;<a href=\"https:\/\/oscar-workshop.github.io\/Program_2023.html\">End-to-end codesign of Hessian-aware quantized neural networks for FPGAs and ASICs<\/a>,&#8221;\u00a0OSCAR Workshop at ISCA 2023.<\/li>\n<li><strong>Zhen Dong*,\u00a0<\/strong>Kaicheng Zhou*, Guohao Li*, Qiang Zhou, Mingfei Guo, Bernard Ghanem, Kurt Keutzer, Shanghang Zhang. &#8220;<a href=\"https:\/\/arxiv.org\/abs\/2205.02162\">UnrealNAS: Can We Search Neural Architectures with Unreal Data?<\/a>&#8221; DAC 2023 Workshop.<\/li>\n<li>Tian Li, Xiang Chen,\u00a0<strong>Zhen Dong,\u00a0<\/strong>Weijiang Yu, Yijun Yan, Shanghang Zhang, Kurt Keutzer. &#8220;<a href=\"https:\/\/www.ijcai.org\/proceedings\/2022\/0585.pdf\">Domain-Adaptive Text Classification with Structured Knowledge from Unlabeled Data<\/a>,&#8221; Long Oral, IJCAI-ECAI 2022.<\/li>\n<li><span style=\"font-family: Arial, Helvetica, sans-serif;\"><b>Zhen Dong.<\/b>\u00a0&#8220;<\/span><a style=\"font-family: Arial, Helvetica, sans-serif;\" href=\"https:\/\/www2.eecs.berkeley.edu\/Pubs\/TechRpts\/2022\/EECS-2022-231.html\">Hardware-aware Efficient Deep Learning<\/a>,<span style=\"font-family: Arial, Helvetica, sans-serif;\">&#8221;\u00a0<\/span>PhD Thesis, 2022.<\/li>\n<li>Shixing Yu*, Zhewei Yao*, Amir Gholami*,\u00a0<strong>Zhen Dong*<\/strong>, Michael W. Mahoney, and Kurt Keutzer. 
&#8220;<a href=\"https:\/\/openaccess.thecvf.com\/content\/WACV2022\/html\/Yu_Hessian-Aware_Pruning_and_Optimal_Neural_Implant_WACV_2022_paper.html\">Hessian-Aware Pruning and Optimal Neural Implant<\/a>,&#8221; Oral, WACV 2022.<\/li>\n<li>Allison McCarn Deiana, Nhan Tran, &#8230;\u00a0<strong>Zhen Dong,\u00a0<\/strong>&#8230; Olivia Weng. &#8220;<a href=\"https:\/\/arxiv.org\/abs\/2110.13041\">Applications and Techniques for Fast Machine Learning in Science<\/a>,&#8221; Frontiers in Big Data 2022.<\/li>\n<li><strong>Zhen Dong*,\u00a0<\/strong>Yizhao Gao*, Qijing Huang, John Wawrzynek, Hayden K.H. So, Kurt Keutzer. &#8220;<a href=\"https:\/\/arxiv.org\/abs\/2104.12766\">HAO: Hardware-aware Neural Architecture Optimization for Efficient Inference<\/a>,&#8221; Oral, FCCM 2021.<\/li>\n<li><strong>Zhen Dong*,\u00a0<\/strong>Dequan Wang*, Qijing Huang*, Yizhao Gao, Yaohui Cai, Tian Li, Bichen Wu, Kurt Keutzer, John Wawrzynek. &#8220;<a href=\"https:\/\/arxiv.org\/abs\/2006.08357\">CoDeNet: Algorithm-hardware Co-design for Deformable Convolution<\/a>,&#8221; Oral, FPGA 2021.<\/li>\n<li>Zhewei Yao*, <strong>Zhen Dong*,<\/strong> Zhangcheng Zheng*, Amir Gholami*, Jiali Yu, Eric Tan, Leyuan Wang, Qijing Huang, Yida Wang, Michael W. Mahoney, Kurt Keutzer. &#8220;<a href=\"http:\/\/proceedings.mlr.press\/v139\/yao21a.html\">HAWQV3: Dyadic Neural Network Quantization<\/a>,&#8221; ICML 2021.<\/li>\n<li>Amir Gholami*, Sehoon Kim*,\u00a0<strong>Zhen Dong*,<\/strong>\u00a0Zhewei Yao*, Michael W. Mahoney, Kurt Keutzer. &#8220;<a href=\"https:\/\/arxiv.org\/abs\/2103.13630\">A Survey of Quantization Methods for Efficient Neural Network Inference<\/a>,&#8221; BLPCV (<a href=\"https:\/\/www.routledge.com\/Low-Power-Computer-Vision-Improve-the-Efficiency-of-Artificial-Intelligence\/Thiruvathukal-Lu-Kim-Chen-Chen\/p\/book\/9780367744700\">Book of Low-Power Computer Vision<\/a>) 2021.<\/li>\n<li>Tian Li, Xiang Chen, Shanghang Zhang, <strong>Zhen Dong<\/strong>, Kurt Keutzer. &#8220;<a href=\"https:\/\/arxiv.org\/abs\/2012.02943\">Cross-Domain Sentiment Classification with In-Domain Contrastive Learning<\/a>,&#8221; short version at NeurIPS 2020 SSL Workshop, long version at ICASSP 2021.<\/li>\n<li><strong>Zhen Dong,<\/strong>\u00a0Zhewei Yao, Yaohui Cai, Daiyaan Arfeen, Amir Gholami, Michael W. Mahoney, Kurt Keutzer. &#8220;<a href=\"https:\/\/arxiv.org\/abs\/1911.03852\">HAWQ-V2: Hessian Aware trace-Weighted Quantization of Neural Networks<\/a>,&#8221; NeurIPS 2020.<\/li>\n<li>Yaohui Cai*, Zhewei Yao*,<strong>\u00a0Zhen Dong*,<\/strong> Amir Gholami, Michael W. Mahoney, Kurt Keutzer. &#8220;<a href=\"https:\/\/arxiv.org\/pdf\/2001.00281.pdf\">ZeroQ: A Novel Zero Shot Quantization Framework,<\/a>&#8221; CVPR 2020.<\/li>\n<li>Sheng Shen<strong>*<\/strong>,<strong> Zhen Dong*, <\/strong>Jiayu Ye<strong>*<\/strong>, Linjian Ma, Zhewei Yao, Amir Gholami, Michael W. Mahoney, Kurt Keutzer. &#8220;<a href=\"https:\/\/dong-zhen.com\/wp-content\/uploads\/Q-BERT.pdf\">Q-BERT: Hessian Based Ultra Low Precision Quantization of BERT<\/a>,&#8221; Spotlight, AAAI 2020.<\/li>\n<li><strong>Zhen Dong,<\/strong> Zhewei Yao, Daiyaan Arfeen, Yaohui Cai, Michael Mahoney, Kurt Keutzer. &#8220;<a href=\"https:\/\/sites.google.com\/site\/optneurips19\/\">Trace Weighted Hessian-Aware Quantization<\/a>,&#8221; Oral, Opt-Workshop, NeurIPS 2019.<\/li>\n<li>Q. Huang, D. Wang, Y. Gao, Y. Cai, <strong>Zhen Dong<\/strong>, B. Wu, K. Keutzer and J. Wawrzynek. 
&#8220;<a href=\"https:\/\/www.emc2-workshop.com\/neurips-19\">Algorithm-hardware Co-design for Deformable Convolution<\/a>,&#8221; Oral, EMC2-Workshop, NeurIPS 2019.<\/li>\n<li><strong>Zhen Dong<\/strong>, Yaohui Cai, Amir Gholami, Tianjun Zhang, Kurt Keutzer. &#8220;<a href=\"https:\/\/dong-zhen.com\/wp-content\/uploads\/2019_Visual_Wake_Words.pdf\">Ultra-low Bit Quantization for Visual Wake Word Challenge<\/a>,&#8221; 2nd Place at VWW Competition, CVPR 2019.<\/li>\n<li><strong>Zhen Dong*, <\/strong>Zhewei Yao*, Amir Gholami*, Michael W. Mahoney, Kurt Keutzer. &#8220;<a href=\"https:\/\/arxiv.org\/abs\/1905.03696\" target=\"_blank\" rel=\"noopener\">HAWQ: Hessian AWare Quantization of Neural Networks with Mixed-Precision<\/a>,&#8221; ICCV 2019.<\/li>\n<li>Runze Han, Peng Huang, Yachen Xiang, Chen Liu,\u00a0<strong>Zhen Dong<\/strong>, et al. <a href=\"https:\/\/ieeexplore.ieee.org\/abstract\/document\/8605508\">&#8220;A Novel Convolution Computing Paradigm Based on NOR Flash Array with High Computing Speed and Energy Efficiency,<\/a>&#8221; IEEE Transactions on Circuits and Systems (TCAS) 2019, p.1-12.<\/li>\n<li><strong>Zhen Dong<\/strong>, Zheng Zhou, Zefan Li, Peng Huang, Lifeng Liu, Xiaoyan Liu, Jinfeng Kang. <a href=\"https:\/\/ieeexplore.ieee.org\/abstract\/document\/8558705?casa_token=30leNIKf22gAAAAA:BP_d1iHbINXQIKOGr7ZGBJyN0UhzVPl0bLbrkDumW_IYb9ktPd5EXlA_JQImfz3M3HujwLDH3A\">&#8220;Convolutional Neural Networks for Image Recognition and Online Learning Based on RRAM Devices,<\/a>&#8221; IEEE Transactions on Electron Devices (TED) 2018, p.793-801.<\/li>\n<li>Jinfeng Kang,<strong> Zhen Dong,<\/strong> Peng Huang, Renze Han, Lifeng Liu, Xiaoyan Liu. China patent about 3D RRAM.<\/li>\n<li>Huang, P., Li, Z., <strong>Zhen Dong<\/strong>, Han, R., Zhou, Z., Zhu, D., Liu, L., Liu, X. and Kang, J. <a href=\"https:\/\/pubs.acs.org\/doi\/full\/10.1021\/acsaelm.9b00011?casa_token=oPYiQIWubz8AAAAA%3AU22Ile8ZCapAPJzQC5QjDJ2LTDxtoE58snXHf5JPzJSXXUHbcLQGtfNgFCTF9tpM4bP5KmB0YJNcZyM\">&#8220;Binary Resistive Switching Device Based Electronic Synapse with Spike-Rate-Dependent-Plasticity for Online Learning,<\/a>&#8221; ACS Applied Electronic Materials 2018, pp. 845-853.<\/li>\n<li>Xinxin Wang, Peng Huang,<strong>\u00a0Zhen Dong<\/strong>, Zheng Zhou, Yuning Jiang, Runze Han, Lifeng Liu, Xiaoyan Liu, Jinfeng Kang. <a href=\"https:\/\/ieeexplore.ieee.org\/abstract\/document\/8403854\">\u201cA Novel RRAM-based Adaptive-Threshold LIF Neuron Circuit for High Recognition Accuracy,<\/a>\u201d International Symposium on VLSI Technology, Systems and Applications (VLSI-TSA) 2018, pp. 1-2.<\/li>\n<li><strong>Zhen Dong<\/strong>, Z. Zhou, Z. F. Li, C. Liu, Y. N. Jiang, P. Huang, L. F. Liu, X. Y. Liu, and J. F. Kang. <a href=\"https:\/\/ieeexplore.ieee.org\/abstract\/document\/8242339?casa_token=P-71TMIKHnkAAAAA:JsD9t-U2MhI8JPaxNVze8w5nZViV4n39lDOhU5Ix3R9Y5drW4rna45EdPCwlTeat_pA-i34mkQ\">&#8220;RRAM based convolutional neural networks for high accuracy pattern recognition and online learning tasks,<\/a>&#8221; Oral, VLSI-SNW 2017, pp. 145-146. IEEE, 2017.<\/li>\n<li>Zheng Zhou, Chen Liu, Wensheng Shen, <strong>Zhen Dong<\/strong>, Zhe Chen, Peng Huang, Lifeng Liu, Xiaoyan Liu, Jinfeng Kang. <a href=\"https:\/\/link.springer.com\/article\/10.1186\/s11671-017-2023-y\">\u201cThe Characteristics of Binary Spike-Time-Dependent Plasticity in HfO2-Based RRAM and Applications for Pattern Recognition,<\/a>\u201d Nanoscale Research Letters (NRL) 2017, 12(1), p.244.<\/li>\n<li>P. Huang, D. B. Zhu, C. Liu, Z. 
Zhou, <strong>Zhen Dong<\/strong>, H. Jiang, W. S. Shen, L. F. Liu, X. Y. Liu, and J. F. Kang. <a href=\"https:\/\/ieeexplore.ieee.org\/abstract\/document\/8268435?casa_token=BfF7b6YTRwkAAAAA:w2ofmqW0B6slcvWLT5mphiheNhdiznmPYxw3bZ06BBBiEq-QuNRXjmN5S-nMzQZU8efyXhzNWg\">\u201cRTN based Oxygen Vacancy Probing Method for Ox-RRAM Reliability Characterization and Its Application in Tail Bits,<\/a>\u201d International Electron Devices Meeting (IEDM) 2017, pp. 21-4.<\/li>\n<\/ul>    <\/div>\n<\/div>\n<!-- \/module text -->        <\/div>\n                        <\/div>\n        <\/div>\n                        <div  data-anchor=\"research-experience\" data-css_id=\"xcd2860\" data-lazy=\"1\" class=\"module_row themify_builder_row fullwidth_row_container tb_has_section tb_section-research-experience tb_xcd2860 tf_w\">\n                        <div class=\"row_inner col_align_top tb_col_count_1 tf_box tf_rel\">\n                        <div  data-lazy=\"1\" class=\"module_column tb-column col-full tb_80pv865 first\">\n                    <!-- module text -->\n<div  class=\"module module-text tb_0onh865   \" data-lazy=\"1\">\n        <div  class=\"tb_text_wrap\">\n        <h2><strong>Research Experience<\/strong><\/h2>    <\/div>\n<\/div>\n<!-- \/module text --><!-- module text -->\n<div  class=\"module module-text tb_abul865    hide-tablet hide-mobile\" data-lazy=\"1\">\n        <div  class=\"tb_text_wrap\">\n        <h3>Tenure-Track Assistant Professor, UCSB\u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 <span style=\"font-size: 22px; font-weight: normal; letter-spacing: -0.02em;\">Starting Fall 2026<\/span><\/h3>\n<h3>Senior\/Staff Research Scientist, NVIDIA\u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 <span style=\"font-size: 22px; font-weight: normal; letter-spacing: -0.02em;\">Jun 2025 \u2014 present<\/span><\/h3>\n<h3>Founding Member, Nexusflow AI\u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0<span style=\"font-size: 22px; font-weight: normal;\">Jun 2023 \u2014 Jun 2025<\/span><\/h3>\n<h3>Ph.D.\/Postdoc, Berkeley AI Research (BAIR), UC Berkeley\u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0<span style=\"font-size: 22px; font-weight: normal;\">Aug 2018 \u2014 Jun 2023<\/span><\/h3>\n<p>Advisor: Prof.<strong> Kurt Keutzer<\/strong><\/p>\n<p><strong>Research on Hessian-AWare Quantization: HAWQ (ICCV\u201919), HAWQ-V2 (NeurIPS\u201920), ZeroQ (CVPR\u201920), HAP (WACV\u201922), Quantization Review (BLPCV\u201922), QD-BEV (ICCV\u201923), NoisyQuant (CVPR\u201923)<\/strong><\/p>\n<ul>\n<li>Propose a Hessian-based method to decide mixed-precision configuration and block-wise fine-tuning order.<\/li>\n<li>Prove theorem to use the trace of Hessian as sensitivity metric and conduct fast Pareto frontier optimization.<\/li>\n<li>Generalize to segmentation, 2D\/3D object detection tasks and achieve state-of-the-art results.<\/li>\n<li>Conduct fast end-to-end 
quantization without fine-tuning and without using any training\/test data.<\/li>\n<\/ul>\n<p><strong>Research on HW-SW Co-design: HAWQ-V3 (ICML\u201921), CoDeNet (FPGA\u201921), HAO (FCCM\u201921), CSQ (DAC\u201923), EPIM (DAC\u201924)<\/strong><\/p>\n<ul>\n<li>Achieve hardware-aware quantization and utilize 4-bit Tensor Cores for inference acceleration.<\/li>\n<li>Implement 4-bit kernels and mixed-precision on TVM, achieve 7.4x compression and 5.4x speedup against fp32.<\/li>\n<li>Propose efficient deformable op on embedded FPGAs, design new FPGA-core with ultra-low precision arithmetic.<\/li>\n<li>Conduct HW-SW joint architecture search and efficient implementation of mixed-precision NNs on CPU\/GPU\/FPGAs\/PIM.<\/li>\n<\/ul>\n<p><strong>Research on Efficient LLMs and Diffusion Models: Q-BERT (AAAI\u201920), Q-Diffusion (ICCV\u201923), SqueezeLLM (ICML\u201924), PB-LLM (ICLR\u201924)<\/strong><\/p>\n<ul>\n<li>Propose sensitivity-based non-uniform quantization and dense-and-sparse decomposition to handle outliers.<\/li>\n<li>Pioneer the use of Hessian information to guide LLM quantization in both PTQ and QAT.<\/li>\n<li>Implement 3\/4-bit CUDA kernels and achieve 4.6x compression compared to fp16 and 2.4x speedup on an A6000.<\/li>\n<li>Propose timestep-aware calibration and split-shortcut quantization to achieve 4-bit diffusion models for the first time.<\/li>\n<\/ul>\n<p><strong>Research on Multi-agent Systems: MAgIC (EMNLP\u201924)<\/strong><\/p>\n<ul>\n<li>Pioneer the integration of probabilistic graphical modeling (PGM) to enhance the cognitive abilities of LLMs.<\/li>\n<li>Present a framework to evaluate LLM-powered multi-agent systems by employing social deduction games.<\/li>\n<\/ul>\n<p><strong>Research on Image &amp; Video Generative Models: PromptCoT (CVPR\u201924), ViewControl (IJCAI\u201924), D-Edit (AAAI\u201925), Meissonic, Magic-Me, VEditBench, K-Sort Arena<\/strong><\/p>\n<ul>\n<li>Propose new methods to achieve better controllability of generative diffusion models.<\/li>\n<li>Develop novel efficient Arena algorithms for human-in-the-loop evaluation and alignment.<\/li>\n<li>Present Meissonic-1B, which elevates masked image modeling (MIM) text-to-image models to SDXL-level quality.<\/li>\n<\/ul>\n<p><strong>Research on AI for Science: FastML (Frontiers in Big Data\u201922), High-Momentum Particle Trigger (TRETS\u201924)<\/strong><\/p>\n<ul>\n<li>Review AI inference acceleration methods and how they help dark matter search, morphology characterization, etc.<\/li>\n<li>Implement efficient AI on ASICs and FPGAs to reduce time cost and enable particle trigger decisions at CERN LHC.<\/li>\n<\/ul>\n<h3>Research Intern, Bytedance AI Lab\u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0\u00a0<span style=\"font-size: 22px; font-weight: normal;\">Jan 2023 \u2014 Apr 2023<\/span><\/h3>\n<h3>Research Intern, NVIDIA AI Lab\u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0<span style=\"font-size: 22px; font-weight: normal;\">May 2021 \u2014 Aug 2021<\/span><\/h3>\n<h3>Research Intern, Facebook AI Research\u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 
\u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0 \u00a0<span style=\"font-size: 22px; font-weight: normal;\">May 2020 \u2014 Aug 2020<\/span><\/h3>\n<hr>    <\/div>\n<\/div>\n<!-- \/module text --><!-- module text -->\n<div  class=\"module module-text tb_b9p0865    hide-tablet hide-mobile\" data-lazy=\"1\">\n        <div  class=\"tb_text_wrap\">\n        <h3>Undergraduate Visiting Researcher Program (UGVR), Stanford University<\/h3>\n<p>Advisor: Prof.<strong> H.-S. Philip Wong<\/strong><\/p>\n<h3>Research Intern, SenseTime AI Lab<\/h3>    <\/div>\n<\/div>\n<!-- \/module text --><!-- module text -->\n<div  class=\"module module-text tb_8tml865    hide-tablet hide-mobile\" data-lazy=\"1\">\n        <div  class=\"tb_text_wrap\">\n        <h3>Research Assistant, EECS School, Peking University<\/h3>\n<p>Advisor: Prof.<strong> Jinfeng Kang<\/strong>\u00a0 \u00a0 \u00a0 \u00a0 \u00a0\u00a0<\/p>    <\/div>\n<\/div>\n<!-- \/module text -->        <\/div>\n                        <\/div>\n        <\/div>\n                        <div  data-anchor=\"teaching\" data-css_id=\"csx451\" data-lazy=\"1\" class=\"module_row themify_builder_row fullwidth_row_container tb_has_section tb_section-teaching tb_csx451 tf_w\">\n                        <div class=\"row_inner col_align_top tb_col_count_1 tf_box tf_rel\">\n                        <div  data-lazy=\"1\" class=\"module_column tb-column col-full tb_uiyv51 first\">\n                    <!-- module text -->\n<div  class=\"module module-text tb_hraj51   \" data-lazy=\"1\">\n        <div  class=\"tb_text_wrap\">\n        <h2><strong>Teaching<\/strong><\/h2>    <\/div>\n<\/div>\n<!-- \/module text --><!-- module text -->\n<div  class=\"module module-text tb_bg8651   \" data-lazy=\"1\">\n        <div  class=\"tb_text_wrap\">\n        <ul>\n<li>Head Graduate Student Instructor for Applications of Parallel Computers, <a href=\"https:\/\/sites.google.com\/lbl.gov\/cs267-spr2022\/home\">Berkeley CS 267<\/a>.<\/li>\n<li>Course Coordinator for Online Course Applications of Parallel Computers on <a href=\"https:\/\/moodle.xsede.org\/\">Moodle XSEDE<\/a>.<\/li>\n<li>Graduate Student Instructor\u00a0for Optimization Analytics, Berkeley INDENG 240.<\/li>\n<li>Graduate Student Instructor\u00a0for Mathematical Programming, Berkeley INDENG 262A.<\/li>\n<li>BAIR Mentoring Program for Underrepresented Undergraduates.<\/li>\n<li>Representative Courses I took at UC Berkeley:<br>Visual Object and Activity Recognition (4.00)<br>RISC-V CPU on FPGA Lab (4.00)<br>Digital Circuits and Computer Architecture (4.00)<br>Applications of Parallel Computers (4.00)<br>Statistical Learning Theory (4.00)<br>Convex Optimization and Approximation (4.00)<\/li>\n<li>Representative Courses I took at Peking University:<br>Digital Logic (4.00)<br>Principles of Digital Integrated Circuits (4.00)<br>Analog Circuits (3.99)<br>Advanced Analog Integrated Circuits Design (3.99)<br>Micro-Nano Integrated System (4.00)<br>Fundamentals of Solid State Physics (3.98)<br>Fundamentals of Semiconductor Materials (3.97)<br>Physics of Semiconductor (3.98)<br>Semiconductor Device Physics (3.98)<br>Principle of Integrated Circuits Process (3.99)<\/li>\n<\/ul>    <\/div>\n<\/div>\n<!-- \/module text -->        <\/div>\n                        <\/div>\n        <\/div>\n                        <div  data-parallax-bg=\"desktop\" data-anchor=\"opensource\" data-css_id=\"9zyt860\" data-lazy=\"1\" class=\"module_row 
themify_builder_row fullwidth_row_container tb_has_section tb_section-opensource tb_9zyt860 tf_w\">\n                        <div class=\"row_inner col_align_top tb_col_count_1 tf_box tf_rel\">\n                        <div  data-lazy=\"1\" class=\"module_column tb-column col-full tb_ge9o865 first\">\n                    <!-- module text -->\n<div  class=\"module module-text tb_1dib865   \" data-lazy=\"1\">\n        <div  class=\"tb_text_wrap\">\n        <h2><strong>Opensource<\/strong><\/h2>    <\/div>\n<\/div>\n<!-- \/module text --><!-- module text -->\n<div  class=\"module module-text tb_afo6865   \" data-lazy=\"1\">\n        <div  class=\"tb_text_wrap\">\n        <ul>\n<li>NVIDIA-Nemotron-3-Nano-30B: [<a href=\"https:\/\/huggingface.co\/nvidia\/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16\">huggingface<\/a>][<a href=\"https:\/\/huggingface.co\/nvidia\/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8\">huggingface_fp8<\/a>][<a href=\"https:\/\/huggingface.co\/datasets\/nvidia\/Nemotron-CC-v2\">pre-training_dataset<\/a>][<a href=\"https:\/\/huggingface.co\/datasets\/nvidia\/Nemotron-Math-Proofs-v1\">post-training_dataset<\/a>].<\/li>\n<li>NVIDIA-Nemotron-Nano-9B-V2: [<a href=\"https:\/\/huggingface.co\/nvidia\/NVIDIA-Nemotron-Nano-9B-v2\">huggingface<\/a>][<a href=\"https:\/\/huggingface.co\/nvidia\/NVIDIA-Nemotron-Nano-9B-v2-Base\">huggingface 9B base<\/a>][<a href=\"https:\/\/huggingface.co\/nvidia\/NVIDIA-Nemotron-Nano-12B-v2-Base\">huggingface 12B base<\/a>][<a href=\"https:\/\/huggingface.co\/collections\/nvidia\/nemotron-pre-training-dataset-689d9de36f84279d83786b35\">pre-training dataset<\/a>].<\/li>\n<li>Llama-Nemotron-Super-49B-V1.5: [<a href=\"https:\/\/huggingface.co\/nvidia\/Llama-3_3-Nemotron-Super-49B-v1_5\">huggingface<\/a>][<a href=\"https:\/\/huggingface.co\/nvidia\/Llama-3_3-Nemotron-Super-49B-v1_5-FP8\">8-bit model<\/a>][<a href=\"https:\/\/huggingface.co\/datasets\/nvidia\/Llama-Nemotron-Post-Training-Dataset\">post-training dataset<\/a>].<\/li>\n<li>DrafterBench: Benchmarking Large Language Models for Tasks Automation in Civil Engineering: [<a href=\"https:\/\/github.com\/Eason-Li-AIS\/DrafterBench\">github<\/a>][<a href=\"https:\/\/huggingface.co\/datasets\/Eason666\/DrafterBench\">huggingface<\/a>].<\/li>\n<li>R-KV: Redundancy-aware KV Cache Compression for Reasoning Models: [<a href=\"https:\/\/github.com\/Zefan-Cai\/R-KV\">github<\/a>][<a href=\"https:\/\/zefan-cai.github.io\/R-KV.page\/\">website<\/a>].<\/li>\n<li>Magic-Me: [<a href=\"https:\/\/github.com\/Zhen-Dong\/Magic-Me\">github]<\/a>[<a href=\"https:\/\/magic-me-webpage.github.io\/\">website<\/a>][<a href=\"https:\/\/huggingface.co\/spaces\/visionMaze\/Magic-Me\">demo<\/a>], voted best in <a href=\"https:\/\/huggingface.co\/papers?date=2024-02-15\">Huggingface Daily Paper Recommendations<\/a>.<\/li>\n<li>NexusRaven: [<a href=\"https:\/\/github.com\/nexusflowai\/NexusRaven\">github<\/a>][<a href=\"https:\/\/huggingface.co\/Nexusflow\/NexusRaven-13B\">huggingface<\/a>], NexusRaven-V2: [<a href=\"https:\/\/github.com\/nexusflowai\/NexusRaven-V2\">github<\/a>], Athene-V2-Agent: [<a href=\"https:\/\/huggingface.co\/Nexusflow\/Athene-V2-Agent\">huggingface<\/a>], NexusBench: [<a href=\"https:\/\/github.com\/nexusflowai\/NexusBench\">github<\/a>].<\/li>\n<li>NexusRaven-V2-13B: [<a href=\"https:\/\/huggingface.co\/Nexusflow\/NexusRaven-V2-13B\">huggingface<\/a>][<a href=\"https:\/\/huggingface.co\/spaces\/Nexusflow\/NexusRaven-V2-Demo\">demo<\/a>][<a 
href=\"https:\/\/huggingface.co\/spaces\/Nexusflow\/Nexus_Function_Calling_Leaderboard\">leaderboard<\/a>], 378 likes, 61k+ downloads. Rank Top-5 on Huggingface Trending when released.<\/li>\n<li>Meissonic: Masked Generative Transformers for Efficient High-Resolution Text-to-Image Synthesis: [<a href=\"https:\/\/github.com\/viiika\/Meissonic\">github<\/a>][<a href=\"https:\/\/huggingface.co\/MeissonFlow\/Meissonic\">huggingface<\/a>].<\/li>\n<li>K-Sort Arena: Efficient Benchmarking for Generative Models via K-wise Human Preferences: [<a href=\"https:\/\/huggingface.co\/spaces\/ksort\/K-Sort-Arena\">huggingface<\/a>].<\/li>\n<li>SqueezeLLM: Dense-and-Sparse Quantization, [<a href=\"https:\/\/github.com\/SqueezeAILab\/SqueezeLLM\">github<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2306.07629\">paper<\/a>].<\/li>\n<li>Q-Diffusion: Quantizing Diffusion Models, [<a href=\"https:\/\/github.com\/Xiuyu-Li\/q-diffusion\">github<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2302.04304\">paper<\/a>].<\/li>\n<li>Awesome Quantization Papers,\u00a0<a href=\"https:\/\/github.com\/Zhen-Dong\/Awesome-Quantization-Papers\">[github]<\/a>.<\/li>\n<li>D-Edit: Versatile Image Editing with Disentangled Control: [<a href=\"https:\/\/huggingface.co\/spaces\/Collov-Labs\/d-edit\">huggingface<\/a>]<\/li>\n<li>LOVEU-TGVE (Text-Guided Video Editing) Dataset and Benchmark, [<a href=\"https:\/\/github.com\/showlab\/loveu-tgve-2023\">github<\/a>][<a href=\"https:\/\/sites.google.com\/view\/loveucvpr23\/track4\">homepage<\/a>].<\/li>\n<li>HAWQV3: Dyadic Neural Network Quantization, <a href=\"https:\/\/github.com\/Zhen-Dong\/HAWQ\">[github]<\/a><a href=\"http:\/\/proceedings.mlr.press\/v139\/yao21a.html\">[paper]<\/a>.<\/li>\n<li>ZeroQ: A novel Zero-Shot Quantization Framework, <a href=\"https:\/\/github.com\/amirgholami\/ZeroQ\">[github]<\/a><a href=\"https:\/\/openaccess.thecvf.com\/content_CVPR_2020\/html\/Cai_ZeroQ_A_Novel_Zero_Shot_Quantization_Framework_CVPR_2020_paper.html\">[paper]<\/a>.<\/li>\n<li>CoDeNet: Efficient Deployment of Input-Adaptive Object Detection on Embedded FPGAs, <a href=\"https:\/\/github.com\/Zhen-Dong\/CoDeNet\">[github]<\/a><a href=\"https:\/\/arxiv.org\/abs\/2006.08357\">[paper]<\/a>.<\/li>\n<li>HAP: Hessian-Aware Pruning and Optimal Neural Implant, <a href=\"https:\/\/github.com\/yaozhewei\/HAP#hessian-aware-pruning-and-optimal-neural-implant\">[github]<\/a><a href=\"https:\/\/openaccess.thecvf.com\/content_CVPR_2020\/html\/Cai_ZeroQ_A_Novel_Zero_Shot_Quantization_Framework_CVPR_2020_paper.html\">[paper]<\/a>.<\/li>\n<li>BitPack: Tool to Efficiently Save Ultra-low Precision\/Mixed-precision Quantized Models, <a href=\"https:\/\/github.com\/Zhen-Dong\/BitPack\">[github]<\/a>.<\/li>\n<\/ul>    <\/div>\n<\/div>\n<!-- \/module text --><!-- module text -->\n<div  class=\"module module-text tb_5cny560   \" data-lazy=\"1\">\n        <div  class=\"tb_text_wrap\">\n        <h2><strong>Industry Collaborations<\/strong><\/h2>    <\/div>\n<\/div>\n<!-- \/module text --><!-- module text -->\n<div  class=\"module module-text tb_uyxm2   \" data-lazy=\"1\">\n        <div  class=\"tb_text_wrap\">\n        <ul>\n<li>NVIDIA, Intel, Amazon, Alibaba, Panasonic, ByteDance, Google, Meta, Apple, AMD, Nexusflow.ai, Samsung, Tesla.<\/li>\n<\/ul>    <\/div>\n<\/div>\n<!-- \/module text -->        <\/div>\n                        <\/div>\n        <\/div>\n                        <div  data-anchor=\"talks-events\" data-css_id=\"zdsq527\" data-lazy=\"1\" class=\"module_row themify_builder_row fullwidth_row_container 
tb_has_section tb_section-talks-events tb_zdsq527 tf_w\">\n                        <div class=\"row_inner col_align_top tb_col_count_1 tf_box tf_rel\">\n                        <div  data-lazy=\"1\" class=\"module_column tb-column col-full tb_fq1a527 first\">\n                    <!-- module text -->\n<div  class=\"module module-text tb_xtsm527   \" data-lazy=\"1\">\n        <div  class=\"tb_text_wrap\">\n        <h2><strong>Talks, Media &amp; Events:<\/strong><\/h2>    <\/div>\n<\/div>\n<!-- \/module text --><!-- module text -->\n<div  class=\"module module-text tb_k7uf527   \" data-lazy=\"1\">\n        <div  class=\"tb_text_wrap\">\n        <ul>\n<li>AgentHard won 3rd place at\u00a0<a href=\"https:\/\/rdi.berkeley.edu\/agentx-agentbeats.html\">AgentX-AgentBeats Competition 2026<\/a> (computer-use agent track sponsored by DeepMind).<\/li>\n<li>Two papers (TS-Attn &amp; K-Sort Eval) accepted to ICLR 2026.<\/li>\n<li>R-KV presented at NeurIPS 2025, <em>Measure of All Measures<\/em> won Best Paper Award at NeurIPS LLM Eval Workshop.<\/li>\n<li><span style=\"font-weight: bold;\">ZenAI Lab<\/span> at UCSB is recruiting new Postdoc, PhD, MS, Interns. Send an email to apply. <a href=\"https:\/\/zhuanlan.zhihu.com\/p\/1979622537018230633\">Link to Zhihu<\/a>.<\/li>\n<li>NVIDIA-Nemotron-3-Nano-30B achieved <a href=\"https:\/\/www.linkedin.com\/posts\/artificial-analysis_nvidia-has-just-released-nemotron-3-nano-activity-7406367757669322752-5lhn?utm_source=share&amp;utm_medium=member_desktop&amp;rcm=ACoAACS3K5kB9cup4wnShMmjph4gee18THhZci8\">AA Index 52,<\/a> same as gpt-oss-20b (high), with up to 3x throughput compared to similar-sized models. NVIDIA <a href=\"https:\/\/www.linkedin.com\/posts\/nvidia_today-we-announced-the-nvidia-nemotron-3-activity-7406354145550708737-Ofn3?utm_source=share&amp;utm_medium=member_desktop&amp;rcm=ACoAACS3K5kB9cup4wnShMmjph4gee18THhZci8\">Official Post<\/a>, <a href=\"https:\/\/blog.vllm.ai\/2025\/12\/15\/run-nvidia-nemotron-3-nano.html\">vLLM Blog<\/a>, <a href=\"https:\/\/lmsys.org\/blog\/2025-12-15-run-nvidia-nemotron-3-nano\/\">LMSys Blog<\/a>, AI era (\u65b0\u667a\u5143) <a href=\"https:\/\/36kr.com\/p\/3598962448777476\">Link to Post<\/a>, Synced AI (\u673a\u5668\u4e4b\u5fc3) <a href=\"https:\/\/zhuanlan.zhihu.com\/p\/1984324430109880520\">Link to Post<\/a>. Ranked #1 on Huggingface Trending Models.<\/li>\n<li>I served as a panelist at the event\u00a0<a href=\"https:\/\/partiful.com\/e\/etu0TQxKzPgxszpJwU5z\">RL: The future of AI Agents<\/a> at <a href=\"https:\/\/www.tech-week.com\/\">SFTechWeek 2025 by a16z<\/a>.<\/li>\n<li>NVIDIA-Nemotron-Nano-V2 8B is pretrained from scratch and can outperform Qwen3 8B. 
NVIDIA AI <a href=\"https:\/\/www.linkedin.com\/posts\/nvidia-ai_were-excited-to-share-leaderboard-topping-activity-7363339119726362624-MWso?utm_source=li_share&amp;utm_content=feedcontent&amp;utm_medium=g_dt_web&amp;utm_campaign=copy\">Official Post<\/a>, NVIDIA ADLR <a href=\"https:\/\/research.nvidia.com\/labs\/adlr\/NVIDIA-Nemotron-Nano-2\/\">Link to Post<\/a>, AI era (\u65b0\u667a\u5143) <a href=\"https:\/\/36kr.com\/p\/3429071613021830\">Link to Post<\/a>, Q-bit AI (\u91cf\u5b50\u4f4d) <a href=\"https:\/\/www.qbitai.com\/2025\/08\/323442.html\">Link to Post<\/a>.<\/li>\n<li>Llama-Nemotron-Super-V1.5 is on NVIDIA AI <a href=\"https:\/\/www.linkedin.com\/posts\/nvidia-ai_announcing-llama-nemotron-super-v15-activity-7354662280682221568-piFT?utm_source=share&amp;utm_medium=member_desktop&amp;rcm=ACoAACS3K5kB9cup4wnShMmjph4gee18THhZci8\">Official Post<\/a>, Q-bit AI (\u91cf\u5b50\u4f4d) <a href=\"https:\/\/www.qbitai.com\/2025\/07\/315193.html\">Link to Post<\/a>. Ranked #1 among open-sourced LLMs on the <a href=\"https:\/\/www.linkedin.com\/posts\/zhen-dong_llama-nemotron-super-v15-got-aa-intelligence-activity-7355800927632592897-xE-3?utm_source=share&amp;utm_medium=member_desktop&amp;rcm=ACoAACS3K5kB9cup4wnShMmjph4gee18THhZci8\">Artificial Analysis Intelligence Index<\/a>.<\/li>\n<li>I served as a panelist at the <a href=\"https:\/\/www.glogda.org\/2025ggds\">Global Green Development Summit (GGDS) 2025<\/a>.<\/li>\n<li>DrafterBench gets recommended by AI era (\u65b0\u667a\u5143), <a href=\"https:\/\/36kr.com\/p\/3383937448131585\">Link to Post<\/a>.<\/li>\n<li>R-KV gets recommended by Q-bit AI (\u91cf\u5b50\u4f4d), <a href=\"https:\/\/www.qbitai.com\/2025\/06\/295072.html\">Link to Post<\/a>.<\/li>\n<li>Meissonic gets recommended by AI era (\u65b0\u667a\u5143)\u00a0and 36Kr, <a href=\"https:\/\/36kr.com\/p\/3033039401808133\">Link to Post<\/a>.<\/li>\n<li>\n<p>K-Sort Arena gets recommended by Qingke Lab (\u9752\u7a1eAI), <a href=\"https:\/\/qingkelab.github.io\/2024\/11\/04\/AIGC\/2024-11-04\/\">Link to Post<\/a>.<\/p>\n<\/li>\n<li>\n<p>I presented \u201cEfficient Deep Learning via Quantization and Co-Design\u201d at <a href=\"https:\/\/cvpr.thecvf.com\/Conferences\/2024\/CallForDoctoralConsortium\">CVPR 2024 Doctoral Consortium<\/a> and <a href=\"https:\/\/www.dac.com\/Attend\/Students-Scholarships\/PhD-Forum\">DAC 2024 PhD Forum<\/a>.<\/p>\n<\/li>\n<li>\n<p>I co-organized the <a href=\"https:\/\/sites.google.com\/view\/loveucvpr24\/home\">LOVEU (LOng-form VidEo Understanding) workshop<\/a> at CVPR 2024.<\/p>\n<\/li>\n<li>\n<p>Q-Diffusion is featured in the newest\u00a0<a href=\"https:\/\/developer.nvidia.com\/blog\/tensorrt-accelerates-stable-diffusion-nearly-2x-faster-with-8-bit-post-training-quantization\">TensorRT post<\/a>.<\/p>\n<\/li>\n<li>\n<p>I co-organized the <a href=\"https:\/\/practical-dl.github.io\/\">3rd Workshop on Practical Deep Learning: Towards Efficient and Reliable LLMs<\/a> <span style=\"background-color: initial;\">at IEEE Conference on Artificial Intelligence (IEEE CAI) 2024.<\/span><\/p>\n<\/li>\n<li><span style=\"background-color: initial;\">NexusRaven-V2-13B is presented at\u00a0<\/span><a style=\"background-color: initial;\" href=\"https:\/\/nips.cc\/Exhibitors\/exhibitorinfo\">NeurIPS 2023 EXPO<\/a><span style=\"background-color: initial;\">.<\/span><\/li>\n<li>Media on NexusRaven and NexusRaven-V2: <a href=\"https:\/\/nexusflow.ai\/blogs\/ravenv2\">Nexusflow.AI Official Blog<\/a>, <a 
href=\"https:\/\/deci.ai\/blog\/small-giants-top-10-under-13b-llms-in-open-source\/\">Deci AI Top 10 Under-13B LLMs<\/a>,\u00a0<a href=\"https:\/\/medium.com\/@datadrifters\/nexusraven-v2-13b-surpasses-gpt-4-in-function-calling-for-single-nested-and-parallel-calls-d3b266aee896\">Medium Article1<\/a>, <a href=\"https:\/\/medium.com\/@multiplatform.ai\/introducing-nexusraven-v2-a-13b-llm-dominating-gpt-4-in-zero-shot-function-calling-cbc7d1de1b9c\">Medium Article 2<\/a>, <a href=\"https:\/\/siliconangle.com\/2023\/12\/05\/nexusflows-latest-ai-model-nexusraven-v2-outperforms-gpt-4-software-tool-usage\/\">Siliconangle Article<\/a>, <a href=\"https:\/\/finance.yahoo.com\/news\/nexusflow-unveils-open-source-generative-140500544.html?guccounter=1&amp;guce_referrer=aHR0cHM6Ly93d3cuZ29vZ2xlLmNvbS8&amp;guce_referrer_sig=AQAAAGaQ0nPzOjmRE_34zPJGGTCoyc4MBuDMwiJ53RrCnDTbcNA4iTyRxKKIhgfnS5KKS9QCOkJZONlEnhjelw21L2fpqHJ3q8cO-wWdsDyuPCW4ajNXtUpojaF9HCcVUsVwyanC4D9Lw5EGvOyn5QILhS-0BuNmGOCIV3WsvjJ1cE-Q\">Yahoo Finance Article<\/a>, <a href=\"https:\/\/www.businesswire.com\/news\/home\/20231205329018\/en\/Nexusflow-Unveils-Open-source-Generative-AI-That-Empowers-Copilots-to-Use-Tools-and-Outperforms-GPT-4\">Business Wire Article<\/a>, <a href=\"https:\/\/twitter.com\/ClementDelangue\/status\/1732138699901809042\">Huggingface\u2019s Post<\/a>, <a href=\"https:\/\/www.marktechpost.com\/2023\/12\/12\/meet-nexusraven-v2-a-13b-llm-outperforming-gpt-4-in-zero-shot-function-calling-and-has-the-capability-to-turn-natural-language-instructions-into-executable-code\/\">Mark Tech Post<\/a>, <a href=\"https:\/\/www.analyticsvidhya.com\/blog\/2023\/12\/how-nexusflows-nexusraven-v2-beats-gpt-4-at-its-own-game\/\">Analytics Vidhya Article<\/a>, <a href=\"https:\/\/www.linkedin.com\/posts\/togethercomputer_new-open-source-model-now-available-nexusraven-v2-activity-7137858804720816129-EHyo\">Together AI\u2019s Post<\/a>, <a href=\"https:\/\/news.ycombinator.com\/item?id=38527374\">Post on YC Hacker News<\/a>, Meta Llama&#8217;s Newsletter,\u00a0<a href=\"https:\/\/ollama.ai\/library\/nexusraven\">Ollama AI\u2019s Post<\/a>, etc. 
Ranked #5 on Huggingface Trending Models.<\/li>\n<li>Invited Talk &#8220;<span style=\"color: #2341eb;\"><a href=\"https:\/\/www.oneapi.io\/event-sessions\/efficient-inference-and-training-of-large-neural-network-models-ai-2023\/\">Efficient Inference and Training of Large Neural Network Models<\/a><\/span>&#8221; at\u00a0<a href=\"https:\/\/www.oneapi.io\/events\/oneapi-devsummit-for-ai-and-hpc-2023\/\">Intel oneAPI DevSummit<\/a>\u00a0for AI and HPC, on Aug 21, 2023.<\/li>\n<li>Invited Talk &#8220;Hardware-Aware Efficient Deep Learning&#8221; at Peking University Institute of Artificial Intelligence (PKU-IAI), on June 11, 2023.<\/li>\n<li>I co-organized the <a href=\"https:\/\/sites.google.com\/view\/loveucvpr23\/home\">LOVEU (LOng-form VidEo Understanding) workshop<\/a> at CVPR 2023, <a href=\"https:\/\/zhuanlan.zhihu.com\/p\/629737628\">Link to Zhihu<\/a>.<\/li>\n<li>Invited to host the <a href=\"https:\/\/practical-dl.github.io\/\">Practical DL Workshop<\/a> at AAAI 2023 in Washington DC.<\/li>\n<li>Invited Talk &#8220;Efficient Deep Learning via Quantization and HW-SW Co-Design&#8221; at <a href=\"https:\/\/sites.google.com\/rice.edu\/iccad-halo-2022\/schedule\" target=\"_blank\" rel=\"noopener\">Hardware and Algorithms for Learning On-a-chip Workshop (HALO)<\/a> in ICCAD 2022.<\/li>\n<li>Invited Talk &#8220;<a href=\"https:\/\/www.oneapi.io\/event-sessions\/accelerating-pytorch-deep-learning-models-on-intel-xpus-ai-hpc-2022\/\">Efficient Inference and Training of Large Neural Network Models<\/a>&#8221; at\u00a0<a href=\"https:\/\/www.oneapi.io\/events\/oneapi-devsummit-for-ai-and-hpc-2022\/\">Intel oneAPI DevSummit<\/a> for AI and HPC, on Dec 6, 2022.<\/li>\n<li>My dissertation on &#8220;<a href=\"https:\/\/www2.eecs.berkeley.edu\/Pubs\/TechRpts\/2022\/EECS-2022-231.html\">Hardware-aware Efficient Deep Learning<\/a>&#8221; was defended on June 29, 2022.<\/li>\n<li>&#8220;Efficient Neural Networks through Systematic Quantization and Co-Design&#8221;, virtually at <a href=\"https:\/\/www.imperial.ac.uk\/matchlab\/\">Matchlab (Imperial College London)<\/a>, [<a href=\"https:\/\/dong-zhen.com\/wp-content\/uploads\/ICL2022Talk.pdf\">slides<\/a>].<\/li>\n<li>CoDeNet and HAO are presented at ML@B Seminar (Machine Learning at Berkeley).<\/li>\n<li>&#8220;Hessian-Aware Pruning and Optimal Neural Implant&#8221;, WACV 2022, Hawaii, US, [<a href=\"https:\/\/dong-zhen.com\/wp-content\/uploads\/HAP_WACV2022.pdf\">slides<\/a>].<\/li>\n<li>Berkeley AI Research (BAIR)\/ Berkeley Deep Drive (BDD) Workshop 2021, Berkeley, US.<\/li>\n<li>The book that I contributed to, &#8220;<a href=\"https:\/\/www.routledge.com\/Low-Power-Computer-Vision-Improve-the-Efficiency-of-Artificial-Intelligence\/Thiruvathukal-Lu-Kim-Chen-Chen\/p\/book\/9780367744700\">Low-Power Computer Vision: Improve the Efficiency of Artificial Intelligence<\/a>&#8220;, is online for ordering.<\/li>\n<li>&#8220;HAO: Hardware-aware neural Architecture Optimization for Efficient Inference&#8221;, <a href=\"https:\/\/video.computer.org\/fccm\/FCCM%202021\/Session%202%20Machine%20Learning%201%20(Inference%20and%20Time-Series%20Prediction).mp4.html\">FCCM 2021<\/a> (online).<\/li>\n<li>&#8220;HAWQ-V2: Hessian Aware trace-Weighted Quantization of Neural Networks&#8221;, <a href=\"https:\/\/neurips.cc\/virtual\/2020\/public\/poster_d77c703536718b95308130ff2e5cf9ee.html\">NeurIPS 2020<\/a>.<\/li>\n<li>HAWQ-V2 gets recommended by JiangMen (\u5c06\u95e8) AI media, <a href=\"https:\/\/zhuanlan.zhihu.com\/p\/338405208\">Link to 
ZhiHu<\/a>.<\/li>\n<li>&#8220;Systematic Neural Network Quantization&#8221;, <a href=\"https:\/\/www.nvidia.com\/en-us\/on-demand\/session\/gtcspring21-s31702\/\">NVIDIA GTC 2021<\/a>.<\/li>\n<li>&#8220;Efficient Neural Networks through Systematic Quantization&#8221;, <a href=\"https:\/\/citris-uc.org\/news-events\/bair-seminar-series\/\">BAIR\/CPAR\/BDD Seminar 2020<\/a>, [<a href=\"https:\/\/dong-zhen.com\/wp-content\/uploads\/BAIR_Seminar.pdf\">slides<\/a>].<\/li>\n<li>&#8220;HAWQ-V3: Dyadic Neural Network Quantization&#8221; is presented at <a href=\"https:\/\/tvmconf.org\/\">TVM Conference 2020<\/a>.<\/li>\n<li>&#8220;ZeroQ: A novel Zero-Shot Quantization Framework&#8221;, Real-Time Intelligent Secure Explainable Systems (RISELab) Retreat 2020, Lake Tahoe (online), US, [<a href=\"https:\/\/dong-zhen.com\/wp-content\/uploads\/ZeroQ_riseretreat2020.pdf\">slides<\/a>].<\/li>\n<li>Berkeley AI Research (BAIR)\/ Berkeley Deep Drive (BDD) Workshop 2020, Santa Rosa, US.<\/li>\n<li>&#8220;Q-BERT: Hessian Based Quantization of BERT&#8221;, AAAI 2020, New York, US, [<a href=\"https:\/\/dong-zhen.com\/wp-content\/uploads\/AAAI2020_QBERT.pdf\">slides<\/a>].<\/li>\n<li>Q-BERT gets recommended by Synced (\u673a\u5668\u4e4b\u5fc3) AI media (in Chinese), <a href=\"https:\/\/mp.weixin.qq.com\/s\/0qBlnsUqI2I-h-pFSgcQig\">Link to WeChat<\/a>.<\/li>\n<li>Q-BERT gets recommended by AI.Science (Aggregate Intellect), <a href=\"https:\/\/www.youtube.com\/watch?v=aX4Tm1s01wY\">Link to YouTube<\/a>.<\/li>\n<li>&#8220;Hessian-Aware trace-Weighted Quantization&#8221;, <a href=\"https:\/\/sites.google.com\/site\/optneurips19\/\">Beyond First-Order Methods in ML Workshop<\/a> at NeurIPS 2019, Vancouver, Canada.<\/li>\n<li>Real-Time Intelligent Secure Explainable Systems (RISELab) Retreat 2019, Monterey, US.<\/li>\n<li>Berkeley AI Research (BAIR)\/ Berkeley Deep Drive (BDD) Workshop 2019, Berkeley, US.<\/li>\n<li>Visual Wake Word Challenge, <a href=\"https:\/\/rebootingcomputing.ieee.org\/lpirc\/2019\">LPIRC Workshop<\/a> at CVPR 2019, Long Beach, US, [<a href=\"https:\/\/dong-zhen.com\/wp-content\/uploads\/CVPR2019_LPIRC_VWW.pdf\">slides<\/a>], <a href=\"https:\/\/ieeetv.ieee.org\/visual-wake-words-challenge-aakanksha-chowdhery-lpirc-2019\">[link]<\/a>.<\/li>\n<li>&#8220;RRAM Based Convolutional Neural Networks for High Accuracy Pattern Recognition and Online Learning Tasks&#8221;, <a href=\"https:\/\/vlsisymposium.org\/2017\/\">VLSI-<\/a><a href=\"http:\/\/annex.jsap.or.jp\/snw\/index.html\">SNW<\/a> 2017, Kyoto, Japan, [<a href=\"https:\/\/dong-zhen.com\/wp-content\/uploads\/SNW_Presentation_ZhenDong.pdf\">slides<\/a>].<\/li>\n<\/ul>    <\/div>\n<\/div>\n<!-- \/module text --><!-- module text -->\n<div  class=\"module module-text tb_h19q527   \" data-lazy=\"1\">\n        <div  class=\"tb_text_wrap\">\n        <h2><strong>Service<\/strong><\/h2>    <\/div>\n<\/div>\n<!-- \/module text --><!-- module text -->\n<div  class=\"module module-text tb_gcsu527   \" data-lazy=\"1\">\n        <div  class=\"tb_text_wrap\">\n        <ul>\n<li>Reviewer for TNNLS (IEEE Transactions on Neural Networks and Learning Systems), TMLR (Transactions of Machine Learning Research), TPAMI (Transactions on Pattern Analysis and Machine Intelligence), JMLR (Journal of Machine Learning Research), IEEE Micro, TED (IEEE Transactions on Electron Devices), PR (Pattern Recognition), TCSVT (IEEE Transactions on Circuits and Systems for Video Technology), OJCAS (IEEE Open Journal of Circuits and Systems), JCST (Journal of Computer Science and 
Technology) and Fundamental Research (Elsevier).<\/li>\n<li>Reviewer for NeurIPS, ICML, CVPR, ICCV, ICLR, EMNLP, AAAI (Senior PC), ECCV, IJCAI, WACV, KDD, MLSys, TinyML, ECV, BLPCV.<\/li>\n<\/ul>    <\/div>\n<\/div>\n<!-- \/module text -->        <\/div>\n                        <\/div>\n        <\/div>\n                        <div  data-parallax-bg=\"desktop\" data-anchor=\"contact\" data-css_id=\"6rht860\" data-lazy=\"1\" class=\"module_row themify_builder_row fullwidth_row_container tb_has_section tb_section-contact tb_6rht860 tf_w\">\n            <span class=\"builder_row_cover tf_abs\"><\/span>            <div class=\"row_inner col_align_top tb_col_count_1 tf_box tf_rel\">\n                        <div  data-lazy=\"1\" class=\"module_column tb-column col-full tb_gkfc865 first\">\n                    <!-- module text -->\n<div  class=\"module module-text tb_uczp865   \" data-lazy=\"1\">\n        <div  class=\"tb_text_wrap\">\n        <h2><strong>Contact<\/strong><\/h2>    <\/div>\n<\/div>\n<!-- \/module text -->        <div  data-lazy=\"1\" class=\"module_subrow themify_builder_sub_row tf_w col_align_top tb_col_count_2 tb_a1sr866\">\n                <div  data-lazy=\"1\" class=\"module_column sub_column col4-2 tb_a4eo866 first\">\n                    <!-- module icon -->\n<div  class=\"module module-icon tb_pag9866    icon_horizontal \" data-lazy=\"1\">\n            <div class=\"module-icon-item\">\n                                                                <em class=\"tf_boxblue\"><svg  class=\"tf_fa tf-fas-home\" aria-hidden=\"true\"><use href=\"#tf-fas-home\"><\/use><\/svg><\/em>\n                                                                            <span>UC Berkeley, CA, 94709<\/span>\n                                                            <\/div>\n    <\/div>\n<!-- \/module icon -->\n        <\/div>\n                    <div  data-lazy=\"1\" class=\"module_column sub_column col4-2 tb_z143866 last\">\n                    <!-- module icon -->\n<div  class=\"module module-icon tb_bijj866    icon_horizontal \" data-lazy=\"1\">\n            <div class=\"module-icon-item\">\n                                                                <em class=\"tf_box\"><svg  class=\"tf_fa tf-far-envelope\" aria-hidden=\"true\"><use href=\"#tf-far-envelope\"><\/use><\/svg><\/em>\n                                                                            <span>zhend@ucsb.edu<\/span>\n                                                            <\/div>\n    <\/div>\n<!-- \/module icon -->\n        <\/div>\n                    <\/div>\n                <\/div>\n                        <\/div>\n        <\/div>\n        <\/div>\n<!--\/themify_builder_content-->","protected":false},"excerpt":{"rendered":"<p>\u00a0 Homepage of Zhen Dong Assistant Professor at UCSB, NVIDIA AI Researcher Looking for Potential Postdoc, PhD, MS &amp; Interns DOWNLOAD RESUME Google Scholar Research Interests Efficient AI: Efficient MLLM, VideoGen &amp; Embodied AI LLM Post-Training, Model Compression, AI Systems Function-Calling Agents and Multi-Agent Systems Hardware-Software Co-Design and AI for Science Efficient Evaluation and Alignment 
[&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"parent":0,"menu_order":0,"comment_status":"closed","ping_status":"closed","template":"","meta":{"footnotes":""},"class_list":["post-18","page","type-page","status-publish","hentry","has-post-title","has-post-date","has-post-category","has-post-tag","has-post-comment","has-post-author",""],"_links":{"self":[{"href":"https:\/\/dong-zhen.com\/wp-json\/wp\/v2\/pages\/18","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/dong-zhen.com\/wp-json\/wp\/v2\/pages"}],"about":[{"href":"https:\/\/dong-zhen.com\/wp-json\/wp\/v2\/types\/page"}],"author":[{"embeddable":true,"href":"https:\/\/dong-zhen.com\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/dong-zhen.com\/wp-json\/wp\/v2\/comments?post=18"}],"version-history":[{"count":1187,"href":"https:\/\/dong-zhen.com\/wp-json\/wp\/v2\/pages\/18\/revisions"}],"predecessor-version":[{"id":1525,"href":"https:\/\/dong-zhen.com\/wp-json\/wp\/v2\/pages\/18\/revisions\/1525"}],"wp:attachment":[{"href":"https:\/\/dong-zhen.com\/wp-json\/wp\/v2\/media?parent=18"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}