<!DOCTYPE HTML>
<html lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>Kun-Yu Lin</title>
<meta name="author" content="Kun-Yu Lin">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="shortcut icon" href="images/favicon/favicon.ico" type="image/x-icon">
<link rel="stylesheet" type="text/css" href="stylesheet.css">
</head>
<body>
<table style="width:100%;max-width:800px;border:0px;border-spacing:0px;border-collapse:separate;margin-right:auto;margin-left:auto;vertical-align:middle"><tbody>
<tr style="padding:0px">
<td style="padding:0px">
<table style="width:100%;border:0px;border-spacing:0px;border-collapse:separate;margin-right:auto;margin-left:auto;"><tbody>
<tr style="padding:0px">
<td style="padding:2.5%;width:63%;vertical-align:middle">
<p class="name" style="text-align: center;">
Kun-Yu Lin
</p>
<p style="font-size:16px;">
I am currently a postdoctoral research fellow at the University of Hong Kong, supervised by <a href="https://www.kaihan.org/">Prof. Kai Han</a>.
I obtained my PhD degree from Sun Yat-sen University, supervised by <a href="https://www.isee-ai.cn/~zhwshi/index.html">Prof. Wei-Shi Zheng</a>.
Prior to that, I obtained my Bachelor's and Master's degrees from Sun Yat-sen University.
During my PhD, I was fortunate to study as a visiting student at MMLab@NTU, supervised by <a href="https://www.mmlab-ntu.com/person/ccloy/">Prof. Chen Change Loy</a> and <a href="https://henghuiding.github.io/">Prof. Henghui Ding</a>.
My research interests include computer vision and machine learning.
</p>
<p style="text-align:center">
<a href="mailto:kunyulin14@outlook.com">Email</a> /
<a href="https://scholar.google.com/citations?hl=en&user=tkUBeeQAAAAJ">Scholar</a> /
<a href="https://github.com/kunyulin/">Github</a>
</p>
</td>
<td style="padding:2.5%;width:40%;max-width:40%">
<a href="images/JonBarron.jpg"><img style="width:80%;max-width:80%;object-fit: cover; border-radius: 50%;" alt="profile photo" src="images/kunyu.jpg" class="hoverZoomLink"></a>
</td>
</tr>
</tbody></table>
<table style="width:100%;border:0px;border-spacing:0px;border-collapse:separate;margin-right:auto;margin-left:auto;"><tbody>
<tr>
<td style="padding:20px;width:100%;vertical-align:middle">
<h2>News</h2>
</td>
</tr>
<tr style="font-size:15px;"> <td> <b>❅ 12/2024:</b> One paper was accepted to AAAI2025.
</td></tr>
<tr style="font-size:15px;"> <td> <b>❅ 07/2024:</b> One paper was accepted to TPAMI.
</td></tr>
<tr style="font-size:15px;"> <td> <b>❅ 03/2024:</b> Releasing <a href="https://arxiv.org/abs/2403.01560">XOV-Action</a>, the first cross-domain open-vocabulary action recognition benchmark!
</td></tr>
<tr style="font-size:15px;"> <td> <b>❅ 09/2023:</b> One paper was accepted to NeurIPS2023.
</td></tr>
<tr style="font-size:15px;"> <td> <b>❅ 09/2023:</b> One paper was accepted to TPAMI.
</td></tr>
<tr style="font-size:15px;"> <td> <b>❅ 07/2023:</b> One paper was accepted to ICCV2023.
</td></tr>
<tr style="font-size:15px;"> <td> <b>❅ 03/2023:</b> Two papers were accepted to CVPR2023.
</td></tr>
<tr> <td> </td></tr>
</tbody></table>
<table style="width:100%;border:0px;border-spacing:0px;border-collapse:separate;margin-right:auto;margin-left:auto;"><tbody>
<tr>
<td style="padding:20px;width:100%;vertical-align:middle">
<h2>Selected Works</h2>
<p style="font-size:15px;">
Most of my research concerns human video understanding; transferable, generalizable, and trustworthy deep learning; and vision-language models.
Some works are <span class="highlight">highlighted</span>.
# denotes equal contribution.
</p>
</td>
</tr>
</tbody></table>
<table style="width:100%;border:0px;border-spacing:0px;border-collapse:separate;margin-right:auto;margin-left:auto;"><tbody>
<tr onmouseout="xov_action()" onmouseover="xov_action()" bgcolor="#ffffd0">
<!-- <tr onmouseout="nuvo_stop()" onmouseover="nuvo_start()"> -->
<td style="padding:20px;width:25%;vertical-align:middle">
<!-- <div class="one"> -->
<!-- <div class="two" id='xovaction'><video width=100% muted autoplay loop>
<source src="images/nuvo.mp4" type="video/mp4">
Your browser does not support the video tag.
</video></div> -->
<img src='images/xovaction.png' width=100%>
</td>
<td style="padding:20px;width:75%;vertical-align:middle">
<a href="https://arxiv.org/abs/2403.01560">
<span class="papertitle">Rethinking CLIP-based Video Learners in Cross-Domain Open-Vocabulary Action Recognition</span>
</a>
<br>
<strong>Kun-Yu Lin</strong>, Henghui Ding, Jiaming Zhou, Yu-Ming Tang, Yi-Xing Peng, Zhilin Zhao, Chen Change Loy, Wei-Shi Zheng
<!-- <a href="https://bmild.github.io/">Ben Mildenhall</a> -->
<br>
<em>arXiv</em>, 2024
<br>
<a href="https://arxiv.org/abs/2403.01560">arXiv</a>
/
<a href="https://github.com/KunyuLin/XOV-Action/">github</a>
<p></p>
<p>
The first benchmark, named <a href="https://github.com/KunyuLin/XOV-Action/">XOV-Action</a>, for the cross-domain open-vocabulary action recognition task,
and a simple yet effective method to address the scene bias for the task.
</p>
</td>
</tr>
<tr onmouseout="pargo()" onmouseover="pargo()" bgcolor="#ffffd0">
<!-- <tr onmouseout="nuvo_stop()" onmouseover="nuvo_start()"> -->
<td style="padding:20px;width:25%;vertical-align:middle">
<img src='images/pargo.png' width=100%>
</td>
<td style="padding:20px;width:75%;vertical-align:middle">
<a href="https://arxiv.org/abs/2408.12928">
<span class="papertitle">ParGo: Bridging Vision-Language with Partial and Global Views</span>
</a>
<br>
An-Lan Wang, Bin Shan, Wei Shi, <strong>Kun-Yu Lin</strong>, Xiang Fei, Guozhi Tang, Lei Liao, Jingqun Tang, Can Huang, Wei-Shi Zheng
<br>
<em>AAAI</em>, 2025
<br>
<a href="https://arxiv.org/abs/2408.12928">arXiv</a>
<p></p>
<p>
A novel connector that bridges the vision and language modalities via both global and partial views, together with a large-scale image-text dataset of detailed captions.
</p>
</td>
</tr>
<tr onmouseout="huamnrobotalign()" onmouseover="humanrobotalign()">
<td style="padding:20px;width:25%;vertical-align:middle">
<img src='images/humanrobotalign.png' width=100%>
</td>
<td style="padding:20px;width:75%;vertical-align:middle">
<a href="https://arxiv.org/abs/2406.14235">
<span class="papertitle">Mitigating the Human-Robot Domain Discrepancy in Visual Pre-training for Robotic Manipulation</span>
</a>
<br>
Jiaming Zhou, Teli Ma, <strong>Kun-Yu Lin</strong>, Ronghe Qiu, Zifan Wang, Junwei Liang
<br>
<em>arXiv</em>, 2024
<br>
<a href="https://arxiv.org/abs/2406.14235">arXiv</a>
/
<a href="https://jiaming-zhou.github.io/projects/HumanRobotAlign/">project page</a>
<p></p>
<p>
A new paradigm utilizing paired human-robot videos to adapt human-data pretrained models for robotic manipulation tasks.
</p>
</td>
</tr>
<tr onmouseout="hctransformer()" onmouseover="hctransformer()">
<td style="padding:20px;width:25%;vertical-align:middle">
<img src='images/hctransformer.png' width=100%>
</td>
<td style="padding:20px;width:75%;vertical-align:middle">
<a href="https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=10599825">
<span class="papertitle">Human-Centric Transformer for Domain Adaptive Action Recognition</span>
</a>
<br>
<strong>Kun-Yu Lin</strong>, Jiaming Zhou, Wei-Shi Zheng
<br>
<em>TPAMI</em>, 2024
<br>
<a href="https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=10599825">paper</a>
/
<a href="https://arxiv.org/abs/2407.10860">arXiv</a>
<p></p>
<p>
A human-centric video network to address the context bias in domain adaptive action recognition.
</p>
</td>
</tr>
<tr onmouseout="stdn()" onmouseover="stdn()">
<td style="padding:20px;width:25%;vertical-align:middle">
<img src='images/stdn.png' width=100%>
</td>
<td style="padding:20px;width:75%;vertical-align:middle">
<a href="https://arxiv.org/abs/2310.17942">
<span class="papertitle">Diversifying Spatial-Temporal Perception for Video Domain Generalization</span>
</a>
<br>
<strong>Kun-Yu Lin</strong>, Jia-Run Du, Yipeng Gao, Jiaming Zhou, Wei-Shi Zheng
<br>
<em>NeurIPS</em>, 2023
<br>
<a href="https://openreview.net/forum?id=YsZTDcIQwQ">paper</a>
/
<a href="https://arxiv.org/abs/2310.17942">arXiv</a>
/
<a href="https://github.com/KunyuLin/STDN/">github</a>
<p></p>
<p>
A diversity-aware video network to address the bias to domain-specific information in video domain generalization.
</p>
</td>
</tr>
<tr onmouseout="e3p()" onmouseover="e3p()">
<td style="padding:20px;width:25%;vertical-align:middle">
<img src='images/e3p.png' width=100%>
</td>
<td style="padding:20px;width:75%;vertical-align:middle">
<a href="https://openaccess.thecvf.com/content/ICCV2023/papers/Wang_Event-Guided_Procedure_Planning_from_Instructional_Videos_with_Text_Supervision_ICCV_2023_paper.pdf">
<span class="papertitle">Event-Guided Procedure Planning from Instructional Videos with Text Supervision</span>
</a>
<br>
An-Lan Wang#, <strong>Kun-Yu Lin#</strong>, Jia-Run Du, Jingke Meng, Wei-Shi Zheng
<br>
<em>ICCV</em>, 2023
<br>
<a href="https://openaccess.thecvf.com/content/ICCV2023/papers/Wang_Event-Guided_Procedure_Planning_from_Instructional_Videos_with_Text_Supervision_ICCV_2023_paper.pdf">paper</a>
/
<a href="https://arxiv.org/abs/2308.08885">arXiv</a>
<p></p>
<p>
A new event-guided paradigm to address the semantic gap between observed states and unobserved actions for procedure planning in instructional videos.
</p>
</td>
</tr>
<tr onmouseout="asyfod()" onmouseover="asyfod()">
<td style="padding:20px;width:25%;vertical-align:middle">
<img src='images/asyfod.png' width=100%>
</td>
<td style="padding:20px;width:75%;vertical-align:middle">
<a href="https://openaccess.thecvf.com/content/CVPR2023/papers/Gao_AsyFOD_An_Asymmetric_Adaptation_Paradigm_for_Few-Shot_Domain_Adaptive_Object_CVPR_2023_paper.pdf">
<span class="papertitle">AsyFOD: An Asymmetric Adaptation Paradigm for Few-Shot Domain Adaptive Object Detection</span>
</a>
<br>
Yipeng Gao#, <strong>Kun-Yu Lin#</strong>, Junkai Yan, Yaowei Wang, Wei-Shi Zheng
<br>
<em>CVPR</em>, 2023
<br>
<a href="https://openaccess.thecvf.com/content/CVPR2023/papers/Gao_AsyFOD_An_Asymmetric_Adaptation_Paradigm_for_Few-Shot_Domain_Adaptive_Object_CVPR_2023_paper.pdf">paper</a>
/
<a href="https://github.com/Hlings/AsyFOD">github</a>
<p></p>
<p>
An asymmetric adaptation paradigm for few-shot domain adaptive object detection.
</p>
</td>
</tr>
<tr onmouseout="stdn()" onmouseover="stdn()">
<td style="padding:20px;width:25%;vertical-align:middle">
<img src='images/dilateformer.png' width=100%>
</td>
<td style="padding:20px;width:75%;vertical-align:middle">
<a href="https://arxiv.org/abs/2302.01791">
<span class="papertitle">DilateFormer: Multi-Scale Dilated Transformer for Visual Recognition</span>
</a>
<br>
Jiayu Jiao#, Yu-Ming Tang#, <strong>Kun-Yu Lin</strong>, Yipeng Gao, Jinhua Ma, Yaowei Wang, Wei-Shi Zheng
<br>
<em>TMM</em>, 2023
<br>
<a href="https://ieeexplore.ieee.org/abstract/document/10041780/">paper</a>
/
<a href="https://arxiv.org/abs/2302.01791">arXiv</a>
/
<a href="https://isee-ai.cn/~jiaojiayu/DilteFormer.html">project page</a>
/
<a href="https://github.com/JIAOJIAYUASD/dilateformer">github</a>
<p></p>
<p>
A new vision transformer architecture for efficient and effective visual understanding.
</p>
</td>
</tr>
<tr onmouseout="ood_sa()" onmouseover="ood_sa()">
<td style="padding:20px;width:25%;vertical-align:middle">
<img src='images/ood_sa.png' width=100%>
</td>
<td style="padding:20px;width:75%;vertical-align:middle">
<a href="https://arxiv.org/abs/2206.09380">
<span class="papertitle">Supervision Adaptation Balancing In-distribution Generalization and Out-of-distribution Detection</span>
</a>
<br>
Zhilin Zhao, Longbing Cao, <strong>Kun-Yu Lin</strong>
<br>
<em>TPAMI</em>, 2023
<br>
<a href="https://ieeexplore.ieee.org/document/10271740/">paper</a>
/
<a href="https://arxiv.org/abs/2206.09380">arxiv</a>
/
<a href="https://github.com/Lawliet-zzl/SA">github</a>
<p></p>
<p>
A theoretical method for balancing in-distribution generalization and out-of-distribution detection.
</p>
</td>
</tr>
<tr onmouseout="ood_fig()" onmouseover="ood_fig()">
<td style="padding:20px;width:25%;vertical-align:middle">
<img src='images/ood_fig.png' width=100%>
</td>
<td style="padding:20px;width:75%;vertical-align:middle">
<a href="https://arxiv.org/abs/2108.09976">
<span class="papertitle">Revealing the Distributional Vulnerability of Discriminators by Implicit Generators</span>
</a>
<br>
Zhilin Zhao, Longbing Cao, <strong>Kun-Yu Lin</strong>
<br>
<em>TPAMI</em>, 2023
<br>
<a href="https://ieeexplore.ieee.org/document/9987694/">paper</a>
/
<a href="https://arxiv.org/abs/2108.09976">arxiv</a>
/
<a href="https://github.com/Lawliet-zzl/FIG">github</a>
<p></p>
<p>
A theoretical method based on implicit generators to improve out-of-distribution detection.
</p>
</td>
</tr>
<tr onmouseout="cwan()" onmouseover="cwan()">
<td style="padding:20px;width:25%;vertical-align:middle">
<img src='images/cwan.png' width=100%>
</td>
<td style="padding:20px;width:75%;vertical-align:middle">
<a href="https://www.ecva.net/papers/eccv_2022/papers_ECCV/papers/136930520.pdf">
<span class="papertitle">Adversarial Partial Domain Adaptation by Cycle Inconsistency</span>
</a>
<br>
<strong>Kun-Yu Lin</strong>, Jiaming Zhou, Yukun Qiu, Wei-Shi Zheng
<br>
<em>ECCV</em>, 2022
<br>
<a href="https://www.ecva.net/papers/eccv_2022/papers_ECCV/papers/136930520.pdf">paper</a>
/
<a href="https://github.com/KunyuLin/CWAN">github</a>
<p></p>
<p>
A simple yet effective method based on cycle transformation to filter out outlier classes in partial domain adaptation.
</p>
</td>
</tr>
</tbody></table>
<table width="100%" align="center" border="0" cellspacing="0" cellpadding="20"><tbody>
<tr>
<td>
<h2>Services</h2>
</td>
</tr>
</tbody></table>
<table width="100%" align="center" border="0" cellpadding="20"><tbody>
<tr>
<td style="padding:20px;width:25%;vertical-align:middle"><img src="images/services.png" width=100%></td>
<td width="75%" valign="center">
Reviewer of CVPR23, CVPR24
<br>
Reviewer of ICCV23
<br>
Reviewer of ECCV24
<br>
Reviewer of ICLR25
<br>
Reviewer of NeurIPS24
<br>
Reviewer of IJCAI24
<br>
Reviewer of TCSVT
<br>
</td>
</tr>
</tbody></table>
<table style="width:100%;border:0px;border-spacing:0px;border-collapse:separate;margin-right:auto;margin-left:auto;"><tbody>
<tr>
<td style="padding:0px">
<br>
<p style="text-align:right;font-size:small;">
This website's template is borrowed from <a href="https://github.com/jonbarron/jonbarron_website">Jon Barron's website</a>.
</p>
</td>
</tr>
</tbody></table>
</td>
</tr>
</tbody></table>
</body>
</html>