Publications
Chen, Meida; Lal, Devashish; Yu, Zifan; Xu, Jiuyi; Feng, Andrew; You, Suya; Nurunnabi, Abdul; Shi, Yangming
Large-Scale 3D Terrain Reconstruction Using 3D Gaussian Splatting for Visualization and Simulation Journal Article
In: Int. Arch. Photogramm. Remote Sens. Spatial Inf. Sci., vol. XLVIII-2-2024, pp. 49–54, 2024, ISSN: 2194-9034.
@article{chen_large-scale_2024,
title = {Large-Scale 3D Terrain Reconstruction Using 3D Gaussian Splatting for Visualization and Simulation},
author = {Meida Chen and Devashish Lal and Zifan Yu and Jiuyi Xu and Andrew Feng and Suya You and Abdul Nurunnabi and Yangming Shi},
url = {https://isprs-archives.copernicus.org/articles/XLVIII-2-2024/49/2024/},
doi = {10.5194/isprs-archives-XLVIII-2-2024-49-2024},
issn = {2194-9034},
year = {2024},
date = {2024-06-01},
urldate = {2024-06-20},
journal = {Int. Arch. Photogramm. Remote Sens. Spatial Inf. Sci.},
volume = {XLVIII-2-2024},
pages = {49–54},
abstract = {The fusion of low-cost unmanned aerial systems (UAS) with advanced photogrammetric techniques has revolutionized 3D terrain reconstruction, enabling the automated creation of detailed models. Concurrently, the advent of 3D Gaussian Splatting has introduced a paradigm shift in 3D data representation, offering visually realistic renditions distinct from traditional polygon-based models. Our research builds upon this foundation, aiming to integrate Gaussian Splatting into interactive simulations for immersive virtual environments. We address challenges such as collision detection by adopting a hybrid approach, combining Gaussian Splatting with photogrammetry-derived meshes. Through comprehensive experimentation covering varying terrain sizes and Gaussian densities, we evaluate scalability, performance, and limitations. Our findings contribute to advancing the use of advanced computer graphics techniques for enhanced 3D terrain visualization and simulation.},
keywords = {DTIC, Graphics, VGL},
pubstate = {published},
tppubtype = {article}
}
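As background for the entry above (a generic illustration, not the authors' code): a 3D Gaussian Splatting renderer ultimately composites depth-sorted Gaussian contributions front to back at each pixel. A minimal NumPy sketch of that compositing step, with `alphas` and `colors` as hypothetical per-Gaussian values already evaluated at one pixel:

```python
import numpy as np

def composite_pixel(alphas, colors):
    """Front-to-back alpha compositing of depth-sorted Gaussian splats.

    alphas: (N,) opacity of each Gaussian evaluated at this pixel, sorted near-to-far.
    colors: (N, 3) RGB of each Gaussian at this pixel.
    Returns the composited RGB value.
    """
    transmittance = 1.0
    out = np.zeros(3)
    for a, c in zip(alphas, colors):
        out += transmittance * a * c      # contribution of this splat
        transmittance *= (1.0 - a)        # light blocked for splats behind it
        if transmittance < 1e-4:          # early termination, as in typical 3DGS rasterizers
            break
    return out

# toy usage: three splats covering one pixel
print(composite_pixel(np.array([0.6, 0.3, 0.8]),
                      np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]])))
```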
Zhang, Mingyuan; Cai, Zhongang; Pan, Liang; Hong, Fangzhou; Guo, Xinying; Yang, Lei; Liu, Ziwei
MotionDiffuse: Text-Driven Human Motion Generation With Diffusion Model Journal Article
In: IEEE Trans. Pattern Anal. Mach. Intell., vol. 46, no. 6, pp. 4115–4128, 2024, ISSN: 0162-8828, 2160-9292, 1939-3539.
@article{zhang_motiondiffuse_2024,
title = {MotionDiffuse: Text-Driven Human Motion Generation With Diffusion Model},
author = {Mingyuan Zhang and Zhongang Cai and Liang Pan and Fangzhou Hong and Xinying Guo and Lei Yang and Ziwei Liu},
url = {https://ieeexplore.ieee.org/document/10416192/},
doi = {10.1109/TPAMI.2024.3355414},
issn = {0162-8828, 2160-9292, 1939-3539},
year = {2024},
date = {2024-06-01},
urldate = {2024-07-18},
journal = {IEEE Trans. Pattern Anal. Mach. Intell.},
volume = {46},
number = {6},
pages = {4115–4128},
keywords = {VGL},
pubstate = {published},
tppubtype = {article}
}
Nurunnabi, Abdul; Teferle, Felicia; Laefer, Debra F.; Chen, Meida; Ali, Mir Masoom
Development of a Precise Tree Structure from LiDAR Point Clouds Journal Article
In: Int. Arch. Photogramm. Remote Sens. Spatial Inf. Sci., vol. XLVIII-2-2024, pp. 301–308, 2024, ISSN: 2194-9034.
@article{nurunnabi_development_2024,
title = {Development of a Precise Tree Structure from LiDAR Point Clouds},
author = {Abdul Nurunnabi and Felicia Teferle and Debra F. Laefer and Meida Chen and Mir Masoom Ali},
url = {https://isprs-archives.copernicus.org/articles/XLVIII-2-2024/301/2024/},
doi = {10.5194/isprs-archives-XLVIII-2-2024-301-2024},
issn = {2194-9034},
year = {2024},
date = {2024-06-01},
urldate = {2024-07-11},
journal = {Int. Arch. Photogramm. Remote Sens. Spatial Inf. Sci.},
volume = {XLVIII-2-2024},
pages = {301–308},
abstract = {A precise tree structure that represents the distribution of tree stem, branches, and leaves is crucial for accurately capturing the full representation of a tree. Light Detection and Ranging (LiDAR)-based three-dimensional (3D) point clouds (PCs) capture the geometry of scanned objects including forest stands and individual trees. PCs are irregular, unstructured, often noisy, and contaminated by outliers. Researchers have struggled to develop methods to separate leaves and wood without losing the tree geometry. This paper proposes a solution that employs only the spatial coordinates (x, y, z) of the PC. The new algorithm works as a filtering approach, utilizing multi-scale neighborhood-based geometric features (GFs), e.g., linearity, planarity, and verticality, to classify linear (wood) and non-linear (leaf) points. This involves finding potential wood points and coupling them with an octree-based segmentation to develop a tree architecture. The main contributions of this paper are (i) investigating the potential of different GFs to split linear and non-linear points, (ii) introducing a novel method that pointwise classifies leaf and wood points, and (iii) developing a precise 3D tree structure. The performance of the new algorithm has been demonstrated through terrestrial laser scanning PCs. For a Scots pine tree, the new method classifies leaf and wood points with an overall accuracy of 97.9%.},
keywords = {Narrative, VGL},
pubstate = {published},
tppubtype = {article}
}
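The filtering approach above relies on multi-scale, neighborhood-based geometric features. A minimal sketch of the standard covariance-eigenvalue features named in the abstract (linearity, planarity, verticality), using their conventional definitions rather than the authors' exact implementation:

```python
import numpy as np

def geometric_features(neighborhood_xyz):
    """Eigenvalue-based features of one local neighborhood of shape (k, 3).

    Returns (linearity, planarity, verticality) using common definitions;
    the paper applies such features at multiple neighborhood scales before
    classifying points as wood or leaf.
    """
    centered = neighborhood_xyz - neighborhood_xyz.mean(axis=0)
    cov = centered.T @ centered / len(neighborhood_xyz)
    eigvals, eigvecs = np.linalg.eigh(cov)      # eigenvalues in ascending order
    l3, l2, l1 = eigvals                        # so l1 >= l2 >= l3
    l1 = max(l1, 1e-12)
    linearity = (l1 - l2) / l1                  # high for stem/branch-like (wood) points
    planarity = (l2 - l3) / l1
    normal = eigvecs[:, 0]                      # eigenvector of the smallest eigenvalue
    verticality = 1.0 - abs(normal[2])          # one common definition
    return linearity, planarity, verticality

# toy usage: points scattered along a near-vertical line (branch-like)
pts = np.c_[0.01 * np.random.randn(50), 0.01 * np.random.randn(50), np.linspace(0, 1, 50)]
print(geometric_features(pts))
```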
Zhang, Hao; Chang, Di; Li, Fang; Soleymani, Mohammad; Ahuja, Narendra
MagicPose4D: Crafting Articulated Models with Appearance and Motion Control Miscellaneous
2024, (Version Number: 1).
@misc{zhang_magicpose4d_2024,
title = {MagicPose4D: Crafting Articulated Models with Appearance and Motion Control},
author = {Hao Zhang and Di Chang and Fang Li and Mohammad Soleymani and Narendra Ahuja},
url = {https://arxiv.org/abs/2405.14017},
doi = {10.48550/ARXIV.2405.14017},
year = {2024},
date = {2024-05-01},
urldate = {2024-06-25},
publisher = {arXiv},
abstract = {With the success of 2D and 3D visual generative models, there is growing interest in generating 4D content. Existing methods primarily rely on text prompts to produce 4D content, but they often fall short of accurately defining complex or rare motions. To address this limitation, we propose MagicPose4D, a novel framework for refined control over both appearance and motion in 4D generation. Unlike traditional methods, MagicPose4D accepts monocular videos as motion prompts, enabling precise and customizable motion generation. MagicPose4D comprises two key modules:
i) Dual-Phase 4D Reconstruction Module, which operates in two phases. The first phase focuses on capturing the model's shape using accurate 2D supervision and less accurate but geometrically informative 3D pseudo-supervision without imposing skeleton constraints. The second phase refines the model using more accurate pseudo-3D supervision obtained in the first phase, and introduces kinematic chain-based skeleton constraints to ensure physical plausibility. Additionally, we propose a Global-local Chamfer loss that aligns the overall distribution of predicted mesh vertices with the supervision while maintaining part-level alignment without extra annotations.
ii) Cross-category Motion Transfer Module, which leverages the predictions from the 4D reconstruction module and uses a kinematic-chain-based skeleton to achieve cross-category motion transfer. It ensures smooth transitions between frames through dynamic rigidity, facilitating robust generalization without additional training.
Through extensive experiments, we demonstrate that MagicPose4D significantly improves the accuracy and consistency of 4D content generation, outperforming existing methods in various benchmarks.},
note = {Version Number: 1},
keywords = {VGL, Virtual Humans},
pubstate = {published},
tppubtype = {misc}
}
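The abstract above mentions a Global-local Chamfer loss. As background (not the authors' implementation), the plain symmetric Chamfer distance between two point sets is sketched below; the "local" part-level term presumably applies the same distance per part:

```python
import numpy as np

def chamfer_distance(pred, target):
    """Symmetric Chamfer distance between point sets pred (N, 3) and target (M, 3)."""
    d = np.linalg.norm(pred[:, None, :] - target[None, :, :], axis=-1)  # (N, M) pairwise distances
    return d.min(axis=1).mean() + d.min(axis=0).mean()

# toy usage: a point set and a slightly perturbed copy
a = np.random.rand(128, 3)
b = a + 0.01 * np.random.randn(128, 3)
print(chamfer_distance(a, b))
```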
Liu, Rong; Xu, Rui; Hu, Yue; Chen, Meida; Feng, Andrew
AtomGS: Atomizing Gaussian Splatting for High-Fidelity Radiance Field Miscellaneous
2024, (Version Number: 2).
@misc{liu_atomgs_2024,
title = {AtomGS: Atomizing Gaussian Splatting for High-Fidelity Radiance Field},
author = {Rong Liu and Rui Xu and Yue Hu and Meida Chen and Andrew Feng},
url = {https://arxiv.org/abs/2405.12369},
doi = {10.48550/ARXIV.2405.12369},
year = {2024},
date = {2024-05-01},
urldate = {2024-07-11},
publisher = {arXiv},
abstract = {3D Gaussian Splatting (3DGS) has recently advanced radiance field reconstruction by offering superior capabilities for novel view synthesis and real-time rendering speed. However, its strategy of blending optimization and adaptive density control might lead to sub-optimal results; it can sometimes yield noisy geometry and blurry artifacts due to prioritizing optimizing large Gaussians at the cost of adequately densifying smaller ones. To address this, we introduce AtomGS, consisting of Atomized Proliferation and Geometry-Guided Optimization. The Atomized Proliferation constrains ellipsoid Gaussians of various sizes into more uniform-sized Atom Gaussians. The strategy enhances the representation of areas with fine features by placing greater emphasis on densification in accordance with scene details. In addition, we proposed a Geometry-Guided Optimization approach that incorporates an Edge-Aware Normal Loss. This optimization method effectively smooths flat surfaces while preserving intricate details. Our evaluation shows that AtomGS outperforms existing state-of-the-art methods in rendering quality. Additionally, it achieves competitive accuracy in geometry reconstruction and offers a significant improvement in training speed over other SDF-based methods. More interactive demos can be found in our website (https://rongliu-leo.github.io/AtomGS/).},
note = {Version Number: 2},
keywords = {Graphics, VGL},
pubstate = {published},
tppubtype = {misc}
}
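AtomGS's Geometry-Guided Optimization includes an Edge-Aware Normal Loss. The exact formulation is in the paper; a common edge-aware smoothness pattern (down-weighting normal-map gradients wherever the reference image has strong edges) looks roughly like this sketch, with the weight scale of 10.0 being an arbitrary illustrative choice:

```python
import numpy as np

def edge_aware_normal_loss(normal_map, rgb):
    """Penalize normal-map gradients except across image edges.

    A generic sketch, not necessarily AtomGS's exact loss.
    normal_map: (H, W, 3) rendered normals; rgb: (H, W, 3) reference image in [0, 1].
    """
    dN_x = np.abs(np.diff(normal_map, axis=1)).sum(-1)   # (H, W-1) horizontal normal change
    dN_y = np.abs(np.diff(normal_map, axis=0)).sum(-1)   # (H-1, W) vertical normal change
    dI_x = np.abs(np.diff(rgb, axis=1)).mean(-1)         # image gradients
    dI_y = np.abs(np.diff(rgb, axis=0)).mean(-1)
    w_x = np.exp(-10.0 * dI_x)                            # small weight where the image has edges
    w_y = np.exp(-10.0 * dI_y)
    return (w_x * dN_x).mean() + (w_y * dN_y).mean()

# toy usage on random maps
H, W = 64, 64
print(edge_aware_normal_loss(np.random.rand(H, W, 3), np.random.rand(H, W, 3)))
```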
Zhang, Hui; Kuang, Bingran; Zhao, Yajie
Camera Calibration using a Single View of a Symmetric Object Proceedings Article
In: ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 2705–2709, IEEE, Seoul, Korea, Republic of, 2024, ISBN: 9798350344851.
@inproceedings{zhang_camera_2024,
title = {Camera Calibration using a Single View of a Symmetric Object},
author = {Hui Zhang and Bingran Kuang and Yajie Zhao},
url = {https://ieeexplore.ieee.org/document/10446005/},
doi = {10.1109/ICASSP48485.2024.10446005},
isbn = {9798350344851},
year = {2024},
date = {2024-04-01},
urldate = {2024-06-25},
booktitle = {ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
pages = {2705–2709},
publisher = {IEEE},
address = {Seoul, Korea, Republic of},
keywords = {Graphics, VGL},
pubstate = {published},
tppubtype = {inproceedings}
}
Chen, Haiwei; Zhao, Yajie
Don't Look into the Dark: Latent Codes for Pluralistic Image Inpainting Miscellaneous
2024, (arXiv:2403.18186 [cs]).
@misc{chen_dont_2024,
title = {Don't Look into the Dark: Latent Codes for Pluralistic Image Inpainting},
author = {Haiwei Chen and Yajie Zhao},
url = {http://arxiv.org/abs/2403.18186},
year = {2024},
date = {2024-03-01},
urldate = {2024-08-15},
publisher = {arXiv},
abstract = {We present a method for large-mask pluralistic image inpainting based on the generative framework of discrete latent codes. Our method learns latent priors, discretized as tokens, by only performing computations at the visible locations of the image. This is realized by a restrictive partial encoder that predicts the token label for each visible block, a bidirectional transformer that infers the missing labels by only looking at these tokens, and a dedicated synthesis network that couples the tokens with the partial image priors to generate coherent and pluralistic complete image even under extreme mask settings. Experiments on public benchmarks validate our design choices as the proposed method outperforms strong baselines in both visual quality and diversity metrics.},
note = {arXiv:2403.18186 [cs]},
keywords = {VGL},
pubstate = {published},
tppubtype = {misc}
}
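The inpainting method above predicts discrete token labels at masked positions from the visible tokens alone. A toy sketch of that inference step, with `predict_logits` standing in for the paper's bidirectional transformer (a hypothetical placeholder, here returning random logits):

```python
import numpy as np

VOCAB = 512  # size of the discrete latent codebook (illustrative value)

def predict_logits(tokens, visible_mask):
    """Placeholder for the bidirectional transformer; returns random logits here."""
    return np.random.randn(len(tokens), VOCAB)

def infer_missing_tokens(tokens, visible_mask):
    """Fill token labels at masked positions, keeping visible tokens fixed."""
    logits = predict_logits(tokens, visible_mask)
    filled = tokens.copy()
    filled[~visible_mask] = logits[~visible_mask].argmax(axis=-1)  # greedy label choice
    return filled

# toy usage: 16 token slots, first half visible
tokens = np.random.randint(0, VOCAB, size=16)
visible = np.arange(16) < 8
print(infer_missing_tokens(tokens, visible))
```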
Yang, Jing; Xiao, Hanyuan; Teng, Wenbin; Cai, Yunxuan; Zhao, Yajie
Light Sampling Field and BRDF Representation for Physically-based Neural Rendering Journal Article
In: 2023, (Publisher: arXiv, Version Number: 1).
@article{yang_light_2023,
title = {Light Sampling Field and BRDF Representation for Physically-based Neural Rendering},
author = {Jing Yang and Hanyuan Xiao and Wenbin Teng and Yunxuan Cai and Yajie Zhao},
url = {https://arxiv.org/abs/2304.05472},
doi = {10.48550/ARXIV.2304.05472},
year = {2023},
date = {2023-04-01},
urldate = {2023-08-22},
abstract = {Physically-based rendering (PBR) is key for immersive rendering effects used widely in the industry to showcase detailed realistic scenes from computer graphics assets. A well-known caveat is that producing the same is computationally heavy and relies on complex capture devices. Inspired by the success in quality and efficiency of recent volumetric neural rendering, we want to develop a physically-based neural shader to eliminate device dependency and significantly boost performance. However, no existing lighting and material models in the current neural rendering approaches can accurately represent the comprehensive lighting models and BRDFs properties required by the PBR process. Thus, this paper proposes a novel lighting representation that models direct and indirect light locally through a light sampling strategy in a learned light sampling field. We also propose BRDF models to separately represent surface/subsurface scattering details to enable complex objects such as translucent material (i.e., skin, jade). We then implement our proposed representations with an end-to-end physically-based neural face skin shader, which takes a standard face asset (i.e., geometry, albedo map, and normal map) and an HDRI for illumination as inputs and generates a photo-realistic rendering as output. Extensive experiments showcase the quality and efficiency of our PBR face skin shader, indicating the effectiveness of our proposed lighting and material representations.},
note = {Publisher: arXiv; Version Number: 1},
keywords = {DTIC, UARC, VGL},
pubstate = {published},
tppubtype = {article}
}
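The shader above evaluates lighting from a learned light sampling field together with BRDF models. As plain background (not the paper's network), shading a surface point from a set of sampled light directions is a discrete rendering-equation sum, shown here with a Lambertian BRDF as a stand-in for the learned one:

```python
import numpy as np

def shade_point(normal, light_dirs, light_radiance, albedo):
    """Sum of sampled light contributions: L_o = sum_i f * L_i * max(n . w_i, 0) * dw.

    normal: (3,) unit surface normal; light_dirs: (S, 3) unit sample directions;
    light_radiance: (S, 3) incoming radiance per sample; albedo: (3,) diffuse albedo.
    """
    brdf = albedo / np.pi                                  # Lambertian stand-in for the learned BRDF
    cos_terms = np.clip(light_dirs @ normal, 0.0, None)    # (S,) clamped cosine terms
    dw = 2.0 * np.pi / len(light_dirs)                     # uniform hemisphere solid-angle weight
    return (light_radiance * cos_terms[:, None]).sum(axis=0) * brdf * dw

# toy usage: white hemisphere light over an upward-facing point
S = 256
dirs = np.random.randn(S, 3)
dirs /= np.linalg.norm(dirs, axis=1, keepdims=True)
dirs[:, 2] = np.abs(dirs[:, 2])                            # keep samples in the upper hemisphere
print(shade_point(np.array([0.0, 0.0, 1.0]), dirs, np.ones((S, 3)), np.array([0.8, 0.7, 0.6])))
```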
Chemburkar, Ankur; Lu, Shuhong; Feng, Andrew
MoDDM: Text-to-Motion Synthesis using Discrete Diffusion Model Proceedings Article
In: 2023.
@inproceedings{chemburkar_moddm_2023,
title = {MoDDM: Text-to-Motion Synthesis using Discrete Diffusion Model},
author = {Ankur Chemburkar and Shuhong Lu and Andrew Feng},
url = {https://papers.bmvc2023.org/0624.pdf},
year = {2023},
date = {2023-01-01},
abstract = {We present the motion discrete diffusion model (MoDDM) for synthesizing human motion from text descriptions that addresses challenges in cross-modal mapping and motion diversity. The previous methods that utilized variational autoencoder (VAE) to learn the latent distributions for text-to-motion synthesis tend to produce motions with less diversity and fidelity. While the diffusion models show promising results by generating high quality motions, they require higher computational costs and may produce motions less aligned with the input text. The proposed method combines the discrete latent space and diffusion models to learn an expressive conditional probabilistic mapping for motion synthesis. Our method utilizes vector quantization variational autoencoder (VQ-VAE) to learn discrete motion tokens and then applies discrete denoising diffusion probabilistic models (D3PM) to learn the conditional probability distributions for the motion tokens. The discrete classifier-free guidance is further utilized in the training process with proper guidance scale for aligning the motions and the corresponding text descriptions. By learning the denoising model in the discrete latent space, the method produces high quality motion results while greatly reducing computational costs compared to training the diffusion models on raw motion sequences. The evaluation results show that the proposed approach outperforms previous methods in both motion quality and text-to-motion matching accuracy.},
keywords = {VGL},
pubstate = {published},
tppubtype = {inproceedings}
}
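MoDDM trains with discrete classifier-free guidance. The usual guidance combination at sampling time (conditional and unconditional logits mixed with a guidance scale) is sketched below; this is the generic formula rather than the authors' code:

```python
import numpy as np

def guided_token_probs(cond_logits, uncond_logits, w):
    """Classifier-free guidance over motion-token logits.

    guided = (1 + w) * cond - w * uncond, followed by a softmax;
    w = 0 recovers the purely conditional prediction.
    """
    guided = (1.0 + w) * cond_logits - w * uncond_logits
    guided -= guided.max(axis=-1, keepdims=True)   # numerical stability before exponentiation
    probs = np.exp(guided)
    return probs / probs.sum(axis=-1, keepdims=True)

# toy usage: logits over a codebook of 8 motion tokens
print(guided_token_probs(np.random.randn(8), np.random.randn(8), w=2.0))
```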
Liu, Shichen; Cai, Yunxuan; Chen, Haiwei; Zhou, Yichao; Zhao, Yajie
Rapid Face Asset Acquisition with Recurrent Feature Alignment Journal Article
In: ACM Trans. Graph., vol. 41, no. 6, pp. 214:1–214:17, 2022, ISSN: 0730-0301.
@article{liu_rapid_2022,
title = {Rapid Face Asset Acquisition with Recurrent Feature Alignment},
author = {Shichen Liu and Yunxuan Cai and Haiwei Chen and Yichao Zhou and Yajie Zhao},
url = {https://dl.acm.org/doi/10.1145/3550454.3555509},
doi = {10.1145/3550454.3555509},
issn = {0730-0301},
year = {2022},
date = {2022-11-01},
urldate = {2023-03-31},
journal = {ACM Trans. Graph.},
volume = {41},
number = {6},
pages = {214:1–214:17},
abstract = {We present Recurrent Feature Alignment (ReFA), an end-to-end neural network for the very rapid creation of production-grade face assets from multi-view images. ReFA is on par with the industrial pipelines in quality for producing accurate, complete, registered, and textured assets directly applicable to physically-based rendering, but produces the asset end-to-end, fully automatically at a significantly faster speed at 4.5 FPS, which is unprecedented among neural-based techniques. Our method represents face geometry as a position map in the UV space. The network first extracts per-pixel features in both the multi-view image space and the UV space. A recurrent module then iteratively optimizes the geometry by projecting the image-space features to the UV space and comparing them with a reference UV-space feature. The optimized geometry then provides pixel-aligned signals for the inference of high-resolution textures. Experiments have validated that ReFA achieves a median error of 0.603mm in geometry reconstruction, is robust to extreme pose and expression, and excels in sparse-view settings. We believe that the progress achieved by our network enables lightweight, fast face assets acquisition that significantly boosts the downstream applications, such as avatar creation and facial performance capture. It will also enable massive database capturing for deep learning purposes.},
keywords = {DTIC, VGL},
pubstate = {published},
tppubtype = {article}
}
Kuang, Zhengfei; Li, Jiaman; He, Mingming; Wang, Tong; Zhao, Yajie
DenseGAP: Graph-Structured Dense Correspondence Learning with Anchor Points Proceedings Article
In: pp. 542–549, IEEE Computer Society, 2022, ISBN: 978-1-66549-062-7.
@inproceedings{kuang_densegap_2022,
title = {DenseGAP: Graph-Structured Dense Correspondence Learning with Anchor Points},
author = {Zhengfei Kuang and Jiaman Li and Mingming He and Tong Wang and Yajie Zhao},
url = {https://www.computer.org/csdl/proceedings-article/icpr/2022/09956472/1IHpppIuqOc},
doi = {10.1109/ICPR56361.2022.9956472},
isbn = {978-1-66549-062-7},
year = {2022},
date = {2022-08-01},
urldate = {2023-03-31},
pages = {542–549},
publisher = {IEEE Computer Society},
abstract = {Establishing dense correspondence between two images is a fundamental computer vision problem, which is typically tackled by matching local feature descriptors. However, without global awareness, such local features are often insufficient for disambiguating similar regions. And computing the pairwise feature correlation across images is both computation-expensive and memory-intensive. To make the local features aware of the global context and improve their matching accuracy, we introduce DenseGAP, a new solution for efficient Dense correspondence learning with a Graph-structured neural network conditioned on Anchor Points. Specifically, we first propose a graph structure that utilizes anchor points to provide sparse but reliable prior on inter- and intra-image context and propagates them to all image points via directed edges. We also design a graph-structured network to broadcast multi-level contexts via light-weighted message-passing layers and generate high-resolution feature maps at low memory cost. Finally, based on the predicted feature maps, we introduce a coarse-to-fine framework for accurate correspondence prediction using cycle consistency. Our feature descriptors capture both local and global information, thus enabling a continuous feature field for querying arbitrary points at high resolution. Through comprehensive ablative experiments and evaluations on large-scale indoor and outdoor datasets, we demonstrate that our method advances the state-of-the-art of correspondence learning on most benchmarks.},
keywords = {VGL},
pubstate = {published},
tppubtype = {inproceedings}
}
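DenseGAP's coarse-to-fine correspondence prediction uses cycle consistency. A minimal illustration of cycle-consistent (mutual nearest-neighbor) matching between two descriptor sets, independent of the paper's graph-structured network:

```python
import numpy as np

def mutual_nearest_matches(desc_a, desc_b):
    """Keep only matches that are nearest neighbors in both directions.

    desc_a: (N, D) and desc_b: (M, D) L2-normalized descriptors.
    Returns (K, 2) index pairs (i, j) that survive the A -> B -> A cycle check.
    """
    sim = desc_a @ desc_b.T                            # (N, M) cosine similarities
    a_to_b = sim.argmax(axis=1)                        # best match in B for each A descriptor
    b_to_a = sim.argmax(axis=0)                        # best match in A for each B descriptor
    keep = b_to_a[a_to_b] == np.arange(len(desc_a))    # cycle returns to the starting index
    return np.stack([np.arange(len(desc_a))[keep], a_to_b[keep]], axis=1)

# toy usage: B is a cyclically shifted copy of A, so every match should survive
a = np.random.randn(100, 64)
a /= np.linalg.norm(a, axis=1, keepdims=True)
b = np.roll(a, 5, axis=0)
print(mutual_nearest_matches(a, b)[:5])
```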
Chen, Haiwei; Liu, Jiayi; Chen, Weikai; Liu, Shichen; Zhao, Yajie
Exemplar-based Pattern Synthesis with Implicit Periodic Field Network Proceedings Article
In: 2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 3698–3707, IEEE, New Orleans, LA, USA, 2022, ISBN: 978-1-66546-946-3.
@inproceedings{chen_exemplar-based_2022,
title = {Exemplar-based Pattern Synthesis with Implicit Periodic Field Network},
author = {Haiwei Chen and Jiayi Liu and Weikai Chen and Shichen Liu and Yajie Zhao},
url = {https://ieeexplore.ieee.org/document/9879904/},
doi = {10.1109/CVPR52688.2022.00369},
isbn = {978-1-66546-946-3},
year = {2022},
date = {2022-06-01},
urldate = {2023-02-10},
booktitle = {2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
pages = {3698–3707},
publisher = {IEEE},
address = {New Orleans, LA, USA},
keywords = {UARC, VGL},
pubstate = {published},
tppubtype = {inproceedings}
}
Danieau, Fabien; Guillotel, Philippe; Hoyet, Ludovic; Tonneau, Steve; Zhao, Yajie
Editorial: Creating Lifelike Digital Humans Journal Article
In: Front. Virtual Real., vol. 3, pp. 906118, 2022, ISSN: 2673-4192.
@article{danieau_editorial_2022,
title = {Editorial: Creating Lifelike Digital Humans},
author = {Fabien Danieau and Philippe Guillotel and Ludovic Hoyet and Steve Tonneau and Yajie Zhao},
url = {https://www.frontiersin.org/articles/10.3389/frvir.2022.906118/full},
doi = {10.3389/frvir.2022.906118},
issn = {2673-4192},
year = {2022},
date = {2022-04-01},
urldate = {2024-08-13},
journal = {Front. Virtual Real.},
volume = {3},
pages = {906118},
keywords = {VGL},
pubstate = {published},
tppubtype = {article}
}
Liu, Shichen; Li, Tianye; Chen, Weikai; Li, Hao
A General Differentiable Mesh Renderer for Image-Based 3D Reasoning Journal Article
In: IEEE Transactions on Pattern Analysis and Machine Intelligence, vol. 44, no. 1, pp. 50–62, 2022, ISSN: 1939-3539, (Conference Name: IEEE Transactions on Pattern Analysis and Machine Intelligence).
@article{liu_general_2022,
title = {A General Differentiable Mesh Renderer for Image-Based 3D Reasoning},
author = {Shichen Liu and Tianye Li and Weikai Chen and Hao Li},
doi = {10.1109/TPAMI.2020.3007759},
issn = {1939-3539},
year = {2022},
date = {2022-01-01},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
volume = {44},
number = {1},
pages = {50–62},
abstract = {Rendering bridges the gap between 2D vision and 3D scenes by simulating the physical process of image formation. By inverting such a renderer, one can think of a learning approach to infer 3D information from 2D images. However, standard graphics renderers involve a fundamental step called rasterization, which prevents rendering from being differentiable. Unlike the state-of-the-art differentiable renderers (Kato et al. 2018 and Loper 2018), which only approximate the rendering gradient in the backpropagation, we propose a naturally differentiable rendering framework that is able to (1) directly render colorized mesh using differentiable functions and (2) back-propagate efficient supervisions to mesh vertices and their attributes from various forms of image representations. The key to our framework is a novel formulation that views rendering as an aggregation function that fuses the probabilistic contributions of all mesh triangles with respect to the rendered pixels. Such formulation enables our framework to flow gradients to the occluded and distant vertices, which cannot be achieved by the previous state-of-the-arts. We show that by using the proposed renderer, one can achieve significant improvement in 3D unsupervised single-view reconstruction both qualitatively and quantitatively. Experiments also demonstrate that our approach can handle the challenging tasks in image-based shape fitting, which remain nontrivial to existing differentiable renderers.},
note = {Conference Name: IEEE Transactions on Pattern Analysis and Machine Intelligence},
keywords = {VGL},
pubstate = {published},
tppubtype = {article}
}
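The differentiable renderer above fuses probabilistic per-triangle contributions at each pixel. For the silhouette case this kind of aggregation reduces to combining per-triangle coverage probabilities, sketched below in simplified form (the full color aggregation in the paper also weights contributions by depth):

```python
import numpy as np

def soft_silhouette(signed_sq_dists, sigma=1e-4):
    """Probabilistic silhouette at one pixel aggregated over all triangles.

    signed_sq_dists: (T,) squared screen-space distance from the pixel to each
    triangle, taken positive inside the triangle and negative outside.
    D_j = sigmoid(d_j / sigma) is triangle j's coverage probability, and the
    pixel silhouette 1 - prod_j (1 - D_j) lets gradients reach every triangle.
    """
    D = 1.0 / (1.0 + np.exp(-signed_sq_dists / sigma))
    return 1.0 - np.prod(1.0 - D)

# toy usage: pixel inside one triangle, just outside two others
print(soft_silhouette(np.array([2e-4, -1e-4, -5e-4])))
```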
Li, Jiaman; Villegas, Ruben; Ceylan, Duygu; Yang, Jimei; Kuang, Zhengfei; Li, Hao; Zhao, Yajie
Task-Generic Hierarchical Human Motion Prior using VAEs Proceedings Article
In: 2021 International Conference on 3D Vision (3DV), pp. 771–781, IEEE, London, United Kingdom, 2021, ISBN: 978-1-66542-688-6.
@inproceedings{li_task-generic_2021,
title = {Task-Generic Hierarchical Human Motion Prior using VAEs},
author = {Jiaman Li and Ruben Villegas and Duygu Ceylan and Jimei Yang and Zhengfei Kuang and Hao Li and Yajie Zhao},
url = {https://ieeexplore.ieee.org/document/9665881/},
doi = {10.1109/3DV53792.2021.00086},
isbn = {978-1-66542-688-6},
year = {2021},
date = {2021-12-01},
urldate = {2022-09-22},
booktitle = {2021 International Conference on 3D Vision (3DV)},
pages = {771–781},
publisher = {IEEE},
address = {London, United Kingdom},
keywords = {DTIC, UARC, VGL},
pubstate = {published},
tppubtype = {inproceedings}
}
Liu, Shichen; Zhou, Yichao; Zhao, Yajie
VaPiD: A Rapid Vanishing Point Detector via Learned Optimizers Proceedings Article
In: 2021 IEEE/CVF International Conference on Computer Vision (ICCV), pp. 12839–12848, IEEE, Montreal, QC, Canada, 2021, ISBN: 978-1-66542-812-5.
@inproceedings{liu_vapid_2021,
title = {VaPiD: A Rapid Vanishing Point Detector via Learned Optimizers},
author = {Shichen Liu and Yichao Zhou and Yajie Zhao},
url = {https://ieeexplore.ieee.org/document/9711313/},
doi = {10.1109/ICCV48922.2021.01262},
isbn = {978-1-66542-812-5},
year = {2021},
date = {2021-10-01},
urldate = {2022-09-22},
booktitle = {2021 IEEE/CVF International Conference on Computer Vision (ICCV)},
pages = {12839–12848},
publisher = {IEEE},
address = {Montreal, QC, Canada},
keywords = {DTIC, UARC, VGL},
pubstate = {published},
tppubtype = {inproceedings}
}
Li, Tianye; Liu, Shichen; Bolkart, Timo; Liu, Jiayi; Li, Hao; Zhao, Yajie
Topologically Consistent Multi-View Face Inference Using Volumetric Sampling Proceedings Article
In: 2021 IEEE/CVF International Conference on Computer Vision (ICCV), pp. 3804–3814, IEEE, Montreal, QC, Canada, 2021, ISBN: 978-1-66542-812-5.
@inproceedings{li_topologically_2021,
title = {Topologically Consistent Multi-View Face Inference Using Volumetric Sampling},
author = {Tianye Li and Shichen Liu and Timo Bolkart and Jiayi Liu and Hao Li and Yajie Zhao},
url = {https://ieeexplore.ieee.org/document/9711264/},
doi = {10.1109/ICCV48922.2021.00380},
isbn = {978-1-66542-812-5},
year = {2021},
date = {2021-10-01},
urldate = {2022-09-22},
booktitle = {2021 IEEE/CVF International Conference on Computer Vision (ICCV)},
pages = {3804–3814},
publisher = {IEEE},
address = {Montreal, QC, Canada},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Xiang, Sitao; Gu, Yuming; Xiang, Pengda; Chai, Menglei; Li, Hao; Zhao, Yajie; He, Mingming
DisUnknown: Distilling Unknown Factors for Disentanglement Learning Proceedings Article
In: 2021 IEEE/CVF International Conference on Computer Vision (ICCV), pp. 14790–14799, IEEE, Montreal, QC, Canada, 2021, ISBN: 978-1-66542-812-5.
@inproceedings{xiang_disunknown_2021,
title = {DisUnknown: Distilling Unknown Factors for Disentanglement Learning},
author = {Sitao Xiang and Yuming Gu and Pengda Xiang and Menglei Chai and Hao Li and Yajie Zhao and Mingming He},
url = {https://ieeexplore.ieee.org/document/9709965/},
doi = {10.1109/ICCV48922.2021.01454},
isbn = {978-1-66542-812-5},
year = {2021},
date = {2021-10-01},
urldate = {2022-09-23},
booktitle = {2021 IEEE/CVF International Conference on Computer Vision (ICCV)},
pages = {14790–14799},
publisher = {IEEE},
address = {Montreal, QC, Canada},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Xiang, Sitao
Eliminating topological errors in neural network rotation estimation using self-selecting ensembles Journal Article
In: ACM Trans. Graph., vol. 40, no. 4, pp. 167:1–167:21, 2021, ISSN: 0730-0301.
@article{xiang_eliminating_2021,
title = {Eliminating topological errors in neural network rotation estimation using self-selecting ensembles},
author = {Sitao Xiang},
url = {https://dl.acm.org/doi/10.1145/3450626.3459882},
doi = {10.1145/3450626.3459882},
issn = {0730-0301},
year = {2021},
date = {2021-07-01},
urldate = {2023-03-31},
journal = {ACM Trans. Graph.},
volume = {40},
number = {4},
pages = {167:1–167:21},
abstract = {Many problems in computer graphics and computer vision applications involves inferring a rotation from a variety of different forms of inputs. With the increasing use of deep learning, neural networks have been employed to solve such problems. However, the traditional representations for 3D rotations, the quaternions and Euler angles, are found to be problematic for neural networks in practice, producing seemingly unavoidable large estimation errors. Previous researches has identified the discontinuity of the mapping from SO(3) to the quaternions or Euler angles as the source of such errors, and to solve it, embeddings of SO(3) have been proposed as the output representation of rotation estimation networks instead. In this paper, we argue that the argument against quaternions and Euler angles from local discontinuities of the mappings from SO(3) is flawed, and instead provide a different argument from the global topological properties of SO(3) that also establishes the lower bound of maximum error when using quaternions and Euler angles for rotation estimation networks. Extending from this view, we discover that rotation symmetries in the input object causes additional topological problems that even using embeddings of SO(3) as the output representation would not correctly handle. We propose the self-selecting ensemble, a topologically motivated approach, where the network makes multiple predictions and assigns weights to them. We show theoretically and with experiments that our methods can be combined with a wide range of different rotation representations and can handle all kinds of finite symmetries in 3D rotation estimation problems.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
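The self-selecting ensemble described above has the network emit several rotation candidates plus selection weights and use only the highest-weighted one. The selection step itself is simple; a sketch for quaternion outputs is given below, with the training losses and symmetry handling left to the paper:

```python
import numpy as np

def select_rotation(candidate_quats, weights):
    """Pick the highest-weighted candidate and return it as a unit quaternion.

    candidate_quats: (K, 4) unnormalized quaternion predictions;
    weights: (K,) self-selection scores produced by the network.
    """
    best = int(np.argmax(weights))
    q = candidate_quats[best]
    return q / np.linalg.norm(q)

# toy usage: three candidates, the middle one preferred
quats = np.random.randn(3, 4)
print(select_rotation(quats, np.array([0.1, 0.7, 0.2])))
```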
Chen, Haiwei; Liu, Shichen; Chen, Weikai; Li, Hao; Hill, Randall
Equivariant Point Network for 3D Point Cloud Analysis Proceedings Article
In: pp. 14514–14523, 2021.
@inproceedings{chen_equivariant_2021,
title = {Equivariant Point Network for 3D Point Cloud Analysis},
author = {Haiwei Chen and Shichen Liu and Weikai Chen and Hao Li and Randall Hill},
url = {https://openaccess.thecvf.com/content/CVPR2021/html/Chen_Equivariant_Point_Network_for_3D_Point_Cloud_Analysis_CVPR_2021_paper.html},
year = {2021},
date = {2021-01-01},
urldate = {2023-03-31},
pages = {14514–14523},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Filter
2024
Chen, Meida; Lal, Devashish; Yu, Zifan; Xu, Jiuyi; Feng, Andrew; You, Suya; Nurunnabi, Abdul; Shi, Yangming
Large-Scale 3D Terrain Reconstruction Using 3D Gaussian Splatting for Visualization and Simulation Journal Article
In: Int. Arch. Photogramm. Remote Sens. Spatial Inf. Sci., vol. XLVIII-2-2024, pp. 49–54, 2024, ISSN: 2194-9034.
Abstract | Links | BibTeX | Tags: DTIC, Graphics, VGL
@article{chen_large-scale_2024,
title = {Large-Scale 3D Terrain Reconstruction Using 3D Gaussian Splatting for Visualization and Simulation},
author = {Meida Chen and Devashish Lal and Zifan Yu and Jiuyi Xu and Andrew Feng and Suya You and Abdul Nurunnabi and Yangming Shi},
url = {https://isprs-archives.copernicus.org/articles/XLVIII-2-2024/49/2024/},
doi = {10.5194/isprs-archives-XLVIII-2-2024-49-2024},
issn = {2194-9034},
year = {2024},
date = {2024-06-01},
urldate = {2024-06-20},
journal = {Int. Arch. Photogramm. Remote Sens. Spatial Inf. Sci.},
volume = {XLVIII-2-2024},
pages = {49–54},
abstract = {Abstract. The fusion of low-cost unmanned aerial systems (UAS) with advanced photogrammetric techniques has revolutionized 3D terrain reconstruction, enabling the automated creation of detailed models. Concurrently, the advent of 3D Gaussian Splatting has introduced a paradigm shift in 3D data representation, offering visually realistic renditions distinct from traditional polygon-based models. Our research builds upon this foundation, aiming to integrate Gaussian Splatting into interactive simulations for immersive virtual environments. We address challenges such as collision detection by adopting a hybrid approach, combining Gaussian Splatting with photogrammetry-derived meshes. Through comprehensive experimentation covering varying terrain sizes and Gaussian densities, we evaluate scalability, performance, and limitations. Our findings contribute to advancing the use of advanced computer graphics techniques for enhanced 3D terrain visualization and simulation.},
keywords = {DTIC, Graphics, VGL},
pubstate = {published},
tppubtype = {article}
}
Zhang, Mingyuan; Cai, Zhongang; Pan, Liang; Hong, Fangzhou; Guo, Xinying; Yang, Lei; Liu, Ziwei
MotionDiffuse: Text-Driven Human Motion Generation With Diffusion Model Journal Article
In: IEEE Trans. Pattern Anal. Mach. Intell., vol. 46, no. 6, pp. 4115–4128, 2024, ISSN: 0162-8828, 2160-9292, 1939-3539.
@article{zhang_motiondiffuse_2024,
title = {MotionDiffuse: Text-Driven Human Motion Generation With Diffusion Model},
author = {Mingyuan Zhang and Zhongang Cai and Liang Pan and Fangzhou Hong and Xinying Guo and Lei Yang and Ziwei Liu},
url = {https://ieeexplore.ieee.org/document/10416192/},
doi = {10.1109/TPAMI.2024.3355414},
issn = {0162-8828, 2160-9292, 1939-3539},
year = {2024},
date = {2024-06-01},
urldate = {2024-07-18},
journal = {IEEE Trans. Pattern Anal. Mach. Intell.},
volume = {46},
number = {6},
pages = {4115–4128},
keywords = {VGL},
pubstate = {published},
tppubtype = {article}
}
Nurunnabi, Abdul; Teferle, Felicia; Laefer, Debra F.; Chen, Meida; Ali, Mir Masoom
Development of a Precise Tree Structure from LiDAR Point Clouds Journal Article
In: Int. Arch. Photogramm. Remote Sens. Spatial Inf. Sci., vol. XLVIII-2-2024, pp. 301–308, 2024, ISSN: 2194-9034.
Abstract | Links | BibTeX | Tags: Narrative, VGL
@article{nurunnabi_development_2024,
title = {Development of a Precise Tree Structure from LiDAR Point Clouds},
author = {Abdul Nurunnabi and Felicia Teferle and Debra F. Laefer and Meida Chen and Mir Masoom Ali},
url = {https://isprs-archives.copernicus.org/articles/XLVIII-2-2024/301/2024/},
doi = {10.5194/isprs-archives-XLVIII-2-2024-301-2024},
issn = {2194-9034},
year = {2024},
date = {2024-06-01},
urldate = {2024-07-11},
journal = {Int. Arch. Photogramm. Remote Sens. Spatial Inf. Sci.},
volume = {XLVIII-2-2024},
pages = {301–308},
abstract = {Abstract. A precise tree structure that represents the distribution of tree stem, branches, and leaves is crucial for accurately capturing the full representation of a tree. Light Detection and Ranging (LiDAR)-based three-dimensional (3D) point clouds (PCs) capture the geometry of scanned objects including forests stands and individual trees. PCs are irregular, unstructured, often noisy, and contaminated by outliers. Researchers have struggled to develop methods to separate leaves and wood without losing the tree geometry. This paper proposes a solution that employs only the spatial coordinates (x, y, z) of the PC. The new algorithm works as a filtering approach, utilizing multi-scale neighborhood-based geometric features (GFs) e.g., linearity, planarity, and verticality to classify linear (wood) and non-linear (leaf) points. This involves finding potential wood points and coupling them with an octree-based segmentation to develop a tree architecture. The main contributions of this paper are (i) investigating the potential of different GFs to split linear and non-linear points, (ii) introducing a novel method that pointwise classifies leaf and wood points, and (iii) developing a precise 3D tree structure. The performance of the new algorithm has been demonstrated through terrestrial laser scanning PCs. For a Scots pine tree, the new method classifies leaf and wood points with an overall accuracy of 97.9%.},
keywords = {Narrative, VGL},
pubstate = {published},
tppubtype = {article}
}
Zhang, Hao; Chang, Di; Li, Fang; Soleymani, Mohammad; Ahuja, Narendra
MagicPose4D: Crafting Articulated Models with Appearance and Motion Control Miscellaneous
2024, (Version Number: 1).
Abstract | Links | BibTeX | Tags: VGL, Virtual Humans
@misc{zhang_magicpose4d_2024,
title = {MagicPose4D: Crafting Articulated Models with Appearance and Motion Control},
author = {Hao Zhang and Di Chang and Fang Li and Mohammad Soleymani and Narendra Ahuja},
url = {https://arxiv.org/abs/2405.14017},
doi = {10.48550/ARXIV.2405.14017},
year = {2024},
date = {2024-05-01},
urldate = {2024-06-25},
publisher = {arXiv},
abstract = {With the success of 2D and 3D visual generative models, there is growing interest in generating 4D content. Existing methods primarily rely on text prompts to produce 4D content, but they often fall short of accurately defining complex or rare motions. To address this limitation, we propose MagicPose4D, a novel framework for refined control over both appearance and motion in 4D generation. Unlike traditional methods, MagicPose4D accepts monocular videos as motion prompts, enabling precise and customizable motion generation. MagicPose4D comprises two key modules:
i) Dual-Phase 4D Reconstruction Modulevphantom which operates in two phases. The first phase focuses on capturing the model's shape using accurate 2D supervision and less accurate but geometrically informative 3D pseudo-supervision without imposing skeleton constraints. The second phase refines the model using more accurate pseudo-3D supervision, obtained in the first phase and introduces kinematic chain-based skeleton constraints to ensure physical plausibility. Additionally, we propose a Global-local Chamfer loss that aligns the overall distribution of predicted mesh vertices with the supervision while maintaining part-level alignment without extra annotations.
ii) Cross-category Motion Transfer Modulevphantom leverages the predictions from the 4D reconstruction module and uses a kinematic-chain-based skeleton to achieve cross-category motion transfer. It ensures smooth transitions between frames through dynamic rigidity, facilitating robust generalization without additional training.
Through extensive experiments, we demonstrate that MagicPose4D significantly improves the accuracy and consistency of 4D content generation, outperforming existing methods in various benchmarks.},
note = {Version Number: 1},
keywords = {VGL, Virtual Humans},
pubstate = {published},
tppubtype = {misc}
}
i) Dual-Phase 4D Reconstruction Modulevphantom which operates in two phases. The first phase focuses on capturing the model's shape using accurate 2D supervision and less accurate but geometrically informative 3D pseudo-supervision without imposing skeleton constraints. The second phase refines the model using more accurate pseudo-3D supervision, obtained in the first phase and introduces kinematic chain-based skeleton constraints to ensure physical plausibility. Additionally, we propose a Global-local Chamfer loss that aligns the overall distribution of predicted mesh vertices with the supervision while maintaining part-level alignment without extra annotations.
ii) Cross-category Motion Transfer Modulevphantom leverages the predictions from the 4D reconstruction module and uses a kinematic-chain-based skeleton to achieve cross-category motion transfer. It ensures smooth transitions between frames through dynamic rigidity, facilitating robust generalization without additional training.
Through extensive experiments, we demonstrate that MagicPose4D significantly improves the accuracy and consistency of 4D content generation, outperforming existing methods in various benchmarks.
Liu, Rong; Xu, Rui; Hu, Yue; Chen, Meida; Feng, Andrew
AtomGS: Atomizing Gaussian Splatting for High-Fidelity Radiance Field Miscellaneous
2024, (Version Number: 2).
Abstract | Links | BibTeX | Tags: Graphics, VGL
@misc{liu_atomgs_2024,
title = {AtomGS: Atomizing Gaussian Splatting for High-Fidelity Radiance Field},
author = {Rong Liu and Rui Xu and Yue Hu and Meida Chen and Andrew Feng},
url = {https://arxiv.org/abs/2405.12369},
doi = {10.48550/ARXIV.2405.12369},
year = {2024},
date = {2024-05-01},
urldate = {2024-07-11},
publisher = {arXiv},
abstract = {3D Gaussian Splatting (3DGS) has recently advanced radiance field reconstruction by offering superior capabilities for novel view synthesis and real-time rendering speed. However, its strategy of blending optimization and adaptive density control might lead to sub-optimal results; it can sometimes yield noisy geometry and blurry artifacts due to prioritizing optimizing large Gaussians at the cost of adequately densifying smaller ones. To address this, we introduce AtomGS, consisting of Atomized Proliferation and Geometry-Guided Optimization. The Atomized Proliferation constrains ellipsoid Gaussians of various sizes into more uniform-sized Atom Gaussians. The strategy enhances the representation of areas with fine features by placing greater emphasis on densification in accordance with scene details. In addition, we proposed a Geometry-Guided Optimization approach that incorporates an Edge-Aware Normal Loss. This optimization method effectively smooths flat surfaces while preserving intricate details. Our evaluation shows that AtomGS outperforms existing state-of-the-art methods in rendering quality. Additionally, it achieves competitive accuracy in geometry reconstruction and offers a significant improvement in training speed over other SDF-based methods. More interactive demos can be found in our website (https://rongliu-leo.github.io/AtomGS/).},
note = {Version Number: 2},
keywords = {Graphics, VGL},
pubstate = {published},
tppubtype = {misc}
}
Zhang, Hui; Kuang, Bingran; Zhao, Yajie
Camera Calibration using a Single View of a Symmetric Object Proceedings Article
In: ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 2705–2709, IEEE, Seoul, Korea, Republic of, 2024, ISBN: 9798350344851.
Links | BibTeX | Tags: Graphics, VGL
@inproceedings{zhang_camera_2024,
title = {Camera Calibration using a Single View of a Symmetric Object},
author = {Hui Zhang and Bingran Kuang and Yajie Zhao},
url = {https://ieeexplore.ieee.org/document/10446005/},
doi = {10.1109/ICASSP48485.2024.10446005},
isbn = {9798350344851},
year = {2024},
date = {2024-04-01},
urldate = {2024-06-25},
booktitle = {ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
pages = {2705–2709},
publisher = {IEEE},
address = {Seoul, Korea, Republic of},
keywords = {Graphics, VGL},
pubstate = {published},
tppubtype = {inproceedings}
}
Chen, Haiwei; Zhao, Yajie
Don't Look into the Dark: Latent Codes for Pluralistic Image Inpainting Miscellaneous
2024, (arXiv:2403.18186 [cs]).
Abstract | Links | BibTeX | Tags: VGL
@misc{chen_dont_2024,
title = {Don't Look into the Dark: Latent Codes for Pluralistic Image Inpainting},
author = {Haiwei Chen and Yajie Zhao},
url = {http://arxiv.org/abs/2403.18186},
year = {2024},
date = {2024-03-01},
urldate = {2024-08-15},
publisher = {arXiv},
abstract = {We present a method for large-mask pluralistic image inpainting based on the generative framework of discrete latent codes. Our method learns latent priors, discretized as tokens, by only performing computations at the visible locations of the image. This is realized by a restrictive partial encoder that predicts the token label for each visible block, a bidirectional transformer that infers the missing labels by only looking at these tokens, and a dedicated synthesis network that couples the tokens with the partial image priors to generate coherent and pluralistic complete image even under extreme mask settings. Experiments on public benchmarks validate our design choices as the proposed method outperforms strong baselines in both visual quality and diversity metrics.},
note = {arXiv:2403.18186 [cs]},
keywords = {VGL},
pubstate = {published},
tppubtype = {misc}
}
2023
Yang, Jing; Xiao, Hanyuan; Teng, Wenbin; Cai, Yunxuan; Zhao, Yajie
Light Sampling Field and BRDF Representation for Physically-based Neural Rendering Journal Article
In: 2023, (Publisher: arXiv Version Number: 1).
Abstract | Links | BibTeX | Tags: DTIC, UARC, VGL
@article{yang_light_2023,
title = {Light Sampling Field and BRDF Representation for Physically-based Neural Rendering},
author = {Jing Yang and Hanyuan Xiao and Wenbin Teng and Yunxuan Cai and Yajie Zhao},
url = {https://arxiv.org/abs/2304.05472},
doi = {10.48550/ARXIV.2304.05472},
year = {2023},
date = {2023-04-01},
urldate = {2023-08-22},
abstract = {Physically-based rendering (PBR) is key for immersive rendering effects used widely in the industry to showcase detailed realistic scenes from computer graphics assets. A well-known caveat is that producing the same is computationally heavy and relies on complex capture devices. Inspired by the success in quality and efficiency of recent volumetric neural rendering, we want to develop a physically-based neural shader to eliminate device dependency and significantly boost performance. However, no existing lighting and material models in the current neural rendering approaches can accurately represent the comprehensive lighting models and BRDFs properties required by the PBR process. Thus, this paper proposes a novel lighting representation that models direct and indirect light locally through a light sampling strategy in a learned light sampling field. We also propose BRDF models to separately represent surface/subsurface scattering details to enable complex objects such as translucent material (i.e., skin, jade). We then implement our proposed representations with an end-to-end physically-based neural face skin shader, which takes a standard face asset (i.e., geometry, albedo map, and normal map) and an HDRI for illumination as inputs and generates a photo-realistic rendering as output. Extensive experiments showcase the quality and efficiency of our PBR face skin shader, indicating the effectiveness of our proposed lighting and material representations.},
note = {Publisher: arXiv
Version Number: 1},
keywords = {DTIC, UARC, VGL},
pubstate = {published},
tppubtype = {article}
}
Chemburkar, Ankur; Lu, Shuhong; Feng, Andrew
MoDDM: Text-to-Motion Synthesis using Discrete Diffusion Model Proceedings Article
In: 2023.
Abstract | Links | BibTeX | Tags: VGL
@inproceedings{chemburkar_moddm_2023,
title = {MoDDM: Text-to-Motion Synthesis using Discrete Diffusion Model},
author = {Ankur Chemburkar and Shuhong Lu and Andrew Feng},
url = {chrome-extension://efaidnbmnnnibpcajpcglclefindmkaj/https://papers.bmvc2023.org/0624.pdf},
year = {2023},
date = {2023-01-01},
abstract = {We present the motion discrete diffusion model (MoDDM) for synthesizing human motion from text descriptions that addresses challenges in cross-modal mapping and motion diversity. The previous methods that utilized variational autoencoder (VAE) to learn the latent distributions for text-to-motion synthesis tend to produce motions with less diversity and fidelity. While the diffusion models show promising results by generating high quality motions, they require higher computational costs and may produce motions less aligned with the input text. The proposed method combines the discrete latent space and diffusion models to learn an expressive conditional probabilistic mapping for motion synthesis. Our method utilizes vector quantization variational autoencoder (VQ-VAE) to learn discrete motion tokens and then applies discrete denoising diffusion probabilistic models (D3PM) to learn the conditional probability distributions for the motion tokens. The discrete classifier-free guidance is further utilized in the training process with proper guidance scale for aligning the motions and the corresponding text descriptions. By learning the denoising model in the discrete latent space, the method produces high quality motion results while greatly reducing computational costs compared to training the diffusion models on raw motion sequences. The evaluation results show that the proposed approach outperforms previous methods in both motion quality and text-to-motion matching accuracy.},
keywords = {VGL},
pubstate = {published},
tppubtype = {inproceedings}
}
2022
Liu, Shichen; Cai, Yunxuan; Chen, Haiwei; Zhou, Yichao; Zhao, Yajie
Rapid Face Asset Acquisition with Recurrent Feature Alignment Journal Article
In: ACM Trans. Graph., vol. 41, no. 6, pp. 214:1–214:17, 2022, ISSN: 0730-0301.
Abstract | Links | BibTeX | Tags: DTIC, VGL
@article{liu_rapid_2022,
title = {Rapid Face Asset Acquisition with Recurrent Feature Alignment},
author = {Shichen Liu and Yunxuan Cai and Haiwei Chen and Yichao Zhou and Yajie Zhao},
url = {https://dl.acm.org/doi/10.1145/3550454.3555509},
doi = {10.1145/3550454.3555509},
issn = {0730-0301},
year = {2022},
date = {2022-11-01},
urldate = {2023-03-31},
journal = {ACM Trans. Graph.},
volume = {41},
number = {6},
pages = {214:1–214:17},
abstract = {We present Recurrent Feature Alignment (ReFA), an end-to-end neural network for the very rapid creation of production-grade face assets from multi-view images. ReFA is on par with the industrial pipelines in quality for producing accurate, complete, registered, and textured assets directly applicable to physically-based rendering, but produces the asset end-to-end, fully automatically at a significantly faster speed at 4.5 FPS, which is unprecedented among neural-based techniques. Our method represents face geometry as a position map in the UV space. The network first extracts per-pixel features in both the multi-view image space and the UV space. A recurrent module then iteratively optimizes the geometry by projecting the image-space features to the UV space and comparing them with a reference UV-space feature. The optimized geometry then provides pixel-aligned signals for the inference of high-resolution textures. Experiments have validated that ReFA achieves a median error of 0.603mm in geometry reconstruction, is robust to extreme pose and expression, and excels in sparse-view settings. We believe that the progress achieved by our network enables lightweight, fast face assets acquisition that significantly boosts the downstream applications, such as avatar creation and facial performance capture. It will also enable massive database capturing for deep learning purposes.},
keywords = {DTIC, VGL},
pubstate = {published},
tppubtype = {article}
}
Kuang, Zhengfei; Li, Jiaman; He, Mingming; Wang, Tong; Zhao, Yajie
DenseGAP: Graph-Structured Dense Correspondence Learning with Anchor Points Proceedings Article
In: pp. 542–549, IEEE Computer Society, 2022, ISBN: 978-1-66549-062-7.
Abstract | Links | BibTeX | Tags: VGL
@inproceedings{kuang_densegap_2022,
title = {DenseGAP: Graph-Structured Dense Correspondence Learning with Anchor Points},
author = {Zhengfei Kuang and Jiaman Li and Mingming He and Tong Wang and Yajie Zhao},
url = {https://www.computer.org/csdl/proceedings-article/icpr/2022/09956472/1IHpppIuqOc},
doi = {10.1109/ICPR56361.2022.9956472},
isbn = {978-1-66549-062-7},
year = {2022},
date = {2022-08-01},
urldate = {2023-03-31},
pages = {542–549},
publisher = {IEEE Computer Society},
abstract = {Establishing dense correspondence between two images is a fundamental computer vision problem, which is typically tackled by matching local feature descriptors. However, without global awareness, such local features are often insufficient for disambiguating similar regions. And computing the pairwise feature correlation across images is both computation-expensive and memory-intensive. To make the local features aware of the global context and improve their matching accuracy, we introduce DenseGAP, a new solution for efficient Dense correspondence learning with a Graph-structured neural network conditioned on Anchor Points. Specifically, we first propose a graph structure that utilizes anchor points to provide sparse but reliable prior on inter- and intra-image context and propagates them to all image points via directed edges. We also design a graph-structured network to broadcast multi-level contexts via light-weighted message-passing layers and generate high-resolution feature maps at low memory cost. Finally, based on the predicted feature maps, we introduce a coarse-to-fine framework for accurate correspondence prediction using cycle consistency. Our feature descriptors capture both local and global information, thus enabling a continuous feature field for querying arbitrary points at high resolution. Through comprehensive ablative experiments and evaluations on large-scale indoor and outdoor datasets, we demonstrate that our method advances the state-of-the-art of correspondence learning on most benchmarks.},
keywords = {VGL},
pubstate = {published},
tppubtype = {inproceedings}
}
Chen, Haiwei; Liu, Jiayi; Chen, Weikai; Liu, Shichen; Zhao, Yajie
Exemplar-based Pattern Synthesis with Implicit Periodic Field Network Proceedings Article
In: 2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 3698–3707, IEEE, New Orleans, LA, USA, 2022, ISBN: 978-1-66546-946-3.
Links | BibTeX | Tags: UARC, VGL
@inproceedings{chen_exemplar-based_2022,
title = {Exemplar-based Pattern Synthesis with Implicit Periodic Field Network},
author = {Haiwei Chen and Jiayi Liu and Weikai Chen and Shichen Liu and Yajie Zhao},
url = {https://ieeexplore.ieee.org/document/9879904/},
doi = {10.1109/CVPR52688.2022.00369},
isbn = {978-1-66546-946-3},
year = {2022},
date = {2022-06-01},
urldate = {2023-02-10},
booktitle = {2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
pages = {3698–3707},
publisher = {IEEE},
address = {New Orleans, LA, USA},
keywords = {UARC, VGL},
pubstate = {published},
tppubtype = {inproceedings}
}
Danieau, Fabien; Guillotel, Philippe; Hoyet, Ludovic; Tonneau, Steve; Zhao, Yajie
Editorial: Creating Lifelike Digital Humans Journal Article
In: Front. Virtual Real., vol. 3, pp. 906118, 2022, ISSN: 2673-4192.
@article{danieau_editorial_2022,
title = {Editorial: Creating Lifelike Digital Humans},
author = {Fabien Danieau and Philippe Guillotel and Ludovic Hoyet and Steve Tonneau and Yajie Zhao},
url = {https://www.frontiersin.org/articles/10.3389/frvir.2022.906118/full},
doi = {10.3389/frvir.2022.906118},
issn = {2673-4192},
year = {2022},
date = {2022-04-01},
urldate = {2024-08-13},
journal = {Front. Virtual Real.},
volume = {3},
pages = {906118},
keywords = {VGL},
pubstate = {published},
tppubtype = {article}
}
Liu, Shichen; Li, Tianye; Chen, Weikai; Li, Hao
A General Differentiable Mesh Renderer for Image-Based 3D Reasoning Journal Article
In: IEEE Transactions on Pattern Analysis and Machine Intelligence, vol. 44, no. 1, pp. 50–62, 2022, ISSN: 1939-3539, (Conference Name: IEEE Transactions on Pattern Analysis and Machine Intelligence).
Abstract | Links | BibTeX | Tags: VGL
@article{liu_general_2022,
title = {A General Differentiable Mesh Renderer for Image-Based 3D Reasoning},
author = {Shichen Liu and Tianye Li and Weikai Chen and Hao Li},
doi = {10.1109/TPAMI.2020.3007759},
issn = {1939-3539},
year = {2022},
date = {2022-01-01},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
volume = {44},
number = {1},
pages = {50–62},
abstract = {Rendering bridges the gap between 2D vision and 3D scenes by simulating the physical process of image formation. By inverting such a renderer, one can think of a learning approach to infer 3D information from 2D images. However, standard graphics renderers involve a fundamental step called rasterization, which prevents rendering from being differentiable. Unlike the state-of-the-art differentiable renderers (Kato et al. 2018 and Loper 2018), which only approximate the rendering gradient in the backpropagation, we propose a naturally differentiable rendering framework that is able to (1) directly render colorized mesh using differentiable functions and (2) back-propagate efficient supervision to mesh vertices and their attributes from various forms of image representations. The key to our framework is a novel formulation that views rendering as an aggregation function that fuses the probabilistic contributions of all mesh triangles with respect to the rendered pixels. Such a formulation enables our framework to flow gradients to occluded and distant vertices, which cannot be achieved by the previous state of the art. We show that by using the proposed renderer, one can achieve significant improvement in 3D unsupervised single-view reconstruction both qualitatively and quantitatively. Experiments also demonstrate that our approach can handle the challenging tasks in image-based shape fitting, which remain nontrivial for existing differentiable renderers.},
note = {Conference Name: IEEE Transactions on Pattern Analysis and Machine Intelligence},
keywords = {VGL},
pubstate = {published},
tppubtype = {article}
}
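The abstract's central formulation, rendering as an aggregation of probabilistic per-triangle contributions, can be pictured with a short toy function. This is a simplified sketch rather than the paper's renderer: the actual method uses squared screen-space distances, a background term, and a different normalization, all omitted here, and the function and argument names are invented for illustration.

# Toy "aggregation" rasterizer: every triangle contributes softly to every pixel.
import torch

def soft_aggregate(signed_dist, tri_color, tri_depth, sigma=1e-4, gamma=0.1):
    # signed_dist: (P, T) signed 2D distance from each pixel to each projected triangle,
    #              positive when the pixel lies inside the triangle
    # tri_color:   (T, 3) color contribution of each triangle
    # tri_depth:   (P, T) per-pixel depth of each triangle (smaller = closer)
    prob = torch.sigmoid(signed_dist / sigma)            # soft coverage probability, (P, T)
    z_weight = torch.softmax(-tri_depth / gamma, dim=1)  # soft z-buffer: nearer triangles dominate
    weight = prob * z_weight
    weight = weight / (weight.sum(dim=1, keepdim=True) + 1e-8)
    return weight @ tri_color                            # aggregated pixel colors, (P, 3)

Because every triangle receives a nonzero, differentiable weight at every pixel, gradients can flow back to occluded and distant vertices, which is the property the abstract emphasizes over hard rasterization.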
2021
Li, Jiaman; Villegas, Ruben; Ceylan, Duygu; Yang, Jimei; Kuang, Zhengfei; Li, Hao; Zhao, Yajie
Task-Generic Hierarchical Human Motion Prior using VAEs Proceedings Article
In: 2021 International Conference on 3D Vision (3DV), pp. 771–781, IEEE, London, United Kingdom, 2021, ISBN: 978-1-66542-688-6.
Links | BibTeX | Tags: DTIC, UARC, VGL
@inproceedings{li_task-generic_2021,
title = {Task-Generic Hierarchical Human Motion Prior using VAEs},
author = {Jiaman Li and Ruben Villegas and Duygu Ceylan and Jimei Yang and Zhengfei Kuang and Hao Li and Yajie Zhao},
url = {https://ieeexplore.ieee.org/document/9665881/},
doi = {10.1109/3DV53792.2021.00086},
isbn = {978-1-66542-688-6},
year = {2021},
date = {2021-12-01},
urldate = {2022-09-22},
booktitle = {2021 International Conference on 3D Vision (3DV)},
pages = {771–781},
publisher = {IEEE},
address = {London, United Kingdom},
keywords = {DTIC, UARC, VGL},
pubstate = {published},
tppubtype = {inproceedings}
}
Liu, Shichen; Zhou, Yichao; Zhao, Yajie
VaPiD: A Rapid Vanishing Point Detector via Learned Optimizers Proceedings Article
In: 2021 IEEE/CVF International Conference on Computer Vision (ICCV), pp. 12839–12848, IEEE, Montreal, QC, Canada, 2021, ISBN: 978-1-66542-812-5.
Links | BibTeX | Tags: DTIC, UARC, VGL
@inproceedings{liu_vapid_2021,
title = {VaPiD: A Rapid Vanishing Point Detector via Learned Optimizers},
author = {Shichen Liu and Yichao Zhou and Yajie Zhao},
url = {https://ieeexplore.ieee.org/document/9711313/},
doi = {10.1109/ICCV48922.2021.01262},
isbn = {978-1-66542-812-5},
year = {2021},
date = {2021-10-01},
urldate = {2022-09-22},
booktitle = {2021 IEEE/CVF International Conference on Computer Vision (ICCV)},
pages = {12839–12848},
publisher = {IEEE},
address = {Montreal, QC, Canada},
keywords = {DTIC, UARC, VGL},
pubstate = {published},
tppubtype = {inproceedings}
}
Li, Tianye; Liu, Shichen; Bolkart, Timo; Liu, Jiayi; Li, Hao; Zhao, Yajie
Topologically Consistent Multi-View Face Inference Using Volumetric Sampling Proceedings Article
In: 2021 IEEE/CVF International Conference on Computer Vision (ICCV), pp. 3804–3814, IEEE, Montreal, QC, Canada, 2021, ISBN: 978-1-66542-812-5.
Links | BibTeX | Tags: DTIC, UARC, VGL
@inproceedings{li_topologically_2021,
title = {Topologically Consistent Multi-View Face Inference Using Volumetric Sampling},
author = {Tianye Li and Shichen Liu and Timo Bolkart and Jiayi Liu and Hao Li and Yajie Zhao},
url = {https://ieeexplore.ieee.org/document/9711264/},
doi = {10.1109/ICCV48922.2021.00380},
isbn = {978-1-66542-812-5},
year = {2021},
date = {2021-10-01},
urldate = {2022-09-22},
booktitle = {2021 IEEE/CVF International Conference on Computer Vision (ICCV)},
pages = {3804–3814},
publisher = {IEEE},
address = {Montreal, QC, Canada},
keywords = {DTIC, UARC, VGL},
pubstate = {published},
tppubtype = {inproceedings}
}
Xiang, Sitao; Gu, Yuming; Xiang, Pengda; Chai, Menglei; Li, Hao; Zhao, Yajie; He, Mingming
DisUnknown: Distilling Unknown Factors for Disentanglement Learning Proceedings Article
In: 2021 IEEE/CVF International Conference on Computer Vision (ICCV), pp. 14790–14799, IEEE, Montreal, QC, Canada, 2021, ISBN: 978-1-66542-812-5.
Links | BibTeX | Tags: DTIC, UARC, VGL
@inproceedings{xiang_disunknown_2021,
title = {DisUnknown: Distilling Unknown Factors for Disentanglement Learning},
author = {Sitao Xiang and Yuming Gu and Pengda Xiang and Menglei Chai and Hao Li and Yajie Zhao and Mingming He},
url = {https://ieeexplore.ieee.org/document/9709965/},
doi = {10.1109/ICCV48922.2021.01454},
isbn = {978-1-66542-812-5},
year = {2021},
date = {2021-10-01},
urldate = {2022-09-23},
booktitle = {2021 IEEE/CVF International Conference on Computer Vision (ICCV)},
pages = {14790–14799},
publisher = {IEEE},
address = {Montreal, QC, Canada},
keywords = {DTIC, UARC, VGL},
pubstate = {published},
tppubtype = {inproceedings}
}
Xiang, Sitao
Eliminating topological errors in neural network rotation estimation using self-selecting ensembles Journal Article
In: ACM Trans. Graph., vol. 40, no. 4, pp. 167:1–167:21, 2021, ISSN: 0730-0301.
Abstract | Links | BibTeX | Tags: VGL
@article{xiang_eliminating_2021,
title = {Eliminating topological errors in neural network rotation estimation using self-selecting ensembles},
author = {Sitao Xiang},
url = {https://dl.acm.org/doi/10.1145/3450626.3459882},
doi = {10.1145/3450626.3459882},
issn = {0730-0301},
year = {2021},
date = {2021-07-01},
urldate = {2023-03-31},
journal = {ACM Trans. Graph.},
volume = {40},
number = {4},
pages = {167:1–167:21},
abstract = {Many problems in computer graphics and computer vision applications involve inferring a rotation from a variety of different forms of inputs. With the increasing use of deep learning, neural networks have been employed to solve such problems. However, the traditional representations of 3D rotations, quaternions and Euler angles, are found to be problematic for neural networks in practice, producing seemingly unavoidable large estimation errors. Previous research has identified the discontinuity of the mapping from SO(3) to the quaternions or Euler angles as the source of such errors, and to solve it, embeddings of SO(3) have been proposed as the output representation of rotation estimation networks instead. In this paper, we argue that the argument against quaternions and Euler angles based on local discontinuities of the mappings from SO(3) is flawed, and instead provide a different argument based on the global topological properties of SO(3) that also establishes the lower bound of maximum error when using quaternions and Euler angles for rotation estimation networks. Extending this view, we discover that rotation symmetries in the input object cause additional topological problems that even using embeddings of SO(3) as the output representation would not correctly handle. We propose the self-selecting ensemble, a topologically motivated approach, in which the network makes multiple predictions and assigns weights to them. We show theoretically and with experiments that our method can be combined with a wide range of different rotation representations and can handle all kinds of finite symmetries in 3D rotation estimation problems.},
keywords = {VGL},
pubstate = {published},
tppubtype = {article}
}
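The self-selecting ensemble described in this abstract can be pictured as a prediction head that emits several rotation hypotheses together with selection weights and keeps the highest-weighted one at inference time. The sketch below is an illustrative PyTorch head using a quaternion parameterization; the class name, hypothesis count, and parameterization are assumptions for this example, not the paper's code.

# Hypothetical self-selecting ensemble head for rotation estimation.
import torch
import torch.nn as nn

class SelfSelectingRotationHead(nn.Module):
    def __init__(self, feat_dim, num_hypotheses=4):
        super().__init__()
        self.num_hypotheses = num_hypotheses
        self.quat_head = nn.Linear(feat_dim, 4 * num_hypotheses)  # K candidate quaternions
        self.weight_head = nn.Linear(feat_dim, num_hypotheses)    # K selection scores

    def forward(self, feats):
        # feats: (B, feat_dim) features from any backbone network
        quats = self.quat_head(feats).view(-1, self.num_hypotheses, 4)
        quats = quats / quats.norm(dim=-1, keepdim=True).clamp_min(1e-8)  # unit quaternions
        weights = torch.softmax(self.weight_head(feats), dim=-1)          # (B, K)
        # At inference, keep the hypothesis with the largest selection weight.
        best = weights.argmax(dim=-1)
        selected = quats[torch.arange(quats.shape[0], device=quats.device), best]
        return quats, weights, selected

The motivation stated in the abstract is topological: a single continuous output cannot avoid a lower-bounded worst-case error for these representations, whereas several weighted predictions combined with a discrete selection can sidestep that bound.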
Chen, Haiwei; Liu, Shichen; Chen, Weikai; Li, Hao; Hill, Randall
Equivariant Point Network for 3D Point Cloud Analysis Proceedings Article
In: Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 14514–14523, 2021.
Links | BibTeX | Tags: UARC, VGL
@inproceedings{chen_equivariant_2021,
title = {Equivariant Point Network for 3D Point Cloud Analysis},
author = {Haiwei Chen and Shichen Liu and Weikai Chen and Hao Li and Randall Hill},
url = {https://openaccess.thecvf.com/content/CVPR2021/html/Chen_Equivariant_Point_Network_for_3D_Point_Cloud_Analysis_CVPR_2021_paper.html},
year = {2021},
date = {2021-01-01},
urldate = {2023-03-31},
pages = {14514–14523},
keywords = {UARC, VGL},
pubstate = {published},
tppubtype = {inproceedings}
}
2020
Zhou, Yi; Wu, Chenglei; Li, Zimo; Cao, Chen; Ye, Yuting; Saragih, Jason; Li, Hao; Sheikh, Yaser
Fully convolutional mesh autoencoder using efficient spatially varying kernels Proceedings Article
In: Proceedings of the 34th International Conference on Neural Information Processing Systems, pp. 9251–9262, Curran Associates Inc., Red Hook, NY, USA, 2020, ISBN: 978-1-71382-954-6.
@inproceedings{zhou_fully_2020,
title = {Fully convolutional mesh autoencoder using efficient spatially varying kernels},
author = {Yi Zhou and Chenglei Wu and Zimo Li and Chen Cao and Yuting Ye and Jason Saragih and Hao Li and Yaser Sheikh},
isbn = {978-1-71382-954-6},
year = {2020},
date = {2020-12-01},
urldate = {2023-03-31},
booktitle = {Proceedings of the 34th International Conference on Neural Information Processing Systems},
pages = {9251–9262},
publisher = {Curran Associates Inc.},
address = {Red Hook, NY, USA},
series = {NIPS'20},
abstract = {Learning latent representations of registered meshes is useful for many 3D tasks. Techniques have recently shifted to neural mesh autoencoders. Although they demonstrate higher precision than traditional methods, they remain unable to capture fine-grained deformations. Furthermore, these methods can only be applied to a template-specific surface mesh and are not applicable to more general meshes, such as tetrahedral and non-manifold meshes. While more general graph convolution methods can be employed, they fall short in reconstruction precision and require higher memory usage. In this paper, we propose a non-template-specific fully convolutional mesh autoencoder for arbitrary registered mesh data. It is enabled by our novel convolution and (un)pooling operators, learned with globally shared weights and locally varying coefficients, which can efficiently capture the spatially varying content presented by irregular mesh connections. Our model outperforms state-of-the-art methods on reconstruction accuracy. In addition, the latent codes of our network are fully localized thanks to the fully convolutional structure, and thus have much higher interpolation capability than many traditional 3D mesh generation models.},
keywords = {VGL},
pubstate = {published},
tppubtype = {inproceedings}
}
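The operator described in this abstract, globally shared weights combined with locally varying coefficients, can be sketched as a per-edge mixture of a small shared weight basis. The layer below is schematic and written only for this listing; the names, basis size, and edge-list interface are assumptions rather than the paper's operator definition.

# Schematic mesh convolution: shared weight bases, per-edge mixing coefficients.
import torch
import torch.nn as nn

class LocallyVaryingMeshConv(nn.Module):
    def __init__(self, in_dim, out_dim, num_edges, num_bases=8):
        super().__init__()
        # Globally shared weight bases, reused at every vertex of the fixed mesh.
        self.bases = nn.Parameter(torch.randn(num_bases, in_dim, out_dim) * 0.01)
        # Locally varying coefficients, one small vector per directed edge.
        self.coeffs = nn.Parameter(torch.randn(num_edges, num_bases) * 0.01)

    def forward(self, x, edges):
        # x:     (V, in_dim) per-vertex features
        # edges: (E, 2) directed edges (src, dst) of the registered mesh connectivity
        src, dst = edges[:, 0], edges[:, 1]
        # Mix the shared bases with each edge's own coefficients -> per-edge weight matrix.
        edge_weight = torch.einsum('eb,bio->eio', self.coeffs, self.bases)  # (E, in, out)
        messages = torch.einsum('ei,eio->eo', x[src], edge_weight)          # (E, out)
        out = torch.zeros(x.shape[0], messages.shape[-1], device=x.device, dtype=x.dtype)
        out.index_add_(0, dst, messages)  # aggregate messages at destination vertices
        return out

Because the basis is shared globally while each edge stores only a few mixing coefficients, the layer can adapt to irregular mesh connectivity without keeping a full weight matrix per vertex, which is the memory trade-off the abstract points to.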
2024
Chen, Haiwei; Zhao, Yajie
Don't Look into the Dark: Latent Codes for Pluralistic Image Inpainting Proceedings Article
In: Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 7591–7600, 2024.
Abstract | Links | BibTeX | Tags: DTIC, Graphics, VGL
@inproceedings{chen_dont_nodate,
title = {Don't Look into the Dark: Latent Codes for Pluralistic Image Inpainting},
author = {Haiwei Chen and Yajie Zhao},
url = {https://openaccess.thecvf.com/content/CVPR2024/html/Chen_Dont_Look_into_the_Dark_Latent_Codes_for_Pluralistic_Image_CVPR_2024_paper.html},
pages = {7591–7600},
abstract = {We present a method for large-mask pluralistic image inpainting based on the generative framework of discrete latent codes. Our method learns latent priors, discretized as tokens, by performing computations only at the visible locations of the image. This is realized by a restrictive partial encoder that predicts the token label for each visible block, a bidirectional transformer that infers the missing labels by looking only at these tokens, and a dedicated synthesis network that couples the tokens with the partial image priors to generate coherent and pluralistic complete images even under extreme mask settings. Experiments on public benchmarks validate our design choices, as the proposed method outperforms strong baselines in both visual quality and diversity metrics.},
keywords = {DTIC, Graphics, VGL},
pubstate = {published},
tppubtype = {inproceedings}
}
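The pipeline in this abstract encodes the visible blocks into discrete token labels and lets a bidirectional transformer infer the missing labels. Below is a compact, hypothetical sketch of that token-completion stage only; the vocabulary size, module names, and sampling scheme are illustrative assumptions, and the restrictive partial encoder and the synthesis network that decodes tokens back to pixels are assumed to exist elsewhere.

# Hypothetical masked-token completion stage for pluralistic inpainting.
import torch
import torch.nn as nn

class MaskedTokenInpainter(nn.Module):
    def __init__(self, vocab_size=1024, dim=256, num_blocks=256, num_layers=4, num_heads=8):
        super().__init__()
        self.token_embed = nn.Embedding(vocab_size + 1, dim)  # extra id acts as [MASK]
        self.pos_embed = nn.Parameter(torch.zeros(1, num_blocks, dim))
        layer = nn.TransformerEncoderLayer(dim, num_heads, batch_first=True)
        self.transformer = nn.TransformerEncoder(layer, num_layers)  # bidirectional attention
        self.to_logits = nn.Linear(dim, vocab_size)
        self.mask_id = vocab_size

    def forward(self, tokens, visible):
        # tokens:  (B, N) token labels from the partial encoder (values < vocab_size)
        # visible: (B, N) boolean mask, True where the image block is visible
        tokens = torch.where(visible, tokens, torch.full_like(tokens, self.mask_id))
        h = self.token_embed(tokens) + self.pos_embed       # assumes N == num_blocks
        h = self.transformer(h)
        logits = self.to_logits(h)                          # (B, N, vocab_size)
        # Sample labels for the hidden blocks; visible labels are kept unchanged.
        sampled = torch.distributions.Categorical(logits=logits).sample()
        return torch.where(visible, tokens, sampled)

In this sketch, sampling the hidden labels instead of taking the argmax is what makes the completion pluralistic: re-running the sampler yields different but coherent fills for the same mask.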