Publications
Chen, Meida; Lal, Devashish; Yu, Zifan; Xu, Jiuyi; Feng, Andrew; You, Suya; Nurunnabi, Abdul; Shi, Yangming
Large-Scale 3D Terrain Reconstruction Using 3D Gaussian Splatting for Visualization and Simulation Journal Article
In: Int. Arch. Photogramm. Remote Sens. Spatial Inf. Sci., vol. XLVIII-2-2024, pp. 49–54, 2024, ISSN: 2194-9034.
@article{chen_large-scale_2024,
title = {Large-Scale 3D Terrain Reconstruction Using 3D Gaussian Splatting for Visualization and Simulation},
author = {Meida Chen and Devashish Lal and Zifan Yu and Jiuyi Xu and Andrew Feng and Suya You and Abdul Nurunnabi and Yangming Shi},
url = {https://isprs-archives.copernicus.org/articles/XLVIII-2-2024/49/2024/},
doi = {10.5194/isprs-archives-XLVIII-2-2024-49-2024},
issn = {2194-9034},
year = {2024},
date = {2024-06-01},
urldate = {2024-06-20},
journal = {Int. Arch. Photogramm. Remote Sens. Spatial Inf. Sci.},
volume = {XLVIII-2-2024},
pages = {49–54},
abstract = {The fusion of low-cost unmanned aerial systems (UAS) with advanced photogrammetric techniques has revolutionized 3D terrain reconstruction, enabling the automated creation of detailed models. Concurrently, the advent of 3D Gaussian Splatting has introduced a paradigm shift in 3D data representation, offering visually realistic renditions distinct from traditional polygon-based models. Our research builds upon this foundation, aiming to integrate Gaussian Splatting into interactive simulations for immersive virtual environments. We address challenges such as collision detection by adopting a hybrid approach, combining Gaussian Splatting with photogrammetry-derived meshes. Through comprehensive experimentation covering varying terrain sizes and Gaussian densities, we evaluate scalability, performance, and limitations. Our findings contribute to advancing the use of advanced computer graphics techniques for enhanced 3D terrain visualization and simulation.},
keywords = {DTIC, Graphics, VGL},
pubstate = {published},
tppubtype = {article}
}
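
The hybrid approach this abstract describes renders the scene with Gaussian splats while answering physics queries against a photogrammetry-derived mesh. Below is a minimal sketch of that collision side in Python/NumPy, assuming the mesh proxy is available as an (N, 3, 3) triangle array; the function and variable names are illustrative and not taken from the paper.

import numpy as np

def ray_hits_mesh(origin, direction, triangles, eps=1e-8):
    """Nearest ray/triangle hit distance against a mesh collision proxy (Moller-Trumbore).

    triangles: (N, 3, 3) triangle vertices; origin, direction: (3,) arrays.
    Returns the smallest positive hit distance, or None if the ray misses."""
    v0, v1, v2 = triangles[:, 0], triangles[:, 1], triangles[:, 2]
    e1, e2 = v1 - v0, v2 - v0
    p = np.cross(direction, e2)
    det = np.einsum("ij,ij->i", e1, p)
    valid = np.abs(det) > eps                          # skip near-parallel triangles
    inv_det = np.where(valid, 1.0 / np.where(valid, det, 1.0), 0.0)
    s = origin - v0
    u = np.einsum("ij,ij->i", s, p) * inv_det
    q = np.cross(s, e1)
    v = np.einsum("ij,j->i", q, direction) * inv_det
    t = np.einsum("ij,ij->i", q, e2) * inv_det
    hit = valid & (u >= 0) & (v >= 0) & (u + v <= 1) & (t > eps)
    return float(t[hit].min()) if hit.any() else None

# Usage idea: the splats are only rendered; camera motion is clamped by the mesh proxy.
# d = ray_hits_mesh(cam_pos, move_dir, terrain_triangles)
# if d is not None and d < step_length:
#     step_length = d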
Liu, Rong; Xu, Rui; Hu, Yue; Chen, Meida; Feng, Andrew
AtomGS: Atomizing Gaussian Splatting for High-Fidelity Radiance Field Miscellaneous
2024, (Version Number: 2).
@misc{liu_atomgs_2024,
title = {AtomGS: Atomizing Gaussian Splatting for High-Fidelity Radiance Field},
author = {Rong Liu and Rui Xu and Yue Hu and Meida Chen and Andrew Feng},
url = {https://arxiv.org/abs/2405.12369},
doi = {10.48550/ARXIV.2405.12369},
year = {2024},
date = {2024-05-01},
urldate = {2024-07-11},
publisher = {arXiv},
abstract = {3D Gaussian Splatting (3DGS) has recently advanced radiance field reconstruction by offering superior capabilities for novel view synthesis and real-time rendering speed. However, its strategy of blending optimization and adaptive density control might lead to sub-optimal results; it can sometimes yield noisy geometry and blurry artifacts due to prioritizing optimizing large Gaussians at the cost of adequately densifying smaller ones. To address this, we introduce AtomGS, consisting of Atomized Proliferation and Geometry-Guided Optimization. The Atomized Proliferation constrains ellipsoid Gaussians of various sizes into more uniform-sized Atom Gaussians. The strategy enhances the representation of areas with fine features by placing greater emphasis on densification in accordance with scene details. In addition, we proposed a Geometry-Guided Optimization approach that incorporates an Edge-Aware Normal Loss. This optimization method effectively smooths flat surfaces while preserving intricate details. Our evaluation shows that AtomGS outperforms existing state-of-the-art methods in rendering quality. Additionally, it achieves competitive accuracy in geometry reconstruction and offers a significant improvement in training speed over other SDF-based methods. More interactive demos can be found in our website (https://rongliu-leo.github.io/AtomGS/).},
note = {Version Number: 2},
keywords = {Graphics, VGL},
pubstate = {published},
tppubtype = {misc}
}
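
The Edge-Aware Normal Loss mentioned above penalizes variation in the rendered normal map except where the RGB image itself has strong edges, which is what smooths flat surfaces while keeping detail. The exact formulation is in the paper; the PyTorch sketch below is a generic edge-aware smoothness term of that kind, and every tensor name in it is an assumption rather than AtomGS code.

import torch

def edge_aware_normal_loss(normals, image):
    """Generic edge-aware smoothness on a rendered normal map.

    normals: (3, H, W) unit normals; image: (3, H, W) RGB in [0, 1].
    Penalizes normal-map gradients, downweighted where the image has strong edges."""
    # Finite-difference gradients along x and y.
    dn_dx = (normals[:, :, 1:] - normals[:, :, :-1]).abs().mean(dim=0)  # (H, W-1)
    dn_dy = (normals[:, 1:, :] - normals[:, :-1, :]).abs().mean(dim=0)  # (H-1, W)
    di_dx = (image[:, :, 1:] - image[:, :, :-1]).abs().mean(dim=0)
    di_dy = (image[:, 1:, :] - image[:, :-1, :]).abs().mean(dim=0)
    # Edge-aware weights: strong RGB edges suppress the smoothness penalty there.
    w_x = torch.exp(-di_dx)
    w_y = torch.exp(-di_dy)
    return (w_x * dn_dx).mean() + (w_y * dn_dy).mean()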
Zhang, Hui; Kuang, Bingran; Zhao, Yajie
Camera Calibration using a Single View of a Symmetric Object Proceedings Article
In: ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 2705–2709, IEEE, Seoul, Republic of Korea, 2024, ISBN: 9798350344851.
@inproceedings{zhang_camera_2024,
title = {Camera Calibration using a Single View of a Symmetric Object},
author = {Hui Zhang and Bingran Kuang and Yajie Zhao},
url = {https://ieeexplore.ieee.org/document/10446005/},
doi = {10.1109/ICASSP48485.2024.10446005},
isbn = {9798350344851},
year = {2024},
date = {2024-04-01},
urldate = {2024-06-25},
booktitle = {ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
pages = {2705–2709},
publisher = {IEEE},
address = {Seoul, Korea, Republic of},
keywords = {Graphics, VGL},
pubstate = {published},
tppubtype = {inproceedings}
}
Hou, Yu; Chen, Meida; Volk, Rebekka; Soibelman, Lucio
Investigation on performance of RGB point cloud and thermal information data fusion for 3D building thermal map modeling using aerial images under different experimental conditions Journal Article
In: Journal of Building Engineering, vol. 45, pp. 103380, 2022, ISSN: 2352-7102.
@article{hou_investigation_2022,
title = {Investigation on performance of RGB point cloud and thermal information data fusion for 3D building thermal map modeling using aerial images under different experimental conditions},
author = {Yu Hou and Meida Chen and Rebekka Volk and Lucio Soibelman},
url = {https://linkinghub.elsevier.com/retrieve/pii/S2352710221012389},
doi = {10.1016/j.jobe.2021.103380},
issn = {2352-7102},
year = {2022},
date = {2022-01-01},
urldate = {2022-09-28},
journal = {Journal of Building Engineering},
volume = {45},
pages = {103380},
keywords = {Graphics},
pubstate = {published},
tppubtype = {article}
}
Li, Jiaman; Kuang, Zhengfei; Zhao, Yajie; He, Mingming; Bladin, Karl; Li, Hao
Dynamic Facial Asset and Rig Generation from a Single Scan Journal Article
In: ACM Transactions on Graphics, vol. 39, no. 6, 2020.
@article{li_dynamic_2020,
title = {Dynamic Facial Asset and Rig Generation from a Single Scan},
author = {Jiaman Li and Zhengfei Kuang and Yajie Zhao and Mingming He and Karl Bladin and Hao Li},
url = {https://dl.acm.org/doi/10.1145/3414685.3417817},
doi = {10.1145/3414685.3417817},
year = {2020},
date = {2020-11-01},
journal = {ACM Transactions on Graphics},
volume = {39},
number = {6},
abstract = {The creation of high-fidelity computer-generated (CG) characters for films and games is tied with intensive manual labor, which involves the creation of comprehensive facial assets that are often captured using complex hardware. To simplify and accelerate this digitization process, we propose a framework for the automatic generation of high-quality dynamic facial models, including rigs which can be readily deployed for artists to polish. Our framework takes a single scan as input to generate a set of personalized blendshapes, dynamic textures, as well as secondary facial components (e.g., teeth and eyeballs). Based on a facial database with over 4,000 scans with pore-level details, varying expressions and identities, we adopt a self-supervised neural network to learn personalized blendshapes from a set of template expressions. We also model the joint distribution between identities and expressions, enabling the inference of a full set of personalized blendshapes with dynamic appearances from a single neutral input scan. Our generated personalized face rig assets are seamlessly compatible with professional production pipelines for facial animation and rendering. We demonstrate a highly robust and effective framework on a wide range of subjects, and showcase high-fidelity facial animations with automatically generated personalized dynamic textures.},
keywords = {ARO-Coop, Graphics},
pubstate = {published},
tppubtype = {article}
}
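
The personalized blendshapes described above are applied, as in standard facial rigs, as a weighted combination of per-expression offsets added to the neutral mesh. A minimal NumPy sketch of that rig evaluation follows; the array names are illustrative, and the paper's generation network itself is not reproduced.

import numpy as np

def evaluate_blendshape_rig(neutral, blendshapes, weights):
    """neutral: (V, 3) vertices; blendshapes: (K, V, 3) expression meshes;
    weights: (K,) activations in [0, 1]. Returns the deformed (V, 3) mesh."""
    deltas = blendshapes - neutral[None, :, :]          # per-expression offsets
    return neutral + np.tensordot(weights, deltas, axes=1)

# Example: 30% of expression 0 and 70% of expression 2 on a toy 4-vertex face.
neutral = np.zeros((4, 3))
shapes = np.random.default_rng(0).normal(size=(3, 4, 3))
posed = evaluate_blendshape_rig(neutral, shapes, np.array([0.3, 0.0, 0.7]))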
Li, Ruilong; Xiu, Yuliang; Saito, Shunsuke; Huang, Zeng; Olszewski, Kyle; Li, Hao
Monocular Real-Time Volumetric Performance Capture Journal Article
In: ResearchGate, pp. 30, 2020.
@article{li_monocular_2020,
title = {Monocular Real-Time Volumetric Performance Capture},
author = {Ruilong Li and Yuliang Xiu and Shunsuke Saito and Zeng Huang and Kyle Olszewski and Hao Li},
url = {https://www.researchgate.net/publication/343279742_Monocular_Real-Time_Volumetric_Performance_Capture},
year = {2020},
date = {2020-07-01},
journal = {ResearchGate},
pages = {30},
abstract = {We present the first approach to volumetric performance capture and novel-view rendering at real-time speed from monocular video, eliminating the need for expensive multi-view systems or cumbersome pre-acquisition of a personalized template model. Our system reconstructs a fully textured 3D human from each frame by leveraging Pixel-Aligned Implicit Function (PIFu). While PIFu achieves high-resolution reconstruction in a memory-efficient manner, its computationally expensive inference prevents us from deploying such a system for real-time applications. To this end, we propose a novel hierarchical surface localization algorithm and a direct rendering method without explicitly extracting surface meshes. By culling unnecessary regions for evaluation in a coarse-to-fine manner, we successfully accelerate the reconstruction by two orders of magnitude from the baseline without compromising the quality. Furthermore, we introduce an Online Hard Example Mining (OHEM) technique that effectively suppresses failure modes due to the rare occurrence of challenging examples. We adaptively update the sampling probability of the training data based on the current reconstruction accuracy, which effectively alleviates reconstruction artifacts. Our experiments and evaluations demonstrate the robustness of our system to various challenging angles, illuminations, poses, and clothing styles. We also show that our approach compares favorably with the state-of-the-art monocular performance capture. Our proposed approach removes the need for multi-view studio settings and enables a consumer-accessible solution for volumetric capture.},
keywords = {Graphics, UARC},
pubstate = {published},
tppubtype = {article}
}
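
The hierarchical surface localization summarized above evaluates the implicit function on a coarse grid and refines only cells near the surface, which is where most of the speedup comes from. The sketch below is a simplified single-level version of that culling idea in NumPy/SciPy, with a toy sphere occupancy standing in for PIFu; all names are assumptions.

import numpy as np
from scipy.ndimage import maximum_filter, minimum_filter

def occupancy(points):
    """Toy implicit function standing in for PIFu: 1 inside a unit sphere, else 0."""
    return (np.linalg.norm(points, axis=-1) < 1.0).astype(np.float32)

def coarse_to_fine_eval(res_coarse=16, refine=4, bound=1.5):
    # Coarse pass over the whole volume.
    axis = np.linspace(-bound, bound, res_coarse)
    gx, gy, gz = np.meshgrid(axis, axis, axis, indexing="ij")
    coarse = occupancy(np.stack([gx, gy, gz], axis=-1))
    # Cells whose neighborhood mixes inside/outside samples straddle the surface.
    boundary = maximum_filter(coarse, 3) != minimum_filter(coarse, 3)
    cell = 2 * bound / (res_coarse - 1)
    fine_points = []
    for idx in np.argwhere(boundary):                  # refine boundary cells only
        center = -bound + idx * cell
        local = np.linspace(-cell / 2, cell / 2, refine)
        lx, ly, lz = np.meshgrid(local, local, local, indexing="ij")
        fine_points.append(center + np.stack([lx, ly, lz], axis=-1).reshape(-1, 3))
    fine_points = np.concatenate(fine_points)          # far fewer samples than a dense fine grid
    return occupancy(fine_points), fine_points

fine_occ, fine_pts = coarse_to_fine_eval()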
Huang, Zeng; Xu, Yuanlu; Lassner, Christoph; Li, Hao; Tung, Tony
ARCH: Animatable Reconstruction of Clothed Humans Proceedings Article
In: Proceedings of the CVPR 2020, pp. 3090–3099, IEEE, Seattle, Washington, 2020.
@inproceedings{huang_arch_2020,
title = {ARCH: Animatable Reconstruction of Clothed Humans},
author = {Zeng Huang and Yuanlu Xu and Christoph Lassner and Hao Li and Tony Tung},
url = {https://www.computer.org/csdl/proceedings-article/cvpr/2020/716800d090/1m3nz4mKHzG},
doi = {10.1109/CVPR42600.2020.00316},
year = {2020},
date = {2020-04-01},
booktitle = {Proceedings of the CVPR 2020},
pages = {3090--3099},
publisher = {IEEE},
address = {Seattle, Washington},
abstract = {In this paper, we propose ARCH (Animatable Reconstruction of Clothed Humans), a novel end-to-end framework for accurate reconstruction of animation-ready 3D clothed humans from a monocular image. Existing approaches to digitize 3D humans struggle to handle pose variations and recover details. Also, they do not produce models that are animation ready. In contrast, ARCH is a learned pose-aware model that produces detailed 3D rigged full-body human avatars from a single unconstrained RGB image. A Semantic Space and a Semantic Deformation Field are created using a parametric 3D body estimator. They allow the transformation of 2D/3D clothed humans into a canonical space, reducing ambiguities in geometry caused by pose variations and occlusions in training data. Detailed surface geometry and appearance are learned using an implicit function representation with spatial local features. Furthermore, we propose additional per-pixel supervision on the 3D reconstruction using opacity-aware differentiable rendering. Our experiments indicate that ARCH increases the fidelity of the reconstructed humans. We obtain more than 50% lower reconstruction errors for standard metrics compared to state-of-the-art methods on public datasets. We also show numerous qualitative examples of animated, high-quality reconstructed avatars unseen in the literature so far.},
keywords = {ARO-Coop, Graphics},
pubstate = {published},
tppubtype = {inproceedings}
}
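
ARCH's Semantic Deformation Field maps posed points into a canonical space using a parametric body estimate. The NumPy sketch below shows the closely related inverse linear-blend-skinning step that such a canonicalization builds on; it is a generic stand-in, not the paper's field, and all array names are assumptions.

import numpy as np

def canonicalize_points(points, bone_transforms, skin_weights):
    """Map posed-space points back to a canonical pose via inverse blend skinning.

    points: (N, 3); bone_transforms: (B, 4, 4) canonical-to-posed per bone;
    skin_weights: (N, B), rows sum to 1. Returns canonical (N, 3) points."""
    # Blended forward transform per point, then inverted to undo the pose.
    blended = np.einsum("nb,bij->nij", skin_weights, bone_transforms)   # (N, 4, 4)
    inv = np.linalg.inv(blended)
    homo = np.concatenate([points, np.ones((len(points), 1))], axis=1)  # (N, 4)
    canonical = np.einsum("nij,nj->ni", inv, homo)
    return canonical[:, :3]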
Li, Ruilong; Bladin, Karl; Zhao, Yajie; Chinara, Chinmay; Ingraham, Owen; Xiang, Pengda; Ren, Xinglei; Prasad, Pratusha; Kishore, Bipin; Xing, Jun; Li, Hao
Learning Formation of Physically-Based Face Attributes Proceedings Article
In: Proceedings of the CVPR 2020, IEEE, Seattle, Washington, 2020.
@inproceedings{li_learning_2020,
title = {Learning Formation of Physically-Based Face Attributes},
author = {Ruilong Li and Karl Bladin and Yajie Zhao and Chinmay Chinara and Owen Ingraham and Pengda Xiang and Xinglei Ren and Pratusha Prasad and Bipin Kishore and Jun Xing and Hao Li},
url = {https://www.computer.org/csdl/proceedings-article/cvpr/2020/716800d407/1m3oiaP9ouQ},
doi = {10.1109/CVPR42600.2020.00347},
year = {2020},
date = {2020-04-01},
booktitle = {Proceedings of the CVPR 2020},
publisher = {IEEE},
address = {Seattle, Washington},
abstract = {Based on a combined data set of 4000 high resolution facial scans, we introduce a non-linear morphable face model, capable of producing multifarious face geometry of pore-level resolution, coupled with material attributes for use in physically-based rendering. We aim to maximize the variety of face identities, while increasing the robustness of correspondence between unique components, including middle-frequency geometry, albedo maps, specular intensity maps and high-frequency displacement details. Our deep learning based generative model learns to correlate albedo and geometry, which ensures the anatomical correctness of the generated assets. We demonstrate potential use of our generative model for novel identity generation, model fitting, interpolation, animation, high fidelity data visualization, and low-to-high resolution data domain transferring. We hope the release of this generative model will encourage further cooperation between all graphics, vision, and data focused professionals, while demonstrating the cumulative value of every individual’s complete biometric profile.},
keywords = {Graphics, UARC},
pubstate = {published},
tppubtype = {inproceedings}
}
Olszewski, Kyle; Ceylan, Duygu; Xing, Jun; Echevarria, Jose; Chen, Zhili; Chen, Weikai; Li, Hao
Intuitive, Interactive Beard and Hair Synthesis with Generative Models Proceedings Article
In: Proceedings of the CVPR 2020, IEEE, Seattle, Washington, 2020.
@inproceedings{olszewski_intuitive_2020,
title = {Intuitive, Interactive Beard and Hair Synthesis with Generative Models},
author = {Kyle Olszewski and Duygu Ceylan and Jun Xing and Jose Echevarria and Zhili Chen and Weikai Chen and Hao Li},
url = {http://arxiv.org/abs/2004.06848},
doi = {10.1109/CVPR42600.2020.00747},
year = {2020},
date = {2020-04-01},
booktitle = {Proceedings of the CVPR 2020},
publisher = {IEEE},
address = {Seattle, Washington},
abstract = {We present an interactive approach to synthesizing realistic variations in facial hair in images, ranging from subtle edits to existing hair to the addition of complex and challenging hair in images of clean-shaven subjects. To circumvent the tedious and computationally expensive tasks of modeling, rendering and compositing the 3D geometry of the target hairstyle using the traditional graphics pipeline, we employ a neural network pipeline that synthesizes realistic and detailed images of facial hair directly in the target image in under one second. The synthesis is controlled by simple and sparse guide strokes from the user defining the general structural and color properties of the target hairstyle. We qualitatively and quantitatively evaluate our chosen method compared to several alternative approaches. We show compelling interactive editing results with a prototype user interface that allows novice users to progressively refine the generated image to match their desired hairstyle, and demonstrate that our approach also allows for flexible and high-fidelity scalp hair synthesis.},
keywords = {Graphics, UARC},
pubstate = {published},
tppubtype = {inproceedings}
}
Chen, Meida; Feng, Andrew; Prasad, Pratusha Bhuvana; McAlinden, Ryan; Soibelman, Lucio; Enloe, Mike
Fully Automated Photogrammetric Data Segmentation and Object Information Extraction Approach for Creating Simulation Terrain Proceedings Article
In: Proceedings of the Interservice/Industry Training, Simulation, and Education Conference (I/ITSEC), pp. 13, ResearchGate, Orlando, FL, 2020.
@inproceedings{chen_fully_2020,
title = {Fully Automated Photogrammetric Data Segmentation and Object Information Extraction Approach for Creating Simulation Terrain},
author = {Meida Chen and Andrew Feng and Pratusha Bhuvana Prasad and Ryan McAlinden and Lucio Soibelman and Mike Enloe},
url = {https://www.researchgate.net/publication/338557943_Fully_Automated_Photogrammetric_Data_Segmentation_and_Object_Information_Extraction_Approach_for_Creating_Simulation_Terrain},
year = {2020},
date = {2020-01-01},
booktitle = {Proceedings of the Interservice/Industry Training, Simulation, and Education Conference (I/ITSEC)},
pages = {13},
publisher = {ResearchGate},
address = {Orlando, FL},
abstract = {Our previous works have demonstrated that visually realistic 3D meshes can be automatically reconstructed with low-cost, off-the-shelf unmanned aerial systems (UAS) equipped with capable cameras, and efficient photogrammetric software techniques (McAlinden, Suma, Grechkin, & Enloe, 2015; Spicer, McAlinden, Conover, & Adelphi, 2016). However, such generated data do not contain semantic information/features of objects (i.e., man-made objects, vegetation, ground, object materials, etc.) and cannot allow the sophisticated user-level and system-level interaction. Considering the use case of the data in creating realistic virtual environments for training and simulations (i.e., mission planning, rehearsal, threat detection, etc.), segmenting the data and extracting object information are essential tasks. Previous studies have focused on and made valuable contributions to segment Light Detection and Ranging (LIDAR) generated 3D point clouds and classifying ground materials from real-world images. However, only a few studies have focused on the data created using the photogrammetric technique.},
keywords = {Graphics, Narrative, STG, UARC},
pubstate = {published},
tppubtype = {inproceedings}
}
Li, Hao
Technical perspective: Photorealistic facial digitization and manipulation Journal Article
In: Communications of the ACM, vol. 62, no. 1, pp. 95–95, 2019, ISSN: 0001-0782.
@article{li_technical_2019,
title = {Technical perspective: Photorealistic facial digitization and manipulation},
author = {Hao Li},
url = {http://dl.acm.org/citation.cfm?doid=3301004.3292037},
doi = {10.1145/3292037},
issn = {0001-0782},
year = {2019},
date = {2019-12-01},
journal = {Communications of the ACM},
volume = {62},
number = {1},
pages = {95–95},
abstract = {For more than a decade, computer graphics (CG) researchers and visual effects experts have been fascinated with bringing photorealistic digital actors to the screen. Crossing the well-known "uncanny valley" in CG humans has been one of the most difficult and crucial challenges, due to hypersensitivity to synthetic humans lacking even the slightest and most subtle features of genuine human faces. Given sufficient resources and time, photorealistic renderings of digital characters have been achieved in recent years. Some of the most memorable cases are seen in blockbuster movies, such as The Curious Case of Benjamin Button, Furious 7, and Rogue One: A Star Wars Story, in which large teams of highly skilled digital artists use cutting-edge digitization technologies. Despite the progress of 3D-scanning solutions, facial animation systems, and advanced rendering techniques, weeks of manual work are still needed to produce even just a few seconds of animation.},
keywords = {Graphics},
pubstate = {published},
tppubtype = {article}
}
Xing, Jun; Nagano, Koki; Chen, Weikai; Xu, Haotian; Wei, Li-yi; Zhao, Yajie; Lu, Jingwan; Kim, Byungmoon; Li, Hao
HairBrush for Immersive Data-Driven Hair Modeling Proceedings Article
In: Proceedings of the 32nd Annual ACM Symposium on User Interface Software and Technology - UIST '19, pp. 263–279, ACM Press, New Orleans, LA, USA, 2019, ISBN: 978-1-4503-6816-2.
@inproceedings{xing_hairbrush_2019,
title = {HairBrush for Immersive Data-Driven Hair Modeling},
author = {Jun Xing and Koki Nagano and Weikai Chen and Haotian Xu and Li-yi Wei and Yajie Zhao and Jingwan Lu and Byungmoon Kim and Hao Li},
url = {http://dl.acm.org/citation.cfm?doid=3332165.3347876},
doi = {10.1145/3332165.3347876},
isbn = {978-1-4503-6816-2},
year = {2019},
date = {2019-10-01},
booktitle = {Proceedings of the 32nd Annual ACM Symposium on User Interface Software and Technology - UIST '19},
pages = {263–279},
publisher = {ACM Press},
address = {New Orleans, LA, USA},
abstract = {While hair is an essential component of virtual humans, it is also one of the most challenging digital assets to create. Existing automatic techniques lack the generality and flexibility to create rich hair variations, while manual authoring interfaces often require considerable artistic skills and efforts, especially for intricate 3D hair structures that can be difficult to navigate. We propose an interactive hair modeling system that can help create complex hairstyles in minutes or hours that would otherwise take much longer with existing tools. Modelers, including novice users, can focus on the overall hairstyles and local hair deformations, as our system intelligently suggests the desired hair parts. Our method combines the flexibility of manual authoring and the convenience of data-driven automation. Since hair contains intricate 3D structures such as buns, knots, and strands, they are inherently challenging to create using traditional 2D interfaces. Our system provides a new 3D hair authoring interface for immersive interaction in virtual reality (VR). Users can draw high-level guide strips, from which our system predicts the most plausible hairstyles via a deep neural network trained from a professionally curated dataset. Each hairstyle in our dataset is composed of multiple variations, serving as blend-shapes to fit the user drawings via global blending and local deformation. The fitted hair models are visualized as interactive suggestions that the user can select, modify, or ignore. We conducted a user study to confirm that our system can significantly reduce manual labor while improve the output quality for modeling a variety of head and facial hairstyles that are challenging to create via existing techniques.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Olszewski, Kyle; Tulyakov, Sergey; Woodford, Oliver; Li, Hao; Luo, Linjie
Transformable Bottleneck Networks Journal Article
In: arXiv:1904.06458 [cs], 2019.
@article{olszewski_transformable_2019,
title = {Transformable Bottleneck Networks},
author = {Kyle Olszewski and Sergey Tulyakov and Oliver Woodford and Hao Li and Linjie Luo},
url = {http://arxiv.org/abs/1904.06458},
year = {2019},
date = {2019-08-01},
journal = {arXiv:1904.06458 [cs]},
abstract = {We propose a novel approach to performing fine-grained 3D manipulation of image content via a convolutional neural network, which we call the Transformable Bottleneck Network (TBN). It applies given spatial transformations directly to a volumetric bottleneck within our encoder-bottleneck-decoder architecture. Multi-view supervision encourages the network to learn to spatially disentangle the feature space within the bottleneck. The resulting spatial structure can be manipulated with arbitrary spatial transformations. We demonstrate the efficacy of TBNs for novel view synthesis, achieving state-of-the-art results on a challenging benchmark. We demonstrate that the bottlenecks produced by networks trained for this task contain meaningful spatial structure that allows us to intuitively perform a variety of image manipulations in 3D, well beyond the rigid transformations seen during training. These manipulations include non-uniform scaling, non-rigid warping, and combining content from different images. Finally, we extract explicit 3D structure from the bottleneck, performing impressive 3D reconstruction from a single input image.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
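
The manipulation described above amounts to resampling the volumetric bottleneck tensor under a user-supplied spatial transform before decoding. A minimal PyTorch sketch of applying a rotation to an (N, C, D, H, W) bottleneck with affine_grid/grid_sample follows; the tensor sizes and the rotation-axis convention are placeholders, not the paper's architecture.

import math
import torch
import torch.nn.functional as F

def rotate_bottleneck(features, angle_rad):
    """Rotate a volumetric feature tensor (N, C, D, H, W) in the horizontal plane
    of its normalized grid by resampling it with a 3D affine warp."""
    n = features.shape[0]
    c, s = math.cos(angle_rad), math.sin(angle_rad)
    # One 3x4 affine matrix per batch element (pure rotation, no translation).
    theta = torch.tensor([[c, 0.0, s, 0.0],
                          [0.0, 1.0, 0.0, 0.0],
                          [-s, 0.0, c, 0.0]],
                         dtype=features.dtype, device=features.device).expand(n, 3, 4)
    grid = F.affine_grid(theta, size=list(features.shape), align_corners=False)
    return F.grid_sample(features, grid, align_corners=False, padding_mode="zeros")

bottleneck = torch.randn(1, 32, 16, 16, 16)           # placeholder encoder output
rotated = rotate_bottleneck(bottleneck, math.pi / 4)  # decoded afterwards for a novel view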
Zhou, Yi; Barnes, Connelly; Lu, Jingwan; Yang, Jimei; Li, Hao
On the Continuity of Rotation Representations in Neural Networks Proceedings Article
In: Proceedings of CVPR, pp. 9, IEEE, Long Beach, CA, 2019.
@inproceedings{zhou_continuity_2019,
title = {On the Continuity of Rotation Representations in Neural Networks},
author = {Yi Zhou and Connelly Barnes and Jingwan Lu and Jimei Yang and Hao Li},
url = {http://openaccess.thecvf.com/content_CVPR_2019/html/Zhou_On_the_Continuity_of_Rotation_Representations_in_Neural_Networks_CVPR_2019_paper.html},
year = {2019},
date = {2019-06-01},
booktitle = {Proceedings of CVPR},
pages = {9},
publisher = {IEEE},
address = {Long Beach, CA},
abstract = {In neural networks, it is often desirable to work with various representations of the same space. For example, 3D rotations can be represented with quaternions or Euler angles. In this paper, we advance a definition of a continuous representation, which can be helpful for training deep neural networks. We relate this to topological concepts such as homeomorphism and embedding. We then investigate what are continuous and discontinuous representations for 2D, 3D, and n-dimensional rotations. We demonstrate that for 3D rotations, all representations are discontinuous in the real Euclidean spaces of four or fewer dimensions. Thus, widely used representations such as quaternions and Euler angles are discontinuous and difficult for neural networks to learn. We show that the 3D rotations have continuous representations in 5D and 6D, which are more suitable for learning. We also present continuous representations for the general case of the n dimensional rotation group SO(n). While our main focus is on rotations, we also show that our constructions apply to other groups such as the orthogonal group and similarity transforms. We finally present empirical results, which show that our continuous rotation representations outperform discontinuous ones for several practical problems in graphics and vision, including a simple autoencoder sanity test, a rotation estimator for 3D point clouds, and an inverse kinematics solver for 3D human poses.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
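
The continuous 6D representation this paper proposes maps two 3-vectors to a rotation matrix by Gram-Schmidt orthonormalization, which avoids the discontinuities of quaternions and Euler angles. A small NumPy sketch of that mapping:

import numpy as np

def rotation_from_6d(x):
    """Map a 6D vector (two stacked 3-vectors) to a 3x3 rotation matrix
    via Gram-Schmidt, as in the continuous representation the paper proposes."""
    a1, a2 = x[:3], x[3:]
    b1 = a1 / np.linalg.norm(a1)
    a2_proj = a2 - np.dot(b1, a2) * b1          # remove the component along b1
    b2 = a2_proj / np.linalg.norm(a2_proj)
    b3 = np.cross(b1, b2)                       # completes a right-handed frame
    return np.stack([b1, b2, b3], axis=1)       # columns are the basis vectors

R = rotation_from_6d(np.array([1.0, 0.1, 0.0, 0.0, 1.0, 0.2]))
assert np.allclose(R @ R.T, np.eye(3), atol=1e-6) and np.isclose(np.linalg.det(R), 1.0)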
Natsume, Ryota; Saito, Shunsuke; Huang, Zeng; Chen, Weikai; Ma, Chongyang; Li, Hao; Morishima, Shigeo
SiCloPe: Silhouette-Based Clothed People Proceedings Article
In: Proceedings of CVPR, pp. 11, IEEE, Long Beach, CA, 2019.
@inproceedings{natsume_siclope_2019,
title = {SiCloPe: Silhouette-Based Clothed People},
author = {Ryota Natsume and Shunsuke Saito and Zeng Huang and Weikai Chen and Chongyang Ma and Hao Li and Shigeo Morishima},
url = {http://openaccess.thecvf.com/content_CVPR_2019/html/Natsume_SiCloPe_Silhouette-Based_Clothed_People_CVPR_2019_paper.html},
year = {2019},
date = {2019-06-01},
booktitle = {Proceedings of CVPR},
pages = {11},
publisher = {IEEE},
address = {Long Beach, CA},
abstract = {We introduce a new silhouette-based representation for modeling clothed human bodies using deep generative models. Our method can reconstruct a complete and textured 3D model of a person wearing clothes from a single input picture. Inspired by the visual hull algorithm, our implicit representation uses 2D silhouettes and 3D joints of a body pose to describe the immense shape complexity and variations of clothed people. Given a segmented 2D silhouette of a person and its inferred 3D joints from the input picture, we first synthesize consistent silhouettes from novel view points around the subject. The synthesized silhouettes which are the most consistent with the input segmentation are fed into a deep visual hull algorithm for robust 3D shape prediction. We then infer the texture of the subject’s back view using the frontal image and segmentation mask as input to a conditional generative adversarial network. Our experiments demonstrate that our silhouette-based model is an effective representation and the appearance of the back view can be predicted reliably using an image-to-image translation network. While classic methods based on parametric models often fail for single-view images of subjects with challenging clothing, our approach can still produce successful results, which are comparable to those obtained from multi-view input.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
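
The deep visual hull step in this abstract builds on classic silhouette carving, where a voxel is kept only if it projects inside every (here, synthesized) silhouette. A plain NumPy sketch of that baseline carving follows, assuming pinhole 3x4 projection matrices and binary masks are given; it is illustrative, not the paper's learned variant.

import numpy as np

def carve_visual_hull(silhouettes, projections, res=64, bound=1.0):
    """silhouettes: list of (H, W) binary masks; projections: list of (3, 4) camera
    matrices mapping homogeneous world points to pixels. Returns a boolean voxel grid."""
    axis = np.linspace(-bound, bound, res)
    gx, gy, gz = np.meshgrid(axis, axis, axis, indexing="ij")
    pts = np.stack([gx, gy, gz, np.ones_like(gx)], axis=-1).reshape(-1, 4)  # (res^3, 4)
    keep = np.ones(len(pts), dtype=bool)
    for mask, P in zip(silhouettes, projections):
        uvw = pts @ P.T                                  # project into this view
        u = np.round(uvw[:, 0] / uvw[:, 2]).astype(int)
        v = np.round(uvw[:, 1] / uvw[:, 2]).astype(int)
        h, w = mask.shape
        inside = (u >= 0) & (u < w) & (v >= 0) & (v < h)
        hit = np.zeros(len(pts), dtype=bool)
        hit[inside] = mask[v[inside], u[inside]] > 0     # voxel lands inside the silhouette
        keep &= hit                                      # must be inside every view
    return keep.reshape(res, res, res)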
Agarwal, Shruti; Farid, Hany; Gu, Yuming; He, Mingming; Nagano, Koki; Li, Hao
Protecting World Leaders Against Deep Fakes Proceedings Article
In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshops, pp. 8, IEEE, Long Beach, CA, 2019.
@inproceedings{agarwal_protecting_2019,
title = {Protecting World Leaders Against Deep Fakes},
author = {Shruti Agarwal and Hany Farid and Yuming Gu and Mingming He and Koki Nagano and Hao Li},
url = {http://openaccess.thecvf.com/content_CVPRW_2019/papers/Media%20Forensics/Agarwal_Protecting_World_Leaders_Against_Deep_Fakes_CVPRW_2019_paper.pdf},
year = {2019},
date = {2019-06-01},
booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
pages = {8},
publisher = {IEEE},
address = {Long Beach, CA},
abstract = {The creation of sophisticated fake videos has been largely relegated to Hollywood studios or state actors. Recent advances in deep learning, however, have made it significantly easier to create sophisticated and compelling fake videos. With relatively modest amounts of data and computing power, the average person can, for example, create a video of a world leader confessing to illegal activity leading to a constitutional crisis, a military leader saying something racially insensitive leading to civil unrest in an area of military activity, or a corporate titan claiming that their profits are weak leading to global stock manipulation. These so called deep fakes pose a significant threat to our democracy, national security, and society. To contend with this growing threat, we describe a forensic technique that models facial expressions and movements that typify an individual’s speaking pattern. Although not visually apparent, these correlations are often violated by the nature of how deep-fake videos are created and can, therefore, be used for authentication.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
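
The forensic model summarized above characterizes a person by how their facial expressions and head movements co-vary while speaking, then flags clips whose pattern deviates. The sketch below shows that shape of pipeline with Pearson correlation features and a one-class classifier; the specific signals, dimensions, and classifier settings are illustrative assumptions, not the paper's exact configuration.

import numpy as np
from sklearn.svm import OneClassSVM

def correlation_features(tracks):
    """tracks: (T, F) time series of facial action-unit / head-pose signals for one clip.
    Returns the upper triangle of the Pearson correlation matrix as a feature vector."""
    corr = np.corrcoef(tracks.T)                       # (F, F) pairwise correlations
    iu = np.triu_indices_from(corr, k=1)
    return corr[iu]

# Fit on authentic clips of one person, then score unseen clips.
rng = np.random.default_rng(0)
real_clips = [rng.normal(size=(300, 10)) for _ in range(20)]   # placeholder signals
X_real = np.stack([correlation_features(c) for c in real_clips])
model = OneClassSVM(nu=0.1, kernel="rbf", gamma="scale").fit(X_real)
suspect = correlation_features(rng.normal(size=(300, 10)))
print("authentic" if model.predict(suspect[None])[0] == 1 else "flagged")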
Zhao, Yajie; Huang, Zeng; Li, Tianye; Chen, Weikai; LeGendre, Chloe; Ren, Xinglei; Xing, Jun; Shapiro, Ari; Li, Hao
Learning Perspective Undistortion of Portraits Journal Article
In: arXiv:1905.07515 [cs], 2019.
@article{zhao_learning_2019,
title = {Learning Perspective Undistortion of Portraits},
author = {Yajie Zhao and Zeng Huang and Tianye Li and Weikai Chen and Chloe LeGendre and Xinglei Ren and Jun Xing and Ari Shapiro and Hao Li},
url = {http://arxiv.org/abs/1905.07515},
year = {2019},
date = {2019-05-01},
journal = {arXiv:1905.07515 [cs]},
abstract = {Near-range portrait photographs often contain perspective distortion artifacts that bias human perception and challenge both facial recognition and reconstruction techniques. We present the first deep learning based approach to remove such artifacts from unconstrained portraits. In contrast to the previous state-of-the-art approach, our method handles even portraits with extreme perspective distortion, as we avoid the inaccurate and error-prone step of first fitting a 3D face model. Instead, we predict a distortion correction flow map that encodes a per-pixel displacement that removes distortion artifacts when applied to the input image. Our method also automatically infers missing facial features, i.e. occluded ears caused by strong perspective distortion, with coherent details. We demonstrate that our approach significantly outperforms the previous state-of-the-art both qualitatively and quantitatively, particularly for portraits with extreme perspective distortion or facial expressions. We further show that our technique benefits a number of fundamental tasks, significantly improving the accuracy of both face recognition and 3D reconstruction and enables a novel camera calibration technique from a single portrait. Moreover, we also build the first perspective portrait database with a large diversity in identities, expression and poses, which will benefit the related research in this area.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
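
The network described above outputs a per-pixel displacement (flow) map that is applied to the portrait to undo the distortion; applying such a map is a standard backward warp. A short NumPy/SciPy sketch of that final warping step, with the flow assumed to come from the network:

import numpy as np
from scipy.ndimage import map_coordinates

def warp_with_flow(image, flow):
    """Backward-warp an image with a per-pixel displacement field.

    image: (H, W) or (H, W, C) float array; flow: (H, W, 2) displacements (dy, dx)
    telling each output pixel where to sample in the input. Returns the warped image."""
    h, w = image.shape[:2]
    yy, xx = np.meshgrid(np.arange(h), np.arange(w), indexing="ij")
    coords = np.stack([yy + flow[..., 0], xx + flow[..., 1]])   # (2, H, W) sample locations
    if image.ndim == 2:
        return map_coordinates(image, coords, order=1, mode="nearest")
    channels = [map_coordinates(image[..., c], coords, order=1, mode="nearest")
                for c in range(image.shape[-1])]
    return np.stack(channels, axis=-1)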
Liu, Shichen; Li, Tianye; Chen, Weikai; Li, Hao
Soft Rasterizer: A Differentiable Renderer for Image-based 3D Reasoning Journal Article
In: arXiv:1904.01786 [cs], 2019.
@article{liu_soft_2019,
title = {Soft Rasterizer: A Differentiable Renderer for Image-based 3D Reasoning},
author = {Shichen Liu and Tianye Li and Weikai Chen and Hao Li},
url = {http://arxiv.org/abs/1904.01786},
year = {2019},
date = {2019-04-01},
journal = {arXiv:1904.01786 [cs]},
abstract = {Rendering bridges the gap between 2D vision and 3D scenes by simulating the physical process of image formation. By inverting such renderer, one can think of a learning approach to infer 3D information from 2D images. However, standard graphics renderers involve a fundamental discretization step called rasterization, which prevents the rendering process to be differentiable, hence able to be learned. Unlike the state-of-the-art differentiable renderers [29, 19], which only approximate the rendering gradient in the back propagation, we propose a truly differentiable rendering framework that is able to (1) directly render colorized mesh using differentiable functions and (2) back-propagate efficient supervision signals to mesh vertices and their attributes from various forms of image representations, including silhouette, shading and color images. The key to our framework is a novel formulation that views rendering as an aggregation function that fuses the probabilistic contributions of all mesh triangles with respect to the rendered pixels. Such formulation enables our framework to flow gradients to the occluded and far-range vertices, which cannot be achieved by the previous state-of-the-arts. We show that by using the proposed renderer, one can achieve significant improvement in 3D unsupervised single-view reconstruction both qualitatively and quantitatively. Experiments also demonstrate that our approach is able to handle the challenging tasks in image-based shape fitting, which remain nontrivial to existing differentiable renderers. Code is available at https://github.com/ShichenLiu/SoftRas.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
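
The aggregation formulation in this abstract replaces hard rasterization with a differentiable weighted sum: each triangle influences a pixel through a sigmoid of its signed 2D distance, and the contributions are fused with a depth-based softmax that includes a background term. The per-pixel NumPy sketch below is a simplified version of that idea, assuming the signed squared distances and normalized depths per triangle are precomputed; it is not the paper's exact equation.

import numpy as np

def soft_pixel_color(signed_dist, depths, colors, bg_color, sigma=1e-4, gamma=1e-4):
    """Differentiable per-pixel aggregation in the spirit of SoftRas.

    signed_dist: (T,) signed squared screen-space distances from the pixel to each
                 triangle (positive inside, negative outside); depths: (T,) normalized
                 triangle depths (larger = closer); colors: (T, 3); bg_color: (3,)."""
    coverage = 1.0 / (1.0 + np.exp(-signed_dist / sigma))   # soft inside/outside test
    logits = np.append(depths / gamma, 0.0)                 # last slot is the background
    weights = np.append(coverage, 1.0) * np.exp(logits - logits.max())
    weights /= weights.sum()
    return weights[:-1] @ colors + weights[-1] * np.asarray(bg_color)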
Artstein, Ron; Gordon, Carla; Sohail, Usman; Merchant, Chirag; Jones, Andrew; Campbell, Julia; Trimmer, Matthew; Bevington, Jeffrey; Engen, COL Christopher; Traum, David
Digital Survivor of Sexual Assault Proceedings Article
In: Proceedings of the 24th International Conference on Intelligent User Interfaces, pp. 417–425, ACM, Marina del Rey, California, 2019, ISBN: 978-1-4503-6272-6.
@inproceedings{artstein_digital_2019,
title = {Digital Survivor of Sexual Assault},
author = {Ron Artstein and Carla Gordon and Usman Sohail and Chirag Merchant and Andrew Jones and Julia Campbell and Matthew Trimmer and Jeffrey Bevington and COL Christopher Engen and David Traum},
url = {https://doi.org/10.1145/3301275.3302303},
doi = {10.1145/3301275.3302303},
isbn = {978-1-4503-6272-6},
year = {2019},
date = {2019-03-01},
booktitle = {Proceedings of the 24th International Conference on Intelligent User Interfaces},
pages = {417–425},
publisher = {ACM},
address = {Marina del Rey, California},
abstract = {The Digital Survivor of Sexual Assault (DS2A) is an interface that allows a user to have a conversational experience with a survivor of sexual assault, using Artificial Intelligence technology and recorded videos. The application uses a statistical classifier to retrieve contextually appropriate pre-recorded video utterances by the survivor, together with dialogue management policies which enable users to conduct simulated conversations with the survivor about the sexual assault, its aftermath, and other pertinent topics. The content in the application has been specifically elicited to support the needs for the training of U.S. Army professionals in the Sexual Harassment/Assault Response and Prevention (SHARP) Program, and the application comes with an instructional support package. The system has been tested with approximately 200 users, and is presently being used in the SHARP Academy's capstone course.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
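
The statistical classifier mentioned above retrieves the most contextually appropriate pre-recorded video answer for a user utterance. The sketch below only illustrates that retrieval shape with a TF-IDF similarity lookup; the actual DS2A classifier and its training data are different, and every string, clip ID, and threshold here is a made-up placeholder.

import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Tiny illustrative link table: training questions paired with pre-recorded clip IDs.
training_questions = ["what happened to you", "how did you report it", "who supported you"]
video_ids = ["clip_story_01", "clip_reporting_02", "clip_support_03"]

vectorizer = TfidfVectorizer().fit(training_questions)
question_matrix = vectorizer.transform(training_questions)

def retrieve_clip(user_utterance, threshold=0.2):
    """Return the best-matching pre-recorded clip, or an off-topic fallback."""
    scores = cosine_similarity(vectorizer.transform([user_utterance]), question_matrix)[0]
    best = int(np.argmax(scores))
    return video_ids[best] if scores[best] >= threshold else "clip_offtopic_fallback"

print(retrieve_clip("can you tell me how you reported it"))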
Filter
2024
Chen, Meida; Lal, Devashish; Yu, Zifan; Xu, Jiuyi; Feng, Andrew; You, Suya; Nurunnabi, Abdul; Shi, Yangming
Large-Scale 3D Terrain Reconstruction Using 3D Gaussian Splatting for Visualization and Simulation Journal Article
In: Int. Arch. Photogramm. Remote Sens. Spatial Inf. Sci., vol. XLVIII-2-2024, pp. 49–54, 2024, ISSN: 2194-9034.
Abstract | Links | BibTeX | Tags: DTIC, Graphics, VGL
@article{chen_large-scale_2024,
title = {Large-Scale 3D Terrain Reconstruction Using 3D Gaussian Splatting for Visualization and Simulation},
author = {Meida Chen and Devashish Lal and Zifan Yu and Jiuyi Xu and Andrew Feng and Suya You and Abdul Nurunnabi and Yangming Shi},
url = {https://isprs-archives.copernicus.org/articles/XLVIII-2-2024/49/2024/},
doi = {10.5194/isprs-archives-XLVIII-2-2024-49-2024},
issn = {2194-9034},
year = {2024},
date = {2024-06-01},
urldate = {2024-06-20},
journal = {Int. Arch. Photogramm. Remote Sens. Spatial Inf. Sci.},
volume = {XLVIII-2-2024},
pages = {49–54},
abstract = {Abstract. The fusion of low-cost unmanned aerial systems (UAS) with advanced photogrammetric techniques has revolutionized 3D terrain reconstruction, enabling the automated creation of detailed models. Concurrently, the advent of 3D Gaussian Splatting has introduced a paradigm shift in 3D data representation, offering visually realistic renditions distinct from traditional polygon-based models. Our research builds upon this foundation, aiming to integrate Gaussian Splatting into interactive simulations for immersive virtual environments. We address challenges such as collision detection by adopting a hybrid approach, combining Gaussian Splatting with photogrammetry-derived meshes. Through comprehensive experimentation covering varying terrain sizes and Gaussian densities, we evaluate scalability, performance, and limitations. Our findings contribute to advancing the use of advanced computer graphics techniques for enhanced 3D terrain visualization and simulation.},
keywords = {DTIC, Graphics, VGL},
pubstate = {published},
tppubtype = {article}
}
Liu, Rong; Xu, Rui; Hu, Yue; Chen, Meida; Feng, Andrew
AtomGS: Atomizing Gaussian Splatting for High-Fidelity Radiance Field Miscellaneous
2024, (Version Number: 2).
Abstract | Links | BibTeX | Tags: Graphics, VGL
@misc{liu_atomgs_2024,
title = {AtomGS: Atomizing Gaussian Splatting for High-Fidelity Radiance Field},
author = {Rong Liu and Rui Xu and Yue Hu and Meida Chen and Andrew Feng},
url = {https://arxiv.org/abs/2405.12369},
doi = {10.48550/ARXIV.2405.12369},
year = {2024},
date = {2024-05-01},
urldate = {2024-07-11},
publisher = {arXiv},
abstract = {3D Gaussian Splatting (3DGS) has recently advanced radiance field reconstruction by offering superior capabilities for novel view synthesis and real-time rendering speed. However, its strategy of blending optimization and adaptive density control might lead to sub-optimal results; it can sometimes yield noisy geometry and blurry artifacts due to prioritizing optimizing large Gaussians at the cost of adequately densifying smaller ones. To address this, we introduce AtomGS, consisting of Atomized Proliferation and Geometry-Guided Optimization. The Atomized Proliferation constrains ellipsoid Gaussians of various sizes into more uniform-sized Atom Gaussians. The strategy enhances the representation of areas with fine features by placing greater emphasis on densification in accordance with scene details. In addition, we proposed a Geometry-Guided Optimization approach that incorporates an Edge-Aware Normal Loss. This optimization method effectively smooths flat surfaces while preserving intricate details. Our evaluation shows that AtomGS outperforms existing state-of-the-art methods in rendering quality. Additionally, it achieves competitive accuracy in geometry reconstruction and offers a significant improvement in training speed over other SDF-based methods. More interactive demos can be found in our website (https://rongliu-leo.github.io/AtomGS/).},
note = {Version Number: 2},
keywords = {Graphics, VGL},
pubstate = {published},
tppubtype = {misc}
}
Zhang, Hui; Kuang, Bingran; Zhao, Yajie
Camera Calibration using a Single View of a Symmetric Object Proceedings Article
In: ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 2705–2709, IEEE, Seoul, Korea, Republic of, 2024, ISBN: 9798350344851.
Links | BibTeX | Tags: Graphics, VGL
@inproceedings{zhang_camera_2024,
title = {Camera Calibration using a Single View of a Symmetric Object},
author = {Hui Zhang and Bingran Kuang and Yajie Zhao},
url = {https://ieeexplore.ieee.org/document/10446005/},
doi = {10.1109/ICASSP48485.2024.10446005},
isbn = {9798350344851},
year = {2024},
date = {2024-04-01},
urldate = {2024-06-25},
booktitle = {ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
pages = {2705–2709},
publisher = {IEEE},
address = {Seoul, Korea, Republic of},
keywords = {Graphics, VGL},
pubstate = {published},
tppubtype = {inproceedings}
}
2022
Hou, Yu; Chen, Meida; Volk, Rebekka; Soibelman, Lucio
In: Journal of Building Engineering, vol. 45, pp. 103380, 2022, ISSN: 23527102.
Links | BibTeX | Tags: Graphics
@article{hou_investigation_2022,
title = {Investigation on performance of RGB point cloud and thermal information data fusion for 3D building thermal map modeling using aerial images under different experimental conditions},
author = {Yu Hou and Meida Chen and Rebekka Volk and Lucio Soibelman},
url = {https://linkinghub.elsevier.com/retrieve/pii/S2352710221012389},
doi = {10.1016/j.jobe.2021.103380},
issn = {23527102},
year = {2022},
date = {2022-01-01},
urldate = {2022-09-28},
journal = {Journal of Building Engineering},
volume = {45},
pages = {103380},
keywords = {Graphics},
pubstate = {published},
tppubtype = {article}
}
2020
Li, Jiaman; Kuang, Zhengfei; Zhao, Yajie; He, Mingming; Bladin, Karl; Li, Hao
Dynamic Facial Asset and Rig Generation from a Single Scan Journal Article
In: ACM Transactions on Graphics, vol. 39, no. 6, 2020.
Abstract | Links | BibTeX | Tags: ARO-Coop, Graphics
@article{li_dynamic_2020,
title = {Dynamic Facial Asset and Rig Generation from a Single Scan},
author = {Jiaman Li and Zhengfei Kuang and Yajie Zhao and Mingming He and Karl Bladin and Hao Li},
url = {https://dl.acm.org/doi/10.1145/3414685.3417817},
doi = {doi/10.1145/3414685.3417817},
year = {2020},
date = {2020-11-01},
journal = {ACM Transactions on Graphics},
volume = {39},
number = {6},
abstract = {The creation of high-fidelity computer-generated (CG) characters for films and games is tied with intensive manual labor, which involves the creation of comprehensive facial assets that are often captured using complex hardware. To simplify and accelerate this digitization process, we propose a framework for the automatic generation of high-quality dynamic facial models, including rigs which can be readily deployed for artists to polish. Our framework takes a single scan as input to generate a set of personalized blendshapes, dynamic textures, as well as secondary facial components (e.g., teeth and eyeballs). Based on a facial database with over 4, 000 scans with pore-level details, varying expressions and identities, we adopt a self-supervised neural network to learn personalized blendshapes from a set of template expressions. We also model the joint distribution between identities and expressions, enabling the inference of a full set of personalized blendshapes with dynamic appearances from a single neutral input scan. Our generated personalized face rig assets are seamlessly compatible with professional production pipelines for facial animation and rendering. We demonstrate a highly robust and effective framework on a wide range of subjects, and showcase high-fidelity facial animations with automatically generated personalized dynamic textures.},
keywords = {ARO-Coop, Graphics},
pubstate = {published},
tppubtype = {article}
}
Li, Ruilong; Xiu, Yuliang; Saito, Shunsuke; Huang, Zeng; Olszewski, Kyle; Li, Hao
Monocular Real-Time Volumetric Performance Capture Journal Article
In: ResearchGate, pp. 30, 2020.
Abstract | Links | BibTeX | Tags: Graphics, UARC
@article{li_monocular_2020,
title = {Monocular Real-Time Volumetric Performance Capture},
author = {Ruilong Li and Yuliang Xiu and Shunsuke Saito and Zeng Huang and Kyle Olszewski and Hao Li},
url = {https://www.researchgate.net/publication/343279742_Monocular_Real-Time_Volumetric_Performance_Capture},
year = {2020},
date = {2020-07-01},
journal = {ResearchGate},
pages = {30},
abstract = {We present the first approach to volumetric performance capture and novel-view rendering at real-time speed from monocular video, eliminating the need for expensive multi-view systems or cumbersome pre-acquisition of a personalized template model. Our system reconstructs a fully textured 3D human from each frame by leveraging Pixel-Aligned Implicit Function (PIFu). While PIFu achieves high-resolution reconstruction in a memory-efficient manner, its computationally expensive inference prevents us from deploying such a system for real-time applications. To this end, we propose a novel hierarchical surface localization algorithm and a direct rendering method without explicitly extracting surface meshes. By culling unnecessary regions for evaluation in a coarse-to-fine manner, we successfully accelerate the reconstruction by two orders of magnitude from the baseline without compromising the quality. Furthermore, we introduce an Online Hard Example Mining (OHEM) technique that effectively suppresses failure modes due to the rare occurrence of challenging examples. We adaptively update the sampling probability of the training data based on the current reconstruction accuracy, which effectively alleviates reconstruction artifacts. Our experiments and evaluations demonstrate the robustness of our system to various challenging angles, illuminations, poses, and clothing styles. We also show that our approach compares favorably with the state-of-the-art monocular performance capture. Our proposed approach removes the need for multi-view studio settings and enables a consumer-accessible solution for volumetric capture.},
keywords = {Graphics, UARC},
pubstate = {published},
tppubtype = {article}
}
Huang, Zeng; Xu, Yuanlu; Lassner, Christoph; Li, Hao; Tung, Tony
ARCH: Animatable Reconstruction of Clothed Humans Proceedings Article
In: Proceedings of the CVPR 2020, IEEE, Seattle, Washington, 2020.
Abstract | Links | BibTeX | Tags: Graphics
@inproceedings{huang_arch_2020-1,
title = {ARCH: Animatable Reconstruction of Clothed Humans},
author = {Zeng Huang and Yuanlu Xu and Christoph Lassner and Hao Li and Tony Tung},
url = {https://www.computer.org/csdl/proceedings-article/cvpr/2020/716800d090/1m3nz4mKHzG},
doi = {10.1109/CVPR42600.2020.00316},
year = {2020},
date = {2020-04-01},
booktitle = {Proceedings of the CVPR 2020},
publisher = {IEEE},
address = {Seattle, Washington},
abstract = {In this paper, we propose ARCH (Animatable Reconstruction of Clothed Humans), a novel end-to-end framework for accurate reconstruction of animation-ready 3D clothed humans from a monocular image. Existing approaches to digitize 3D humans struggle to handle pose variations and recover details. Also, they do not produce models that are animation ready. In contrast, ARCH is a learned pose-aware model that produces detailed 3D rigged full-body human avatars from a single unconstrained RGB image. A Semantic Space and a Semantic Deformation Field are created using a parametric 3D body estimator. They allow the transformation of 2D/3D clothed humans into a canonical space, reducing ambiguities in geometry caused by pose variations and occlusions in training data. Detailed surface geometry and appearance are learned using an implicit function representation with spatial local features. Furthermore, we propose additional per-pixel supervision on the 3D reconstruction using opacity-aware differentiable rendering. Our experiments indicate that ARCH increases the fidelity of the reconstructed humans. We obtain more than 50% lower reconstruction errors for standard metrics compared to state-of-the-art methods on public datasets. We also show numerous qualitative examples of animated, high-quality reconstructed avatars unseen in the literature so far.},
keywords = {Graphics},
pubstate = {published},
tppubtype = {inproceedings}
}
Huang, Zeng; Xu, Yuanlu; Lassner, Christoph; Li, Hao; Tung, Tony
ARCH: Animatable Reconstruction of Clothed Humans Proceedings Article
In: Proceedings of the CVPR 2020, pp. 3090–3099, IEEE, Seattle, Washington, 2020.
Abstract | Links | BibTeX | Tags: ARO-Coop, Graphics
@inproceedings{huang_arch_2020,
title = {ARCH: Animatable Reconstruction of Clothed Humans},
author = {Zeng Huang and Yuanlu Xu and Christoph Lassner and Hao Li and Tony Tung},
url = {https://www.computer.org/csdl/proceedings-article/cvpr/2020/716800d090/1m3nz4mKHzG},
doi = {10.1109/CVPR42600.2020.00316},
year = {2020},
date = {2020-04-01},
booktitle = {Proceedings of the CVPR 2020},
pages = {3090--3099},
publisher = {IEEE},
address = {Seattle, Washington},
abstract = {In this paper, we propose ARCH (Animatable Reconstruction of Clothed Humans), a novel end-to-end framework for accurate reconstruction of animation-ready 3D clothed humans from a monocular image. Existing approaches to digitize 3D humans struggle to handle pose variations and recover details. Also, they do not produce models that are animation ready. In contrast, ARCH is a learned pose-aware model that produces detailed 3D rigged full-body human avatars from a single unconstrained RGB image. A Semantic Space and a Semantic Deformation Field are created using a parametric 3D body estimator. They allow the transformation of 2D/3D clothed humans into a canonical space, reducing ambiguities in geometry caused by pose variations and occlusions in training data. Detailed surface geometry and appearance are learned using an implicit function representation with spatial local features. Furthermore, we propose additional per-pixel supervision on the 3D reconstruction using opacity-aware differentiable rendering. Our experiments indicate that ARCH increases the fidelity of the reconstructed humans. We obtain more than 50% lower reconstruction errors for standard metrics compared to state-of-the-art methods on public datasets. We also show numerous qualitative examples of animated, high-quality reconstructed avatars unseen in the literature so far.},
keywords = {ARO-Coop, Graphics},
pubstate = {published},
tppubtype = {inproceedings}
}
Li, Ruilong; Bladin, Karl; Zhao, Yajie; Chinara, Chinmay; Ingraham, Owen; Xiang, Pengda; Ren, Xinglei; Prasad, Pratusha; Kishore, Bipin; Xing, Jun; Li, Hao
Learning Formation of Physically-Based Face Attributes Proceedings Article
In: Proceedings of the CVPR 2020, IEEE, Seattle, Washington, 2020.
Abstract | Links | BibTeX | Tags: Graphics, UARC
@inproceedings{li_learning_2020,
title = {Learning Formation of Physically-Based Face Attributes},
author = {Ruilong Li and Karl Bladin and Yajie Zhao and Chinmay Chinara and Owen Ingraham and Pengda Xiang and Xinglei Ren and Pratusha Prasad and Bipin Kishore and Jun Xing and Hao Li},
url = {https://www.computer.org/csdl/proceedings-article/cvpr/2020/716800d407/1m3oiaP9ouQ},
doi = {10.1109/CVPR42600.2020.00347},
year = {2020},
date = {2020-04-01},
booktitle = {Proceedings of the CVPR 2020},
publisher = {IEEE},
address = {Seattle, Washington},
abstract = {Based on a combined data set of 4000 high resolution facial scans, we introduce a non-linear morphable face model, capable of producing multifarious face geometry of pore-level resolution, coupled with material attributes for use in physically-based rendering. We aim to maximize the variety of face identities, while increasing the robustness of correspondence between unique components, including middle-frequency geometry, albedo maps, specular intensity maps and high-frequency displacement details. Our deep learning based generative model learns to correlate albedo and geometry, which ensures the anatomical correctness of the generated assets. We demonstrate potential use of our generative model for novel identity generation, model fitting, interpolation, animation, high fidelity data visualization, and low-to-high resolution data domain transferring. We hope the release of this generative model will encourage further cooperation between all graphics, vision, and data focused professionals, while demonstrating the cumulative value of every individual’s complete biometric profile.},
keywords = {Graphics, UARC},
pubstate = {published},
tppubtype = {inproceedings}
}
Olszewski, Kyle; Ceylan, Duygu; Xing, Jun; Echevarria, Jose; Chen, Zhili; Chen, Weikai; Li, Hao
Intuitive, Interactive Beard and Hair Synthesis with Generative Models Proceedings Article
In: Proceedings of the CVPR 2020, IEEE, Seattle, Washington, 2020.
Abstract | Links | BibTeX | Tags: Graphics, UARC
@inproceedings{olszewski_intuitive_2020,
title = {Intuitive, Interactive Beard and Hair Synthesis with Generative Models},
author = {Kyle Olszewski and Duygu Ceylan and Jun Xing and Jose Echevarria and Zhili Chen and Weikai Chen and Hao Li},
url = {http://arxiv.org/abs/2004.06848},
doi = {10.1109/CVPR42600.2020.00747},
year = {2020},
date = {2020-04-01},
booktitle = {Proceedings of the CVPR 2020},
publisher = {IEEE},
address = {Seattle, Washington},
abstract = {We present an interactive approach to synthesizing realistic variations in facial hair in images, ranging from subtle edits to existing hair to the addition of complex and challenging hair in images of clean-shaven subjects. To circumvent the tedious and computationally expensive tasks of modeling, rendering and compositing the 3D geometry of the target hairstyle using the traditional graphics pipeline, we employ a neural network pipeline that synthesizes realistic and detailed images of facial hair directly in the target image in under one second. The synthesis is controlled by simple and sparse guide strokes from the user defining the general structural and color properties of the target hairstyle. We qualitatively and quantitatively evaluate our chosen method compared to several alternative approaches. We show compelling interactive editing results with a prototype user interface that allows novice users to progressively refine the generated image to match their desired hairstyle, and demonstrate that our approach also allows for flexible and high-fidelity scalp hair synthesis.},
keywords = {Graphics, UARC},
pubstate = {published},
tppubtype = {inproceedings}
}
Chen, Meida; Feng, Andrew; Prasad, Pratusha Bhuvana; McAlinden, Ryan; Soibelman, Lucio; Enloe, Mike
Fully Automated Photogrammetric Data Segmentation and Object Information Extraction Approach for Creating Simulation Terrain Proceedings Article
In: Proceedings of the Interservice/Industry Training, Simulation, and Education Conference (I/ITSEC), pp. 13, ResearchGate, Orlando, FL, 2020.
@inproceedings{chen_fully_2020,
title = {Fully Automated Photogrammetric Data Segmentation and Object Information Extraction Approach for Creating Simulation Terrain},
author = {Meida Chen and Andrew Feng and Pratusha Bhuvana Prasad and Ryan McAlinden and Lucio Soibelman and Mike Enloe},
url = {https://www.researchgate.net/publication/338557943_Fully_Automated_Photogrammetric_Data_Segmentation_and_Object_Information_Extraction_Approach_for_Creating_Simulation_Terrain},
year = {2020},
date = {2020-01-01},
booktitle = {Proceedings of the Interservice/Industry Training, Simulation, and Education Conference (I/ITSEC)},
pages = {13},
publisher = {ResearchGate},
address = {Orlando, FL},
abstract = {Our previous works have demonstrated that visually realistic 3D meshes can be automatically reconstructed with low-cost, off-the-shelf unmanned aerial systems (UAS) equipped with capable cameras, and efficient photogrammetric software techniques (McAlinden, Suma, Grechkin, & Enloe, 2015; Spicer, McAlinden, Conover, & Adelphi, 2016). However, such generated data do not contain semantic information/features of objects (i.e., man-made objects, vegetation, ground, object materials, etc.) and cannot allow the sophisticated user-level and system-level interaction. Considering the use case of the data in creating realistic virtual environments for training and simulations (i.e., mission planning, rehearsal, threat detection, etc.), segmenting the data and extracting object information are essential tasks. Previous studies have focused on and made valuable contributions to segment Light Detection and Ranging (LIDAR) generated 3D point clouds and classifying ground materials from real-world images. However, only a few studies have focused on the data created using the photogrammetric technique.},
keywords = {Graphics, Narrative, STG, UARC},
pubstate = {published},
tppubtype = {inproceedings}
}
2019
Li, Hao
Technical perspective: Photorealistic facial digitization and manipulation Journal Article
In: Communications of the ACM, vol. 62, no. 1, pp. 95–95, 2019, ISSN: 00010782.
@article{li_technical_2019,
title = {Technical perspective: Photorealistic facial digitization and manipulation},
author = {Hao Li},
url = {http://dl.acm.org/citation.cfm?doid=3301004.3292037},
doi = {10.1145/3292037},
issn = {00010782},
year = {2019},
date = {2019-12-01},
journal = {Communications of the ACM},
volume = {62},
number = {1},
pages = {95–95},
abstract = {For more than a decade, computer graphics (CG) researchers and visual effects experts have been fascinated with bringing photorealistic digital actors to the screen. Crossing the well-known "uncanny valley" in CG humans has been one of the most difficult and crucial challenges, due to hypersensitivity to synthetic humans lacking even the slightest and most subtle features of genuine human faces. Given sufficient resources and time, photorealistic renderings of digital characters have been achieved in recent years. Some of the most memorable cases are seen in blockbuster movies, such as The Curious Case of Benjamin Button, Furious 7, and Rogue One: A Star Wars Story, in which large teams of highly skilled digital artists use cutting-edge digitization technologies. Despite the progress of 3D-scanning solutions, facial animation systems, and advanced rendering techniques, weeks of manual work are still needed to produce even just a few seconds of animation.},
keywords = {Graphics},
pubstate = {published},
tppubtype = {article}
}
Xing, Jun; Nagano, Koki; Chen, Weikai; Xu, Haotian; Wei, Li-yi; Zhao, Yajie; Lu, Jingwan; Kim, Byungmoon; Li, Hao
HairBrush for Immersive Data-Driven Hair Modeling Proceedings Article
In: Proceedings of the 32nd Annual ACM Symposium on User Interface Software and Technology - UIST '19, pp. 263–279, ACM Press, New Orleans, LA, USA, 2019, ISBN: 978-1-4503-6816-2.
@inproceedings{xing_hairbrush_2019,
title = {HairBrush for Immersive Data-Driven Hair Modeling},
author = {Jun Xing and Koki Nagano and Weikai Chen and Haotian Xu and Li-yi Wei and Yajie Zhao and Jingwan Lu and Byungmoon Kim and Hao Li},
url = {http://dl.acm.org/citation.cfm?doid=3332165.3347876},
doi = {10.1145/3332165.3347876},
isbn = {978-1-4503-6816-2},
year = {2019},
date = {2019-10-01},
booktitle = {Proceedings of the 32nd Annual ACM Symposium on User Interface Software and Technology - UIST '19},
pages = {263–279},
publisher = {ACM Press},
address = {New Orleans, LA, USA},
abstract = {While hair is an essential component of virtual humans, it is also one of the most challenging digital assets to create. Existing automatic techniques lack the generality and flexibility to create rich hair variations, while manual authoring interfaces often require considerable artistic skills and efforts, especially for intricate 3D hair structures that can be difficult to navigate. We propose an interactive hair modeling system that can help create complex hairstyles in minutes or hours that would otherwise take much longer with existing tools. Modelers, including novice users, can focus on the overall hairstyles and local hair deformations, as our system intelligently suggests the desired hair parts. Our method combines the flexibility of manual authoring and the convenience of data-driven automation. Since hair contains intricate 3D structures such as buns, knots, and strands, they are inherently challenging to create using traditional 2D interfaces. Our system provides a new 3D hair authoring interface for immersive interaction in virtual reality (VR). Users can draw high-level guide strips, from which our system predicts the most plausible hairstyles via a deep neural network trained from a professionally curated dataset. Each hairstyle in our dataset is composed of multiple variations, serving as blend-shapes to fit the user drawings via global blending and local deformation. The fitted hair models are visualized as interactive suggestions that the user can select, modify, or ignore. We conducted a user study to confirm that our system can significantly reduce manual labor while improve the output quality for modeling a variety of head and facial hairstyles that are challenging to create via existing techniques.},
keywords = {Graphics, UARC},
pubstate = {published},
tppubtype = {inproceedings}
}
Olszewski, Kyle; Tulyakov, Sergey; Woodford, Oliver; Li, Hao; Luo, Linjie
Transformable Bottleneck Networks Journal Article
In: arXiv:1904.06458 [cs], 2019.
@article{olszewski_transformable_2019,
title = {Transformable Bottleneck Networks},
author = {Kyle Olszewski and Sergey Tulyakov and Oliver Woodford and Hao Li and Linjie Luo},
url = {http://arxiv.org/abs/1904.06458},
year = {2019},
date = {2019-08-01},
journal = {arXiv:1904.06458 [cs]},
abstract = {We propose a novel approach to performing fine-grained 3D manipulation of image content via a convolutional neural network, which we call the Transformable Bottleneck Network (TBN). It applies given spatial transformations directly to a volumetric bottleneck within our encoder-bottleneck-decoder architecture. Multi-view supervision encourages the network to learn to spatially disentangle the feature space within the bottleneck. The resulting spatial structure can be manipulated with arbitrary spatial transformations. We demonstrate the efficacy of TBNs for novel view synthesis, achieving state-of-the-art results on a challenging benchmark. We demonstrate that the bottlenecks produced by networks trained for this task contain meaningful spatial structure that allows us to intuitively perform a variety of image manipulations in 3D, well beyond the rigid transformations seen during training. These manipulations include non-uniform scaling, non-rigid warping, and combining content from different images. Finally, we extract explicit 3D structure from the bottleneck, performing impressive 3D reconstruction from a single input image.},
keywords = {Graphics},
pubstate = {published},
tppubtype = {article}
}
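The core operation in the entry above, resampling a volumetric bottleneck under an explicit spatial transformation, can be sketched outside any network. A minimal NumPy/SciPy illustration, assuming a (C, D, H, W) feature volume and a 3x3 rotation; the function name and parameterization are ours, not the paper's code:

```python
import numpy as np
from scipy.ndimage import affine_transform

def transform_bottleneck(volume, rotation, order=1):
    """Resample a (C, D, H, W) feature volume under a 3x3 rotation about the
    grid center. affine_transform maps output coordinates back to input
    coordinates, hence the inverse (transpose) rotation and the offset."""
    c, d, h, w = volume.shape
    center = np.array([d, h, w]) / 2.0
    offset = center - rotation.T @ center
    return np.stack([affine_transform(volume[i], rotation.T, offset=offset, order=order)
                     for i in range(c)])

# Example: rotate a random feature volume 90 degrees about the depth axis.
theta = np.pi / 2
R = np.array([[1.0, 0.0, 0.0],
              [0.0, np.cos(theta), -np.sin(theta)],
              [0.0, np.sin(theta),  np.cos(theta)]])
features = np.random.rand(8, 16, 16, 16)
rotated = transform_bottleneck(features, R)
```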
Zhou, Yi; Barnes, Connelly; Lu, Jingwan; Yang, Jimei; Li, Hao
On the Continuity of Rotation Representations in Neural Networks Proceedings Article
In: Proceedings of CVPR, pp. 9, IEEE, Long Beach, CA, 2019.
@inproceedings{zhou_continuity_2019,
title = {On the Continuity of Rotation Representations in Neural Networks},
author = {Yi Zhou and Connelly Barnes and Jingwan Lu and Jimei Yang and Hao Li},
url = {http://openaccess.thecvf.com/content_CVPR_2019/html/Zhou_On_the_Continuity_of_Rotation_Representations_in_Neural_Networks_CVPR_2019_paper.html},
year = {2019},
date = {2019-06-01},
booktitle = {Proceedings of CVPR},
pages = {9},
publisher = {IEEE},
address = {Long Beach, CA},
abstract = {In neural networks, it is often desirable to work with various representations of the same space. For example, 3D rotations can be represented with quaternions or Euler angles. In this paper, we advance a definition of a continuous representation, which can be helpful for training deep neural networks. We relate this to topological concepts such as homeomorphism and embedding. We then investigate what are continuous and discontinuous representations for 2D, 3D, and n-dimensional rotations. We demonstrate that for 3D rotations, all representations are discontinuous in the real Euclidean spaces of four or fewer dimensions. Thus, widely used representations such as quaternions and Euler angles are discontinuous and difficult for neural networks to learn. We show that the 3D rotations have continuous representations in 5D and 6D, which are more suitable for learning. We also present continuous representations for the general case of the n dimensional rotation group SO(n). While our main focus is on rotations, we also show that our constructions apply to other groups such as the orthogonal group and similarity transforms. We finally present empirical results, which show that our continuous rotation representations outperform discontinuous ones for several practical problems in graphics and vision, including a simple autoencoder sanity test, a rotation estimator for 3D point clouds, and an inverse kinematics solver for 3D human poses.},
keywords = {Graphics, UARC},
pubstate = {published},
tppubtype = {inproceedings}
}
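The continuous 6D representation proposed in the entry above can be reproduced in a few lines: a network regresses two 3-vectors, and a Gram-Schmidt step maps them to a valid rotation matrix, so every raw output corresponds to a rotation. A minimal NumPy sketch (the function name is ours):

```python
import numpy as np

def rotation_from_6d(x):
    """Map a 6D vector (two stacked 3-vectors) to a rotation matrix via
    Gram-Schmidt orthogonalization; any input yields a valid rotation,
    which is what makes the representation continuous and easy to learn."""
    a1, a2 = x[:3], x[3:]
    b1 = a1 / np.linalg.norm(a1)
    b2 = a2 - np.dot(b1, a2) * b1
    b2 = b2 / np.linalg.norm(b2)
    b3 = np.cross(b1, b2)
    return np.stack([b1, b2, b3], axis=1)  # columns form an orthonormal, right-handed frame

R = rotation_from_6d(np.random.randn(6))
assert np.allclose(R.T @ R, np.eye(3)) and np.isclose(np.linalg.det(R), 1.0)
```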
Natsume, Ryota; Saito, Shunsuke; Huang, Zeng; Chen, Weikai; Ma, Chongyang; Li, Hao; Morishima, Shigeo
SiCloPe: Silhouette-Based Clothed People Proceedings Article
In: Proceedings of CVPR, pp. 11, IEEE, Long Beach, CA, 2019.
@inproceedings{natsume_siclope_2019,
title = {SiCloPe: Silhouette-Based Clothed People},
author = {Ryota Natsume and Shunsuke Saito and Zeng Huang and Weikai Chen and Chongyang Ma and Hao Li and Shigeo Morishima},
url = {http://openaccess.thecvf.com/content_CVPR_2019/html/Natsume_SiCloPe_Silhouette-Based_Clothed_People_CVPR_2019_paper.html},
year = {2019},
date = {2019-06-01},
booktitle = {Proceedings of CVPR},
pages = {11},
publisher = {IEEE},
address = {Long Beach, CA},
abstract = {We introduce a new silhouette-based representation for modeling clothed human bodies using deep generative models. Our method can reconstruct a complete and textured 3D model of a person wearing clothes from a single input picture. Inspired by the visual hull algorithm, our implicit representation uses 2D silhouettes and 3D joints of a body pose to describe the immense shape complexity and variations of clothed people. Given a segmented 2D silhouette of a person and its inferred 3D joints from the input picture, we first synthesize consistent silhouettes from novel view points around the subject. The synthesized silhouettes which are the most consistent with the input segmentation are fed into a deep visual hull algorithm for robust 3D shape prediction. We then infer the texture of the subject’s back view using the frontal image and segmentation mask as input to a conditional generative adversarial network. Our experiments demonstrate that our silhouette-based model is an effective representation and the appearance of the back view can be predicted reliably using an image-to-image translation network. While classic methods based on parametric models often fail for single-view images of subjects with challenging clothing, our approach can still produce successful results, which are comparable to those obtained from multi-view input.},
keywords = {Graphics, UARC},
pubstate = {published},
tppubtype = {inproceedings}
}
Agarwal, Shruti; Farid, Hany; Gu, Yuming; He, Mingming; Nagano, Koki; Li, Hao
Protecting World Leaders Against Deep Fakes Proceedings Article
In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshops, pp. 8, IEEE, Long Beach, CA, 2019.
@inproceedings{agarwal_protecting_2019,
title = {Protecting World Leaders Against Deep Fakes},
author = {Shruti Agarwal and Hany Farid and Yuming Gu and Mingming He and Koki Nagano and Hao Li},
url = {http://openaccess.thecvf.com/content_CVPRW_2019/papers/Media%20Forensics/Agarwal_Protecting_World_Leaders_Against_Deep_Fakes_CVPRW_2019_paper.pdf},
year = {2019},
date = {2019-06-01},
booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
pages = {8},
publisher = {IEEE},
address = {Long Beach, CA},
abstract = {The creation of sophisticated fake videos has been largely relegated to Hollywood studios or state actors. Recent advances in deep learning, however, have made it significantly easier to create sophisticated and compelling fake videos. With relatively modest amounts of data and computing power, the average person can, for example, create a video of a world leader confessing to illegal activity leading to a constitutional crisis, a military leader saying something racially insensitive leading to civil unrest in an area of military activity, or a corporate titan claiming that their profits are weak leading to global stock manipulation. These so called deep fakes pose a significant threat to our democracy, national security, and society. To contend with this growing threat, we describe a forensic technique that models facial expressions and movements that typify an individual’s speaking pattern. Although not visually apparent, these correlations are often violated by the nature of how deep-fake videos are created and can, therefore, be used for authentication.},
keywords = {Graphics},
pubstate = {published},
tppubtype = {inproceedings}
}
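A hedged sketch of the kind of person-specific signature the forensic approach above relies on: pairwise correlations between tracked facial-expression and head-movement signals over a talking segment. The feature choice and function name here are illustrative, not the authors' exact pipeline:

```python
import numpy as np

def correlation_signature(tracks):
    """tracks: (T, N) array of N facial action-unit / head-pose signals over
    T frames. Returns the upper-triangular Pearson correlations as a feature
    vector; deviations from a person's typical correlation pattern can then
    be flagged by a downstream classifier."""
    corr = np.corrcoef(tracks, rowvar=False)   # (N, N) correlation matrix
    iu = np.triu_indices_from(corr, k=1)
    return corr[iu]

signature = correlation_signature(np.random.rand(300, 16))  # e.g. 10 s of tracks at 30 fps
```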
Zhao, Yajie; Huang, Zeng; Li, Tianye; Chen, Weikai; LeGendre, Chloe; Ren, Xinglei; Xing, Jun; Shapiro, Ari; Li, Hao
Learning Perspective Undistortion of Portraits Journal Article
In: arXiv:1905.07515 [cs], 2019.
@article{zhao_learning_2019,
title = {Learning Perspective Undistortion of Portraits},
author = {Yajie Zhao and Zeng Huang and Tianye Li and Weikai Chen and Chloe LeGendre and Xinglei Ren and Jun Xing and Ari Shapiro and Hao Li},
url = {http://arxiv.org/abs/1905.07515},
year = {2019},
date = {2019-05-01},
journal = {arXiv:1905.07515 [cs]},
abstract = {Near-range portrait photographs often contain perspective distortion artifacts that bias human perception and challenge both facial recognition and reconstruction techniques. We present the first deep learning based approach to remove such artifacts from unconstrained portraits. In contrast to the previous state-of-the-art approach, our method handles even portraits with extreme perspective distortion, as we avoid the inaccurate and error-prone step of first fitting a 3D face model. Instead, we predict a distortion correction flow map that encodes a per-pixel displacement that removes distortion artifacts when applied to the input image. Our method also automatically infers missing facial features, i.e. occluded ears caused by strong perspective distortion, with coherent details. We demonstrate that our approach significantly outperforms the previous state-of-the-art both qualitatively and quantitatively, particularly for portraits with extreme perspective distortion or facial expressions. We further show that our technique benefits a number of fundamental tasks, significantly improving the accuracy of both face recognition and 3D reconstruction and enables a novel camera calibration technique from a single portrait. Moreover, we also build the first perspective portrait database with a large diversity in identities, expression and poses, which will benefit the related research in this area.},
keywords = {Graphics, Virtual Humans},
pubstate = {published},
tppubtype = {article}
}
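To make the flow-map idea above concrete, here is a minimal sketch of applying a predicted per-pixel displacement field to an image; the prediction itself would come from the network, and `apply_flow` is an illustrative helper rather than the authors' code:

```python
import numpy as np
from scipy.ndimage import map_coordinates

def apply_flow(image, flow):
    """Warp an (H, W, 3) image by a per-pixel displacement field (H, W, 2),
    given in pixels as (dx, dy): each output pixel samples the input at its
    own location plus the predicted displacement."""
    h, w = image.shape[:2]
    ys, xs = np.meshgrid(np.arange(h), np.arange(w), indexing='ij')
    coords = np.stack([ys + flow[..., 1], xs + flow[..., 0]])  # (2, H, W) sample locations
    return np.stack([map_coordinates(image[..., c], coords, order=1, mode='nearest')
                     for c in range(image.shape[2])], axis=-1)
```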
Liu, Shichen; Li, Tianye; Chen, Weikai; Li, Hao
Soft Rasterizer: A Differentiable Renderer for Image-based 3D Reasoning Journal Article
In: arXiv:1904.01786 [cs], 2019.
@article{liu_soft_2019,
title = {Soft Rasterizer: A Differentiable Renderer for Image-based 3D Reasoning},
author = {Shichen Liu and Tianye Li and Weikai Chen and Hao Li},
url = {http://arxiv.org/abs/1904.01786},
year = {2019},
date = {2019-04-01},
journal = {arXiv:1904.01786 [cs]},
abstract = {Rendering bridges the gap between 2D vision and 3D scenes by simulating the physical process of image formation. By inverting such renderer, one can think of a learning approach to infer 3D information from 2D images. However, standard graphics renderers involve a fundamental discretization step called rasterization, which prevents the rendering process to be differentiable, hence able to be learned. Unlike the state-of-the-art differentiable renderers [29, 19], which only approximate the rendering gradient in the back propagation, we propose a truly differentiable rendering framework that is able to (1) directly render colorized mesh using differentiable functions and (2) back-propagate efficient supervision signals to mesh vertices and their attributes from various forms of image representations, including silhouette, shading and color images. The key to our framework is a novel formulation that views rendering as an aggregation function that fuses the probabilistic contributions of all mesh triangles with respect to the rendered pixels. Such formulation enables our framework to flow gradients to the occluded and far-range vertices, which cannot be achieved by the previous state-of-the-arts. We show that by using the proposed renderer, one can achieve significant improvement in 3D unsupervised single-view reconstruction both qualitatively and quantitatively. Experiments also demonstrate that our approach is able to handle the challenging tasks in image-based shape fitting, which remain nontrivial to existing differentiable renderers. Code is available at https://github.com/ShichenLiu/SoftRas.},
keywords = {Graphics, UARC},
pubstate = {published},
tppubtype = {article}
}
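The aggregation view described in the entry above can be sketched independently of the mesh and gradient machinery: given per-triangle soft coverage maps and depths, colors are fused with depth-weighted soft weights rather than a hard z-buffer. A loose NumPy illustration; the exact weighting in the paper differs, and the names and flat per-triangle colors are simplifications:

```python
import numpy as np

def soft_aggregate(probs, colors, depths, gamma=1e-2, bg_color=(0.0, 0.0, 0.0)):
    """probs:  (T, H, W) soft coverage of each triangle at each pixel
    colors: (T, 3)    one flat color per triangle (a simplification)
    depths: (T, H, W) per-pixel depth of each triangle (smaller = closer)
    Fuses all triangles per pixel with softmax-like, depth-aware weights,
    so every triangle contributes (and receives gradients), not just the front-most one."""
    z = probs * np.exp(-depths / gamma)            # closer and better covered -> larger weight
    denom = z.sum(axis=0) + np.exp(-1.0 / gamma)   # constant background term
    weights = z / denom                            # (T, H, W)
    image = np.einsum('thw,tc->hwc', weights, colors)
    image += (1.0 - weights.sum(axis=0))[..., None] * np.asarray(bg_color)
    return image
```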
Artstein, Ron; Gordon, Carla; Sohail, Usman; Merchant, Chirag; Jones, Andrew; Campbell, Julia; Trimmer, Matthew; Bevington, Jeffrey; Engen, COL Christopher; Traum, David
Digital Survivor of Sexual Assault Proceedings Article
In: Proceedings of the 24th International Conference on Intelligent User Interfaces, pp. 417–425, ACM, Marina del Rey, California, 2019, ISBN: 978-1-4503-6272-6.
@inproceedings{artstein_digital_2019,
title = {Digital Survivor of Sexual Assault},
author = {Ron Artstein and Carla Gordon and Usman Sohail and Chirag Merchant and Andrew Jones and Julia Campbell and Matthew Trimmer and Jeffrey Bevington and COL Christopher Engen and David Traum},
url = {https://doi.org/10.1145/3301275.3302303},
doi = {10.1145/3301275.3302303},
isbn = {978-1-4503-6272-6},
year = {2019},
date = {2019-03-01},
booktitle = {Proceedings of the 24th International Conference on Intelligent User Interfaces},
pages = {417–425},
publisher = {ACM},
address = {Marina del Rey, California},
abstract = {The Digital Survivor of Sexual Assault (DS2A) is an interface that allows a user to have a conversational experience with a survivor of sexual assault, using Artificial Intelligence technology and recorded videos. The application uses a statistical classifier to retrieve contextually appropriate pre-recorded video utterances by the survivor, together with dialogue management policies which enable users to conduct simulated conversations with the survivor about the sexual assault, its aftermath, and other pertinent topics. The content in the application has been specifically elicited to support the needs for the training of U.S. Army professionals in the Sexual Harassment/Assault Response and Prevention (SHARP) Program, and the application comes with an instructional support package. The system has been tested with approximately 200 users, and is presently being used in the SHARP Academy's capstone course.},
keywords = {DoD, Graphics, MedVR, UARC, Virtual Humans},
pubstate = {published},
tppubtype = {inproceedings}
}
2018
Saito, Shunsuke; Hu, Liwen; Ma, Chongyang; Ibayashi, Hikaru; Luo, Linjie; Li, Hao
3D Hair Synthesis Using Volumetric Variational Autoencoders Proceedings Article
In: SIGGRAPH Asia 2018 Technical Papers on - SIGGRAPH Asia '18, pp. 1–12, ACM Press, Tokyo, Japan, 2018, ISBN: 978-1-4503-6008-1.
@inproceedings{saito_3d_2018,
title = {3D Hair Synthesis Using Volumetric Variational Autoencoders},
author = {Shunsuke Saito and Liwen Hu and Chongyang Ma and Hikaru Ibayashi and Linjie Luo and Hao Li},
url = {http://dl.acm.org/citation.cfm?doid=3272127.3275019},
doi = {10.1145/3272127.3275019},
isbn = {978-1-4503-6008-1},
year = {2018},
date = {2018-12-01},
booktitle = {SIGGRAPH Asia 2018 Technical Papers on - SIGGRAPH Asia '18},
pages = {1–12},
publisher = {ACM Press},
address = {Tokyo, Japan},
abstract = {Recent advances in single-view 3D hair digitization have made the creation of high-quality CG characters scalable and accessible to end-users, enabling new forms of personalized VR and gaming experiences. To handle the complexity and variety of hair structures, most cutting-edge techniques rely on the successful retrieval of a particular hair model from a comprehensive hair database. Not only are the aforementioned data-driven methods storage intensive, but they are also prone to failure for highly unconstrained input images, complicated hairstyles, and failed face detection. Instead of using a large collection of 3D hair models directly, we propose to represent the manifold of 3D hairstyles implicitly through a compact latent space of a volumetric variational autoencoder (VAE). This deep neural network is trained with volumetric orientation field representations of 3D hair models and can synthesize new hairstyles from a compressed code. To enable end-to-end 3D hair inference, we train an additional embedding network to predict the code in the VAE latent space from any input image. Strand-level hairstyles can then be generated from the predicted volumetric representation. Our fully automatic framework does not require any ad-hoc face fitting, intermediate classification and segmentation, or hairstyle database retrieval. Our hair synthesis approach is significantly more robust and can handle a much wider variation of hairstyles than state-of-the-art data-driven hair modeling techniques with challenging inputs, including photos that are low-resolution, overexposured, or contain extreme head poses. The storage requirements are minimal and a 3D hair model can be produced from an image in a second. Our evaluations also show that successful reconstructions are possible from highly stylized cartoon images, non-human subjects, and pictures taken from behind a person. Our approach is particularly well suited for continuous and plausible hair interpolation between very different hairstyles.},
keywords = {Graphics, UARC},
pubstate = {published},
tppubtype = {inproceedings}
}
Wei, Lingyu; Hu, Liwen; Kim, Vladimir; Yumer, Ersin; Li, Hao
Real-Time Hair Rendering using Sequential Adversarial Networks Proceedings Article
In: Proceedings of the 15th European Conference on Computer Vision, Computer Vision Foundation, Munich, Germany, 2018.
@inproceedings{wei_real-time_2018,
title = {Real-Time Hair Rendering using Sequential Adversarial Networks},
author = {Lingyu Wei and Liwen Hu and Vladimir Kim and Ersin Yumer and Hao Li},
url = {http://openaccess.thecvf.com/content_ECCV_2018/papers/Lingyu_Wei_Real-Time_Hair_Rendering_ECCV_2018_paper.pdf},
year = {2018},
date = {2018-09-01},
booktitle = {Proceedings of the 15th European Conference on Computer Vision},
publisher = {Computer Vision Foundation},
address = {Munich, Germany},
abstract = {We present an adversarial network for rendering photorealistic hair as an alternative to conventional computer graphics pipelines. Our deep learning approach does not require low-level parameter tuning nor ad-hoc asset design. Our method simply takes a strand-based 3D hair model as input and provides intuitive user-control for color and lighting through reference images. To handle the diversity of hairstyles and its appearance complexity, we disentangle hair structure, color, and illumination properties using a sequential GAN architecture and a semisupervised training approach. We also introduce an intermediate edge activation map to orientation field conversion step to ensure a successful CG-to-photoreal transition, while preserving the hair structures of the original input data. As we only require a feed-forward pass through the network, our rendering performs in real-time. We demonstrate the synthesis of photorealistic hair images on a wide range of intricate hairstyles and compare our technique with state-of-the-art hair rendering methods.},
keywords = {Graphics, UARC},
pubstate = {published},
tppubtype = {inproceedings}
}
Song, Yuhang; Yang, Chao; Lin, Zhe; Liu, Xiaofeng; Li, Hao; Huang, Qin
Contextual Based Image Inpainting: Infer, Match and Translate Proceedings Article
In: Proceedings of the 15th European Conference on Computer Vision, Computer Vision Foundation, Munich, Germany, 2018.
@inproceedings{song_contextual_2018,
title = {Contextual Based Image Inpainting: Infer, Match and Translate},
author = {Yuhang Song and Chao Yang and Zhe Lin and Xiaofeng Liu and Hao Li and Qin Huang},
url = {http://openaccess.thecvf.com/content_ECCV_2018/papers/Yuhang_Song_Contextual_Based_Image_ECCV_2018_paper.pdf},
year = {2018},
date = {2018-09-01},
booktitle = {Proceedings of the 15th European Conference on Computer Vision},
publisher = {Computer Vision Foundation},
address = {Munich, Germany},
abstract = {We study the task of image inpainting, which is to fill in the missing region of an incomplete image with plausible contents. To this end, we propose a learning-based approach to generate visually coherent completion given a high-resolution image with missing components. In order to overcome the difficulty to directly learn the distribution of highdimensional image data, we divide the task into inference and translation as two separate steps and model each step with a deep neural network. We also use simple heuristics to guide the propagation of local textures from the boundary to the hole. We show that, by using such techniques, inpainting reduces to the problem of learning two image-feature translation functions in much smaller space and hence easier to train. We evaluate our method on several public datasets and show that we generate results of better visual quality than previous state-of-the-art methods.},
keywords = {Graphics, UARC},
pubstate = {published},
tppubtype = {inproceedings}
}
Zheng, Zerong; Yu, Tao; Li, Hao; Guo, Kaiwen; Dai, Qionghai; Fang, Lu; Liu, Yebin
HybridFusion: Real-Time Performance Capture Using a Single Depth Sensor and Sparse IMUs Proceedings Article
In: Proceedings of the 15th European Conference on Computer Vision, Computer Vision Foundation, Munich, Germany, 2018.
@inproceedings{zheng_hybridfusion_2018,
title = {HybridFusion: Real-Time Performance Capture Using a Single Depth Sensor and Sparse IMUs},
author = {Zerong Zheng and Tao Yu and Hao Li and Kaiwen Guo and Qionghai Dai and Lu Fang and Yebin Liu},
url = {http://openaccess.thecvf.com/content_ECCV_2018/papers/Zerong_Zheng_HybridFusion_Real-Time_Performance_ECCV_2018_paper.pdf},
year = {2018},
date = {2018-09-01},
booktitle = {Proceedings of the 15th European Conference on Computer Vision},
publisher = {Computer Vision Foundation},
address = {Munich, Germany},
abstract = {We propose a light-weight yet highly robust method for realtime human performance capture based on a single depth camera and sparse inertial measurement units (IMUs). Our method combines nonrigid surface tracking and volumetric fusion to simultaneously reconstruct challenging motions, detailed geometries and the inner human body of a clothed subject. The proposed hybrid motion tracking algorithm and efficient per-frame sensor calibration technique enable nonrigid surface reconstruction for fast motions and challenging poses with severe occlusions. Significant fusion artifacts are reduced using a new confidence measurement for our adaptive TSDF-based fusion. The above contributions are mutually beneficial in our reconstruction system, which enable practical human performance capture that is real-time, robust, low-cost and easy to deploy. Experiments show that extremely challenging performances and loop closure problems can be handled successfully.},
keywords = {Graphics, UARC},
pubstate = {published},
tppubtype = {inproceedings}
}
Zhou, Yi; Hu, Liwen; Xing, Jun; Chen, Weikai; Kung, Han-Wei; Tong, Xin; Li, Hao
HairNet: Single-View Hair Reconstruction using Convolutional Neural Networks Proceedings Article
In: Proceedings of the 15th European Conference on Computer Vision, Computer Vision Foundation, Munich, Germany, 2018.
@inproceedings{zhou_hairnet_2018,
title = {HairNet: Single-View Hair Reconstruction using Convolutional Neural Networks},
author = {Yi Zhou and Liwen Hu and Jun Xing and Weikai Chen and Han-Wei Kung and Xin Tong and Hao Li},
url = {http://openaccess.thecvf.com/content_ECCV_2018/papers/Yi_Zhou_Single-view_Hair_Reconstruction_ECCV_2018_paper.pdf},
year = {2018},
date = {2018-09-01},
booktitle = {Proceedings of the 15th European Conference on Computer Vision},
publisher = {Computer Vision Foundation},
address = {Munich, Germany},
abstract = {We introduce a deep learning-based method to generate full 3D hair geometry from an unconstrained image. Our method can recover local strand details and has real-time performance. State-of-the-art hair modeling techniques rely on large hairstyle collections for nearest neighbor retrieval and then perform ad-hoc refinement. Our deep learning approach, in contrast, is highly efficient in storage and can run 1000 times faster while generating hair with 30K strands. The convolutional neural network takes the 2D orientation field of a hair image as input and generates strand features that are evenly distributed on the parameterized 2D scalp. We introduce a collision loss to synthesize more plausible hairstyles, and the visibility of each strand is also used as a weight term to improve the reconstruction accuracy. The encoder-decoder architecture of our network naturally provides a compact and continuous representation for hairstyles, which allows us to interpolate naturally between hairstyles. We use a large set of rendered synthetic hair models to train our network. Our method scales to real images because an intermediate 2D orientation field, automatically calculated from the real image, factors out the difference between synthetic and real hairs. We demonstrate the effectiveness and robustness of our method on a wide range of challenging real Internet pictures, and show reconstructed hair sequences from videos.},
keywords = {Graphics, UARC},
pubstate = {published},
tppubtype = {inproceedings}
}
Huang, Zeng; Li, Tianye; Chen, Weikai; Zhao, Yajie; Xing, Jun; LeGendre, Chloe; Luo, Linjie; Ma, Chongyang; Li, Hao
Deep Volumetric Video From Very Sparse Multi-View Performance Capture Proceedings Article
In: Proceedings of the 15th European Conference on Computer Vision, Computer Vision Foundation, Munich, Germany, 2018.
@inproceedings{huang_deep_2018,
title = {Deep Volumetric Video From Very Sparse Multi-View Performance Capture},
author = {Zeng Huang and Tianye Li and Weikai Chen and Yajie Zhao and Jun Xing and Chloe LeGendre and Linjie Luo and Chongyang Ma and Hao Li},
url = {http://openaccess.thecvf.com/content_ECCV_2018/papers/Zeng_Huang_Deep_Volumetric_Video_ECCV_2018_paper.pdf},
year = {2018},
date = {2018-09-01},
booktitle = {Proceedings of the 15th European Conference on Computer Vision},
publisher = {Computer Vision Foundation},
address = {Munich, Germany},
abstract = {We present a deep learning based volumetric approach for performance capture using a passive and highly sparse multi-view capture system. State-of-the-art performance capture systems require either prescanned actors, large number of cameras or active sensors. In this work, we focus on the task of template-free, per-frame 3D surface reconstruction from as few as three RGB sensors, for which conventional visual hull or multi-view stereo methods fail to generate plausible results. We introduce a novel multi-view Convolutional Neural Network (CNN) that maps 2D images to a 3D volumetric field and we use this field to encode the probabilistic distribution of surface points of the captured subject. By querying the resulting field, we can instantiate the clothed human body at arbitrary resolutions. Our approach scales to different numbers of input images, which yield increased reconstruction quality when more views are used. Although only trained on synthetic data, our network can generalize to handle real footage from body performance capture. Our method is suitable for high-quality low-cost full body volumetric capture solutions, which are gaining popularity for VR and AR content creation. Experimental results demonstrate that our method is significantly more robust and accurate than existing techniques when only very sparse views are available.},
keywords = {Graphics, UARC},
pubstate = {published},
tppubtype = {inproceedings}
}
Yamaguchi, Shuco; Saito, Shunsuke; Nagano, Koki; Zhao, Yajie; Chen, Weikai; Olszewski, Kyle; Morishima, Shigeo; Li, Hao
High-fidelity facial reflectance and geometry inference from an unconstrained image Journal Article
In: ACM Transactions on Graphics, vol. 37, no. 4, pp. 1–14, 2018, ISSN: 07300301.
@article{yamaguchi_high-fidelity_2018,
title = {High-fidelity facial reflectance and geometry inference from an unconstrained image},
author = {Shuco Yamaguchi and Shunsuke Saito and Koki Nagano and Yajie Zhao and Weikai Chen and Kyle Olszewski and Shigeo Morishima and Hao Li},
url = {http://dl.acm.org/citation.cfm?doid=3197517.3201364},
doi = {10.1145/3197517.3201364},
issn = {07300301},
year = {2018},
date = {2018-08-01},
journal = {ACM Transactions on Graphics},
volume = {37},
number = {4},
pages = {1–14},
abstract = {We present a deep learning-based technique to infer high-quality facial reflectance and geometry given a single unconstrained image of the subject, which may contain partial occlusions and arbitrary illumination conditions. The reconstructed high-resolution textures, which are generated in only a few seconds, include high-resolution skin surface reflectance maps, representing both the diffuse and specular albedo, and medium- and high-frequency displacement maps, thereby allowing us to render compelling digital avatars under novel lighting conditions. To extract this data, we train our deep neural networks with a high-quality skin reflectance and geometry database created with a state-of-the-art multi-view photometric stereo system using polarized gradient illumination. Given the raw facial texture map extracted from the input image, our neural networks synthesize complete reflectance and displacement maps, as well as complete missing regions caused by occlusions. The completed textures exhibit consistent quality throughout the face due to our network architecture, which propagates texture features from the visible region, resulting in high-fidelity details that are consistent with those seen in visible regions. We describe how this highly underconstrained problem is made tractable by dividing the full inference into smaller tasks, which are addressed by dedicated neural networks. We demonstrate the effectiveness of our network design with robust texture completion from images of faces that are largely occluded. With the inferred reflectance and geometry data, we demonstrate the rendering of high-fidelity 3D avatars from a variety of subjects captured under different lighting conditions. In addition, we perform evaluations demonstrating that our method can infer plausible facial reflectance and geometric details comparable to those obtained from high-end capture devices, and outperform alternative approaches that require only a single unconstrained input image.},
keywords = {Graphics, UARC},
pubstate = {published},
tppubtype = {article}
}
LeGendre, Chloe; Bladin, Kalle; Kishore, Bipin; Ren, Xinglei; Yu, Xueming; Debevec, Paul
Efficient Multispectral Facial Capture with Monochrome Cameras Proceedings Article
In: ACM SIGGRAPH 2018 Posters on - SIGGRAPH '18, ACM Press, Vancouver, British Columbia, Canada, 2018, ISBN: 978-1-4503-5817-0.
@inproceedings{legendre_efficient_2018,
title = {Efficient Multispectral Facial Capture with Monochrome Cameras},
author = {Chloe LeGendre and Kalle Bladin and Bipin Kishore and Xinglei Ren and Xueming Yu and Paul Debevec},
url = {http://dl.acm.org/citation.cfm?doid=3230744.3230778},
doi = {10.1145/3230744.3230778},
isbn = {978-1-4503-5817-0},
year = {2018},
date = {2018-08-01},
booktitle = {ACM SIGGRAPH 2018 Posters on - SIGGRAPH '18},
publisher = {ACM Press},
address = {Vancouver, British Columbia, Canada},
abstract = {We propose a variant to polarized gradient illumination facial scanning which uses monochrome instead of color cameras to achieve more efficient and higher-resolution results. In typical polarized gradient facial scanning, sub-millimeter geometric detail is acquired by photographing the subject in eight or more polarized spherical gradient lighting conditions made with white LEDs, and RGB cameras are used to acquire color texture maps of the subject's appearance. In our approach, we replace the color cameras and white LEDs with monochrome cameras and multispectral, colored LEDs, leveraging that color images can be formed from successive monochrome images recorded under different illumination colors. While a naive extension of the scanning process to this setup would require multiplying the number of images by number of color channels, we show that the surface detail maps can be estimated directly from monochrome imagery, so that only an additional n photographs are required, where n is the number of added spectral channels. We also introduce a new multispectral optical flow approach to align images across spectral channels in the presence of slight subject motion. Lastly, for the case where a capture system's white light sources are polarized and its multispectral colored LEDs are not, we introduce the technique of multispectral polarization promotion, where we estimate the cross- and parallel-polarized monochrome images for each spectral channel from their corresponding images under a full sphere of even, unpolarized illumination. We demonstrate that this technique allows us to efficiently acquire a full color (or even multispectral) facial scan using monochrome cameras, unpolarized multispectral colored LEDs, and polarized white LEDs.},
keywords = {Graphics},
pubstate = {published},
tppubtype = {inproceedings}
}
Zhang, Bo; He, Mingming; Liao, Jing; Sander, Pedro V; Yuan, Lu; Bermak, Amine; Chen, Dong
Deep Exemplar-Based Video Colorization Journal Article
In: ACM Transactions on Graphics, vol. 37, no. 4, pp. 10, 2018.
@article{zhang_deep_2018,
title = {Deep Exemplar-Based Video Colorization},
author = {Bo Zhang and Mingming He and Jing Liao and Pedro V Sander and Lu Yuan and Amine Bermak and Dong Chen},
url = {https://dl.acm.org/citation.cfm?id=3201365},
doi = {10.1145/3197517.3201365},
year = {2018},
date = {2018-08-01},
journal = {ACM Transactions on Graphics},
volume = {37},
number = {4},
pages = {10},
abstract = {This paper presents the first end-to-end network for exemplar-based video colorization. The main challenge is to achieve temporal consistency while remaining faithful to the reference style. To address this issue, we introduce a recurrent framework that unifies the semantic correspondence and color propagation steps. Both steps allow a provided reference image to guide the colorization of every frame, thus reducing accumulated propagation errors. Video frames are colorized in sequence based on the colorization history, and its coherency is further enforced by the temporal consistency loss. All of these components, learnt end-to-end, help produce realistic videos with good temporal stability. Experiments show our result is superior to the state-of-the-art methods both quantitatively and qualitatively.},
keywords = {Graphics},
pubstate = {published},
tppubtype = {article}
}
Huynh, Loc; Chen, Weikai; Saito, Shunsuke; Xing, Jun; Nagano, Koki; Jones, Andrew; Debevec, Paul; Li, Hao
Mesoscopic Facial Geometry Inference Using Deep Neural Networks Proceedings Article
In: Proceedings of the 31st IEEE International Conference on Computer Vision and Pattern Recognition, IEEE, Salt Lake City, UT, 2018.
@inproceedings{huynh_mesoscopic_2018,
title = {Mesoscopic Facial Geometry Inference Using Deep Neural Networks},
author = {Loc Huynh and Weikai Chen and Shunsuke Saito and Jun Xing and Koki Nagano and Andrew Jones and Paul Debevec and Hao Li},
url = {http://openaccess.thecvf.com/content_cvpr_2018/papers/Huynh_Mesoscopic_Facial_Geometry_CVPR_2018_paper.pdf},
year = {2018},
date = {2018-06-01},
booktitle = {Proceedings of the 31st IEEE International Conference on Computer Vision and Pattern Recognition},
publisher = {IEEE},
address = {Salt Lake City, UT},
abstract = {We present a learning-based approach for synthesizing facial geometry at medium and fine scales from diffusely-lit facial texture maps. When applied to an image sequence, the synthesized detail is temporally coherent. Unlike current state-of-the-art methods [17, 5], which assume “dark is deep”, our model is trained with measured facial detail collected using polarized gradient illumination in a Light Stage [20]. This enables us to produce plausible facial detail across the entire face, including where previous approaches may incorrectly interpret dark features as concavities such as at moles, hair stubble, and occluded pores. Instead of directly inferring 3D geometry, we propose to encode fine details in high-resolution displacement maps which are learned through a hybrid network adopting the state-of-the-art image-to-image translation network [29] and super resolution network [43]. To effectively capture geometric detail at both mid- and high frequencies, we factorize the learning into two separate sub-networks, enabling the full range of facial detail to be modeled. Results from our learning-based approach compare favorably with a high-quality active facial scanning technique, and require only a single passive lighting condition without a complex scanning setup.},
keywords = {Graphics, UARC},
pubstate = {published},
tppubtype = {inproceedings}
}
Yu, Tao; Zheng, Zerong; Guo, Kaiwen; Zhao, Jianhui; Dai, Qionghai; Li, Hao; Pons-Moll, Gerard; Liu, Yebin
DoubleFusion: Real-time Capture of Human Performances with Inner Body Shapes from a Single Depth Sensor Proceedings Article
In: Proceedings of the 31st IEEE International Conference on Computer Vision and Pattern Recognition, IEEE, Salt Lake City, UT, 2018.
@inproceedings{yu_doublefusion_2018,
title = {DoubleFusion: Real-time Capture of Human Performances with Inner Body Shapes from a Single Depth Sensor},
author = {Tao Yu and Zerong Zheng and Kaiwen Guo and Jianhui Zhao and Qionghai Dai and Hao Li and Gerard Pons-Moll and Yebin Liu},
url = {http://openaccess.thecvf.com/content_cvpr_2018/CameraReady/1321.pdf},
year = {2018},
date = {2018-06-01},
booktitle = {Proceedings of the 31st IEEE International Conference on Computer Vision and Pattern Recognition},
publisher = {IEEE},
address = {Salt Lake City, UT},
abstract = {We propose DoubleFusion, a new real-time system that combines volumetric dynamic reconstruction with data-driven template fitting to simultaneously reconstruct detailed geometry, non-rigid motion and the inner human body shape from a single depth camera. One of the key contributions of this method is a double layer representation consisting of a complete parametric body shape inside, and a gradually fused outer surface layer. A pre-defined node graph on the body surface parameterizes the nonrigid deformations near the body, and a free-form dynamically changing graph parameterizes the outer surface layer far from the body, which allows more general reconstruction. We further propose a joint motion tracking method based on the double layer representation to enable robust and fast motion tracking performance. Moreover, the inner body shape is optimized online and forced to fit inside the outer surface layer. Overall, our method enables increasingly denoised, detailed and complete surface reconstructions, fast motion tracking performance and plausible inner body shape reconstruction in real-time. In particular, experiments show improved fast motion tracking and loop closure performance on more challenging scenarios.},
keywords = {Graphics},
pubstate = {published},
tppubtype = {inproceedings}
}
Zhou, Yi; He, Chong; Li, Zimo; Xiao, Shuangjiu; Huang, Zeng; Li, Hao
Auto-Conditioned Recurrent Networks for Extended Complex Human Motion Synthesis Proceedings Article
In: Proceedings of the 6th International Conference on Learning Representations, ICLR, Vancouver, British Columbia, Canada, 2018.
@inproceedings{zhou_auto-conditioned_2018,
title = {Auto-Conditioned Recurrent Networks for Extended Complex Human Motion Synthesis},
author = {Yi Zhou and Chong He and Zimo Li and Shuangjiu Xiao and Zeng Huang and Hao Li},
url = {https://openreview.net/forum?id=r11Q2SlRW},
year = {2018},
date = {2018-04-01},
booktitle = {Proceedings of the 6th International Conference on Learning Representations},
publisher = {ICLR},
address = {Vancouver, British Columbia, Canada},
abstract = {We present a real-time method for synthesizing highly complex human motions using a novel training regime we call the auto-conditioned Recurrent Neural Network (acRNN). Recently, researchers have attempted to synthesize new motion by using autoregressive techniques, but existing methods tend to freeze or diverge after a couple of seconds due to an accumulation of errors that are fed back into the network. Furthermore, such methods have only been shown to be reliable for relatively simple human motions, such as walking or running. In contrast, our approach can synthesize arbitrary motions with highly complex styles, including dances or martial arts in addition to locomotion. The acRNN is able to accomplish this by explicitly accommodating for autoregressive noise accumulation during training. Our work is the first to our knowledge that demonstrates the ability to generate over 18,000 continuous frames (300 seconds) of new complex human motion w.r.t. different styles.},
keywords = {Graphics},
pubstate = {published},
tppubtype = {inproceedings}
}
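The training trick described in the entry above, feeding the network its own predictions on a fixed schedule so it learns to recover from accumulated error, can be sketched independently of any particular RNN. A minimal illustration where `step_fn` stands in for one recurrent step and the block length is arbitrary; both are assumptions, not the paper's settings:

```python
import numpy as np

def auto_conditioned_rollout(ground_truth, step_fn, block=5):
    """ground_truth: (T, D) motion frames. Alternates blocks in which the
    next input is the ground-truth frame with blocks in which the input is
    the network's own previous prediction, mimicking the auto-conditioned
    training regime."""
    outputs, prev = [], ground_truth[0]
    for t in range(1, len(ground_truth)):
        pred = step_fn(prev)                      # one recurrent step
        outputs.append(pred)
        self_conditioned = (t // block) % 2 == 1  # every other block of `block` frames
        prev = pred if self_conditioned else ground_truth[t]
    return np.stack(outputs)

# Toy usage with an identity "network":
frames = np.random.rand(60, 63)                   # 60 frames of a 21-joint pose
rollout = auto_conditioned_rollout(frames, step_fn=lambda x: x)
```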
Chen, Haiwei; Chen, Samantha; Rosenberg, Evan Suma
Redirected Walking Strategies in Irregularly Shaped and Dynamic Physical Environments Proceedings Article
In: Proceedings of the IEEE VR 2018, the 25th IEEE Conference on Virtual Reality and 3D User Interfaces, IEEE, Reutlingen, Germany, 2018.
@inproceedings{chen_redirected_2018,
title = {Redirected Walking Strategies in Irregularly Shaped and Dynamic Physical Environments},
author = {Haiwei Chen and Samantha Chen and Evan Suma Rosenberg},
url = {http://wevr.adalsimeone.me/2018/WEVR2018_Chen.pdf},
year = {2018},
date = {2018-03-01},
booktitle = {Proceedings of the IEEE VR 2018, the 25th IEEE Conference on Virtual Reality and 3D User Interfaces},
publisher = {IEEE},
address = {Reutlingen, Germany},
abstract = {Redirected walking (RDW) is a Virtual Reality (VR) locomotion technique that enables the exploration of a large virtual environment (VE) within a small physical space via real walking. Thus far, the physical environment has generally been assumed to be rectangular, static, and free of obstacles. However, it is unlikely that real-world locations that may be used for VR fulfill these constraints. In addition, accounting for a dynamically changing physical environment allows RDW algorithms to accommodate gradually mapped physical environments and moving objects. In this work, we introduce novel approaches that adapt RDW algorithms to support irregularly shaped and dynamic physical environments. Our methods are divided into three categories: novel RDW Greedy Algorithms that provide a generalized approach for any VE, adapted RDW Planning Algorithms that provide an optimized solution when virtual path prediction is available, and last but not least, techniques for representing irregularly shaped and dynamic physical environments that can improve performance of RDW algorithms.},
keywords = {Graphics, MxR},
pubstate = {published},
tppubtype = {inproceedings}
}
2017
LeGendre, Chloe; Batsos, Konstantinos; Mordohai, Philippos
High-Resolution Stereo Matching based on Sampled Photoconsistency Computation Proceedings Article
In: Proceedings of the British Machine Vision Conference 2017, London, UK, 2017.
@inproceedings{legendre_high-resolution_2017,
title = {High-Resolution Stereo Matching based on Sampled Photoconsistency Computation},
author = {Chloe LeGendre and Konstantinos Batsos and Philippos Mordohai},
url = {http://ict.usc.edu/pubs/High-Resolution%20Stereo%20Matching%20based%20on%20Sampled%20Photoconsistency%20Computation.pdf},
year = {2017},
date = {2017-09-01},
booktitle = {Proceedings of the British Machine Vision Conference 2017},
address = {London, UK},
abstract = {We propose an approach to binocular stereo that avoids exhaustive photoconsistency computations at every pixel, since they are redundant and computationally expensive, especially for high resolution images. We argue that developing scalable stereo algorithms is critical as image resolution is expected to continue increasing rapidly. Our approach relies on oversegmentation of the images into superpixels, followed by photoconsistency computation for only a random subset of the pixels of each superpixel. This generates sparse reconstructed points which are used to fit planes. Plane hypotheses are propagated among neighboring superpixels, and they are evaluated at each superpixel by selecting a random subset of pixels on which to aggregate photoconsistency scores for the competing planes. We performed extensive tests to characterize the performance of this algorithm in terms of accuracy and speed on the full-resolution stereo pairs of the 2014 Middlebury benchmark that contains up to 6-megapixel images. Our results show that very large computational savings can be achieved at a small loss of accuracy. A multi-threaded implementation of our method is faster than other methods that achieve similar accuracy and thus it provides a useful accuracy-speed tradeoff.},
keywords = {Graphics},
pubstate = {published},
tppubtype = {inproceedings}
}
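The sampling idea above, scoring each superpixel's plane hypothesis from only a random subset of its pixels, can be sketched as follows; the absolute-difference matching cost and the function name are simplifications for illustration:

```python
import numpy as np

def sampled_superpixel_cost(left, right_warped, labels, n_samples=50, rng=None):
    """left, right_warped: (H, W) grayscale images, with the right image
    already warped by the plane hypothesis under evaluation.
    labels: (H, W) integer superpixel ids. Returns a cost per superpixel,
    aggregated over a random subset of its pixels instead of all of them."""
    if rng is None:
        rng = np.random.default_rng(0)
    costs = {}
    for sp in np.unique(labels):
        ys, xs = np.nonzero(labels == sp)
        pick = rng.choice(len(ys), size=min(n_samples, len(ys)), replace=False)
        costs[sp] = float(np.abs(left[ys[pick], xs[pick]] -
                                 right_warped[ys[pick], xs[pick]]).mean())
    return costs
```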
LeGendre, Chloe; Hyunh, Loc; Wang, Shanhe; Debevec, Paul
Modeling vellus facial hair from asperity scattering silhouettes Proceedings Article
In: Proceedings of SIGGRAPH 2017, pp. 1–2, ACM Press, Los Angeles, CA, 2017, ISBN: 978-1-4503-5008-2.
@inproceedings{legendre_modeling_2017,
title = {Modeling vellus facial hair from asperity scattering silhouettes},
author = {Chloe LeGendre and Loc Hyunh and Shanhe Wang and Paul Debevec},
url = {http://dl.acm.org/citation.cfm?doid=3084363.3085057},
doi = {10.1145/3084363.3085057},
isbn = {978-1-4503-5008-2},
year = {2017},
date = {2017-08-01},
booktitle = {Proceedings of SIGGRAPH 2017},
pages = {1–2},
publisher = {ACM Press},
address = {Los Angeles, CA},
abstract = {We present a technique for modeling the vellus hair over the face based on observations of asperity scattering along a subject's silhouette. We photograph the backlit subject in profile and three-quarters views with a high-resolution DSLR camera to observe the vellus hair on the side and front of the face and separately acquire a 3D scan of the face geometry and texture. We render a library of backlit vellus hair patch samples with different geometric parameters such as density, orientation, and curvature, and we compute image statistics for each set of parameters. We trace the silhouette contour in each face image and straighten the backlit hair silhouettes using image resampling. We compute image statistics for each section of the facial silhouette and determine which set of hair modeling parameters best matches the statistics. We then generate a complete set of vellus hairs for the face by interpolating and extrapolating the matched parameters over the skin. We add the modeled vellus hairs to the 3D facial scan and generate renderings under novel lighting conditions, generally matching the appearance of real photographs.},
keywords = {Graphics, UARC},
pubstate = {published},
tppubtype = {inproceedings}
}
LeGendre, Chloe; Krissman, David; Debevec, Paul
Improved Chromakey of Hair Strands via Orientation Filter Convolution Proceedings Article
In: Proceedings of SIGGRAPH '17 ACM SIGGRAPH 2017, pp. 1–2, ACM Press, Los Angeles, CA, 2017, ISBN: 978-1-4503-5015-0.
@inproceedings{legendre_improved_2017,
title = {Improved Chromakey of Hair Strands via Orientation Filter Convolution},
author = {Chloe LeGendre and David Krissman and Paul Debevec},
url = {http://dl.acm.org/citation.cfm?id=3102200},
doi = {10.1145/3102163.3102200},
isbn = {978-1-4503-5015-0},
year = {2017},
date = {2017-07-01},
booktitle = {Proceedings of SIGGRAPH '17 ACM SIGGRAPH 2017},
pages = {1–2},
publisher = {ACM Press},
address = {Los Angeles, CA},
abstract = {We present a technique for improving the alpha matting of challenging green-screen video sequences involving hair strands. As hair strands are thin and can be semi-translucent, they are especially hard to separate from a background. However, they appear as extended lines and thus have a strong response when convolved with oriented filters, even in the presence of noise. We leverage this oriented filter response to robustly locate hair strands within each frame of an actor’s performance filmed in front of a green-screen. We demonstrate using production video footage that individual hair fibers excluded from a coarse artist’s matte can be located and then added to the foreground element, qualitatively improving the composite result without added manual labor.},
keywords = {Graphics, UARC},
pubstate = {published},
tppubtype = {inproceedings}
}
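The cue described in the entry above, that thin hair strands respond strongly to filters aligned with their orientation, can be sketched with a small bank of oriented ridge kernels; the kernel construction and sizes here are generic placeholders, not the production filters:

```python
import numpy as np
from scipy.ndimage import convolve

def max_oriented_response(gray, n_orientations=8, size=9):
    """Convolve a grayscale frame with oriented, zero-mean ridge kernels and
    keep the per-pixel maximum response; strand-like structures stand out
    even when they are thin or semi-translucent."""
    half = size // 2
    ys, xs = np.mgrid[-half:half + 1, -half:half + 1]
    best = np.zeros(gray.shape, dtype=float)
    for theta in np.linspace(0.0, np.pi, n_orientations, endpoint=False):
        d = xs * np.sin(theta) - ys * np.cos(theta)   # distance from a line at angle theta
        kernel = np.exp(-d**2 / 2.0)                  # ridge aligned with the line
        kernel -= kernel.mean()                       # zero-mean so flat regions respond with 0
        best = np.maximum(best, convolve(gray.astype(float), kernel))
    return best
```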
Laine, Samuli; Karras, Tero; Aila, Timo; Herva, Antti; Saito, Shunsuke; Yu, Ronald; Li, Hao; Lehtinen, Jaakko
Production-level facial performance capture using deep convolutional neural networks Proceedings Article
In: Proceedings of the ACM SIGGRAPH / Eurographics Symposium on Computer Animation, pp. 1–10, ACM Press, Los Angeles, CA, 2017, ISBN: 978-1-4503-5091-4.
@inproceedings{laine_production-level_2017,
title = {Production-level facial performance capture using deep convolutional neural networks},
author = {Samuli Laine and Tero Karras and Timo Aila and Antti Herva and Shunsuke Saito and Ronald Yu and Hao Li and Jaakko Lehtinen},
url = {http://dl.acm.org/citation.cfm?doid=3099564.3099581},
doi = {10.1145/3099564.3099581},
isbn = {978-1-4503-5091-4},
year = {2017},
date = {2017-07-01},
booktitle = {Proceedings of the ACM SIGGRAPH / Eurographics Symposium on Computer Animation},
pages = {1–10},
publisher = {ACM Press},
address = {Los Angeles, CA},
abstract = {We present a real-time deep learning framework for video-based facial performance capture—the dense 3D tracking of an actor's face given a monocular video. Our pipeline begins with accurately capturing a subject using a high-end production facial capture pipeline based on multi-view stereo tracking and artist-enhanced animations. With 5–10 minutes of captured footage, we train a convolutional neural network to produce high-quality output, including self-occluded regions, from a monocular video sequence of that subject. Since this 3D facial performance capture is fully automated, our system can drastically reduce the amount of labor involved in the development of modern narrative-driven video games or films involving realistic digital doubles of actors and potentially hours of animated dialogue per character. We compare our results with several state-of-the-art monocular real-time facial capture techniques and demonstrate compelling animation inference in challenging areas such as eyes and lips.},
keywords = {Graphics},
pubstate = {published},
tppubtype = {inproceedings}
}
Saito, Shunsuke; Wei, Lingyu; Hu, Liwen; Nagano, Koki; Li, Hao
Photorealistic Facial Texture Inference Using Deep Neural Networks Proceedings Article
In: Proceedings of the 30th IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2017), IEEE, Honolulu, HI, 2017.
@inproceedings{saito_photorealistic_2017,
title = {Photorealistic Facial Texture Inference Using Deep Neural Networks},
author = {Shunsuke Saito and Lingyu Wei and Liwen Hu and Koki Nagano and Hao Li},
url = {https://arxiv.org/abs/1612.00523},
year = {2017},
date = {2017-07-01},
booktitle = {Proceedings of the 30th IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2017)},
publisher = {IEEE},
address = {Honolulu, HI},
abstract = {We present a data-driven inference method that can synthesize a photorealistic texture map of a complete 3D face model given a partial 2D view of a person in the wild. After an initial estimation of shape and low-frequency albedo, we compute a high-frequency partial texture map, without the shading component, of the visible face area. To extract the fine appearance details from this incomplete input, we introduce a multi-scale detail analysis technique based on mid-layer feature correlations extracted from a deep convolutional neural network. We demonstrate that fitting a convex combination of feature correlations from a high-resolution face database can yield a semantically plausible facial detail description of the entire face. A complete and photorealistic texture map can then be synthesized by iteratively optimizing for the reconstructed feature correlations. Using these high-resolution textures and a commercial rendering framework, we can produce high-fidelity 3D renderings that are visually comparable to those obtained with state-of-the-art multi-view face capture systems. We demonstrate successful face reconstructions from a wide range of low-resolution input images, including those of historical figures. In addition to extensive evaluations, we validate the realism of our results using a crowdsourced user study.},
keywords = {Graphics, UARC},
pubstate = {published},
tppubtype = {inproceedings}
}
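The "convex combination of feature correlations" step in the abstract above can be sketched compactly if one takes the common realization of feature correlations as Gram matrices of CNN feature maps and fits simplex-constrained blend weights by projected gradient descent. The feature extractor, database, and the approximate simplex projection below are illustrative assumptions, not the paper's implementation.
import numpy as np

def gram(features):
    """Feature correlation (Gram) matrix for a (C, H, W) feature map."""
    C = features.shape[0]
    F = features.reshape(C, -1)
    return F @ F.T / F.shape[1]

def fit_convex_combination(target_gram, database_grams, steps=500, lr=0.1):
    """Fit weights w >= 0, sum(w) = 1, so a blend of database Grams matches the target."""
    K = len(database_grams)
    G = np.stack([g.ravel() for g in database_grams])      # (K, C*C)
    t = target_gram.ravel()
    w = np.full(K, 1.0 / K)
    for _ in range(steps):
        grad = 2 * G @ (G.T @ w - t)                        # d/dw ||G^T w - t||^2
        w = np.clip(w - lr * grad / K, 0, None)             # approximate simplex projection
        w = w / w.sum() if w.sum() > 0 else np.full(K, 1.0 / K)
    return w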
Fyffe, G.; Nagano, K.; Huynh, L.; Saito, S.; Busch, J.; Jones, A.; Li, H.; Debevec, P.
Multi-View Stereo on Consistent Face Topology Journal Article
In: Computer Graphics Forum, vol. 36, no. 2, pp. 295–309, 2017, ISSN: 0167-7055.
@article{fyffe_multi-view_2017,
title = {Multi-View Stereo on Consistent Face Topology},
author = {G. Fyffe and K. Nagano and L. Huynh and S. Saito and J. Busch and A. Jones and H. Li and P. Debevec},
url = {http://onlinelibrary.wiley.com/doi/10.1111/cgf.13127/epdf},
doi = {10.1111/cgf.13127},
issn = {0167-7055},
year = {2017},
date = {2017-05-01},
journal = {Computer Graphics Forum},
volume = {36},
number = {2},
pages = {295–309},
abstract = {We present a multi-view stereo reconstruction technique that directly produces a complete high-fidelity head model with consistent facial mesh topology. While existing techniques decouple shape estimation and facial tracking, our framework jointly optimizes for stereo constraints and consistent mesh parameterization. Our method is therefore free from drift and fully parallelizable for dynamic facial performance capture. We produce highly detailed facial geometries with artist-quality UV parameterization, including secondary elements such as eyeballs, mouth pockets, nostrils, and the back of the head. Our approach consists of deforming a common template model to match multi-view input images of the subject, while satisfying cross-view, cross-subject, and cross-pose consistencies using a combination of 2D landmark detection, optical flow, and surface and volumetric Laplacian regularization. Since the flow is never computed between frames, our method is trivially parallelized by processing each frame independently. Accurate rigid head pose is extracted using a PCA-based dimension reduction and denoising scheme. We demonstrate high-fidelity performance capture results with challenging head motion and complex facial expressions around eye and mouth regions. While the quality of our results is on par with the current state-of-the-art, our approach can be fully parallelized, does not suffer from drift, and produces face models with production-quality mesh topologies.},
keywords = {Graphics, UARC},
pubstate = {published},
tppubtype = {article}
}
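The abstract above mentions extracting rigid head pose with a PCA-based dimension reduction and denoising scheme. As a generic stand-in for that step (not the paper's exact scheme), one can stack per-frame pose parameters, keep the leading principal components, and reconstruct:
import numpy as np

def pca_denoise(pose_params, n_components=3):
    """pose_params: (T, D) per-frame rigid pose parameters; returns a denoised (T, D) array."""
    mean = pose_params.mean(axis=0)
    X = pose_params - mean
    U, S, Vt = np.linalg.svd(X, full_matrices=False)        # principal directions of the trajectory
    Xk = (U[:, :n_components] * S[:n_components]) @ Vt[:n_components]
    return Xk + mean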
Berkiten, Sema; Halber, Maciej; Solomon, Justin; Ma, Chongyang; Li, Hao; Rusinkiewicz, Szymon
Learning Detail Transfer based on Geometric Features Journal Article
In: Computer Graphics Forum, vol. 36, no. 2, pp. 361–373, 2017, ISSN: 0167-7055.
@article{berkiten_learning_2017,
title = {Learning Detail Transfer based on Geometric Features},
author = {Sema Berkiten and Maciej Halber and Justin Solomon and Chongyang Ma and Hao Li and Szymon Rusinkiewicz},
url = {http://onlinelibrary.wiley.com/doi/10.1111/cgf.13132/full},
doi = {10.1111/cgf.13132},
issn = {0167-7055},
year = {2017},
date = {2017-05-01},
journal = {Computer Graphics Forum},
volume = {36},
number = {2},
pages = {361–373},
abstract = {The visual richness of computer graphics applications is frequently limited by the difficulty of obtaining high-quality, detailed 3D models. This paper proposes a method for realistically transferring details (specifically, displacement maps) from existing high-quality 3D models to simple shapes that may be created with easy-to-learn modeling tools. Our key insight is to use metric learning to find a combination of geometric features that successfully predicts detail-map similarities on the source mesh; we use the learned feature combination to drive the detail transfer. The latter uses a variant of multi-resolution non-parametric texture synthesis, augmented by a high-frequency detail transfer step in texture space. We demonstrate that our technique can successfully transfer details among a variety of shapes including furniture and clothing.},
keywords = {ARL, DoD, Graphics, UARC},
pubstate = {published},
tppubtype = {article}
}
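The metric-learning idea in the abstract above (find a combination of geometric features whose distances predict detail-map similarity on the source mesh) can be illustrated with a simple non-negative least-squares fit of per-feature weights. Plain NNLS is used here as a stand-in for the paper's learning procedure; all names and the sampling of vertex pairs are assumptions.
import numpy as np
from scipy.optimize import nnls

def learn_feature_weights(features, dissimilarity, pairs):
    """
    features:      (V, K) geometric feature vector per source vertex
    dissimilarity: callable (i, j) -> observed detail-map dissimilarity on the source
    pairs:         iterable of sampled vertex index pairs (i, j)
    """
    A = np.array([(features[i] - features[j]) ** 2 for i, j in pairs])
    b = np.array([dissimilarity(i, j) for i, j in pairs])
    w, _ = nnls(A, b)          # learned metric: d(i, j) = sum_k w_k (f_ik - f_jk)^2
    return w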
Hu, Liwen; Bradley, Derek; Li, Hao; Beeler, Thabo
Simulation-Ready Hair Capture Journal Article
In: Computer Graphics Forum, vol. 36, no. 2, pp. 281–294, 2017, ISSN: 0167-7055.
@article{hu_simulation-ready_2017,
title = {Simulation-Ready Hair Capture},
author = {Liwen Hu and Derek Bradley and Hao Li and Thabo Beeler},
url = {http://onlinelibrary.wiley.com/doi/10.1111/cgf.13126/full},
doi = {10.1111/cgf.13126},
issn = {0167-7055},
year = {2017},
date = {2017-05-01},
journal = {Computer Graphics Forum},
volume = {36},
number = {2},
pages = {281–294},
abstract = {Physical simulation has long been the approach of choice for generating realistic hair animations in CG. A constant drawback of simulation, however, is the necessity to manually set the physical parameters of the simulation model in order to get the desired dynamic behavior. To alleviate this, researchers have begun to explore methods for reconstructing hair from the real world and even to estimate the corresponding simulation parameters through the process of inversion. So far, however, these methods have had limited applicability, because dynamic hair capture can only be played back without the ability to edit, and solving for simulation parameters can only be accomplished for static hairstyles, ignoring the dynamic behavior. We present the first method for capturing dynamic hair and automatically determining the physical properties for simulating the observed hairstyle in motion. Since our dynamic inversion is agnostic to the simulation model, the proposed method applies to virtually any hair simulation technique, which we demonstrate using two state-of-the-art hair simulation models. The output of our method is a fully simulation-ready hairstyle, consisting of both the static hair geometry as well as its physical properties. The hairstyle can be easily edited by adding additional external forces, changing the head motion, or re-simulating in completely different environments, all while remaining faithful to the captured hairstyle.},
keywords = {Graphics},
pubstate = {published},
tppubtype = {article}
}
Yang, Chao; Lu, Xin; Lin, Zhe; Shechtman, Eli; Wang, Oliver; Li, Hao
High-Resolution Image Inpainting using Multi-Scale Neural Patch Synthesis Journal Article
In: arXiv preprint arXiv:1611.09969v2, 2017.
@article{yang_high-resolution_2017,
title = {High-Resolution Image Inpainting using Multi-Scale Neural Patch Synthesis},
author = {Chao Yang and Xin Lu and Zhe Lin and Eli Shechtman and Oliver Wang and Hao Li},
url = {https://arxiv.org/pdf/1611.09969},
year = {2017},
date = {2017-04-01},
journal = {arXiv preprint arXiv:1611.09969v2},
abstract = {Recent advances in deep learning have shown exciting promise in filling large holes in natural images with semantically plausible and context-aware details, impacting fundamental image manipulation tasks such as object removal. While these learning-based methods are significantly more effective in capturing high-level features than prior techniques, they can only handle very low-resolution inputs due to memory limitations and difficulty in training. Even for slightly larger images, the inpainted regions would appear blurry and unpleasant boundaries become visible. We propose a multi-scale neural patch synthesis approach based on joint optimization of image content and texture constraints, which not only preserves contextual structures but also produces high-frequency details by matching and adapting patches with the most similar mid-layer feature correlations of a deep classification network. We evaluate our method on the ImageNet and Paris Streetview datasets and achieve state-of-the-art inpainting accuracy. We show our approach produces sharper and more coherent results than prior methods, especially for high-resolution images.},
keywords = {Graphics, UARC},
pubstate = {published},
tppubtype = {article}
}
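The joint content-and-texture objective in the abstract above can be sketched in a few lines: a content term keeps the hole region close to a coarse prediction in feature space, and a texture term pulls each feature patch inside the hole toward its nearest-neighbor patch from the known surroundings. The feature maps, patch size, and coordinates are placeholders; this is a hedged illustration, not the paper's implementation.
import numpy as np

def extract_patches(fmap, coords, size=3):
    """fmap: (C, H, W) feature map; coords: list of interior (y, x) patch centers."""
    h = size // 2
    return np.stack([fmap[:, y - h:y + h + 1, x - h:x + h + 1].ravel()
                     for y, x in coords])

def inpainting_losses(fmap, content_fmap, hole_coords, known_coords):
    content = np.sum((fmap - content_fmap) ** 2)               # content constraint
    hole = extract_patches(fmap, hole_coords)                  # (Nh, C*size*size)
    known = extract_patches(fmap, known_coords)                # (Nk, C*size*size)
    d = ((hole[:, None, :] - known[None, :, :]) ** 2).sum(-1)  # pairwise patch distances
    texture = d.min(axis=1).sum()                              # nearest-neighbor texture constraint
    return content, texture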
2016
Saito, Shunsuke; Wei, Lingyu; Hu, Liwen; Nagano, Koki; Li, Hao
Photorealistic Facial Texture Inference Using Deep Neural Networks Journal Article
In: arXiv preprint arXiv:1612.00523, 2016.
@article{saito_photorealistic_2016,
title = {Photorealistic Facial Texture Inference Using Deep Neural Networks},
author = {Shunsuke Saito and Lingyu Wei and Liwen Hu and Koki Nagano and Hao Li},
url = {https://arxiv.org/abs/1612.00523},
year = {2016},
date = {2016-12-01},
journal = {arXiv preprint arXiv:1612.00523},
abstract = {We present a data-driven inference method that can synthesize a photorealistic texture map of a complete 3D face model given a partial 2D view of a person in the wild. After an initial estimation of shape and low-frequency albedo, we compute a high-frequency partial texture map, without the shading component, of the visible face area. To extract the fine appearance details from this incomplete input, we introduce a multi-scale detail analysis technique based on mid-layer feature correlations extracted from a deep convolutional neural network. We demonstrate that fitting a convex combination of feature correlations from a high-resolution face database can yield a semantically plausible facial detail description of the entire face. A complete and photorealistic texture map can then be synthesized by iteratively optimizing for the reconstructed feature correlations. Using these high-resolution textures and a commercial rendering framework, we can produce high-fidelity 3D renderings that are visually comparable to those obtained with state-of-the-art multi-view face capture systems. We demonstrate successful face reconstructions from a wide range of low-resolution input images, including those of historical figures. In addition to extensive evaluations, we validate the realism of our results using a crowdsourced user study.},
keywords = {Graphics, UARC},
pubstate = {published},
tppubtype = {article}
}
Olszewski, Kyle; Lim, Joseph J.; Saito, Shunsuke; Li, Hao
High-fidelity facial and speech animation for VR HMDs Journal Article
In: ACM Transactions on Graphics, vol. 35, no. 6, pp. 1–14, 2016, ISSN: 0730-0301.
@article{olszewski_high-fidelity_2016,
title = {High-fidelity facial and speech animation for VR HMDs},
author = {Kyle Olszewski and Joseph J. Lim and Shunsuke Saito and Hao Li},
url = {http://dl.acm.org/citation.cfm?doid=2980179.2980252},
doi = {10.1145/2980179.2980252},
issn = {0730-0301},
year = {2016},
date = {2016-11-01},
journal = {ACM Transactions on Graphics},
volume = {35},
number = {6},
pages = {1–14},
abstract = {Several significant challenges currently prohibit expressive interaction in virtual reality (VR). The occlusion introduced by modern head-mounted displays (HMDs) makes most existing techniques for facial tracking intractable in this scenario. Furthermore, even state-of-the-art techniques used for real-time facial tracking in less constrained environments fail to capture subtle details of the user’s facial expressions that are essential for compelling speech animation. We introduce a novel system for HMD users to control a digital avatar in real-time while producing plausible speech animation and emotional expressions. Using a monocular camera attached to the front of an HMD, we record video sequences from multiple subjects performing a variety of facial expressions and speaking several phonetically-balanced sentences. These images are used with artist-generated animation data corresponding to these sequences to train a convolutional neural network (CNN) to regress images of a user’s mouth region to the parameters that control a digital avatar. To make training this system more tractable, we make use of audio-based alignment techniques to map images of multiple users making the same utterance to the corresponding animation parameters. We demonstrate that our regression technique is also feasible for tracking the expressions around the user’s eye region, including the eyebrows, with an infrared (IR) camera within the HMD, thereby enabling full facial tracking. This system requires no user-specific calibration, makes use of easily obtainable consumer hardware, and produces high-quality animations of both speech and emotional expressions. Finally, we demonstrate the quality of our system on a variety of subjects and evaluate its performance against state-of-the-art real-time facial tracking techniques.},
keywords = {Graphics},
pubstate = {published},
tppubtype = {article}
}
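The regression setup the abstract above describes (a CNN mapping mouth-region images to avatar animation parameters, trained against artist-generated targets) can be sketched in a few lines of PyTorch. The architecture, crop format, and the number of output parameters below are assumptions for illustration, not the paper's network.
import torch
import torch.nn as nn

class MouthToParams(nn.Module):
    def __init__(self, n_params=30):                 # n_params is an assumed count
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 32, 5, stride=2, padding=2), nn.ReLU(),
            nn.Conv2d(32, 64, 3, stride=2, padding=1), nn.ReLU(),
            nn.Conv2d(64, 128, 3, stride=2, padding=1), nn.ReLU(),
            nn.AdaptiveAvgPool2d(1),
        )
        self.head = nn.Linear(128, n_params)

    def forward(self, x):                             # x: (B, 1, H, W) grayscale mouth crops
        return self.head(self.features(x).flatten(1))

# Training step against artist-generated animation parameters (targets):
# loss = nn.functional.mse_loss(MouthToParams()(images), targets)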
LeGendre, Chloe; Yu, Xueming; Debevec, Paul
Efficient Multispectral Reflectance Function Capture for Image-Based Relighting Proceedings Article
In: Proceedings of the Color and Imaging Conference, pp. 47–58, Society for Imaging Science and Technology, San Diego, CA, 2016.
@inproceedings{legendre_efficient_2016,
title = {Efficient Multispectral Reflectance Function Capture for Image-Based Relighting},
author = {Chloe LeGendre and Xueming Yu and Paul Debevec},
url = {http://www.ingentaconnect.com/contentone/ist/cic/2016/00002016/00000001/art00008},
year = {2016},
date = {2016-11-01},
booktitle = {Proceedings of the Color and Imaging Conference},
pages = {47–58},
publisher = {Society for Imaging Science and Technology},
address = {San Diego, CA},
abstract = {Image-based relighting (IBRL) renders the appearance of a subject in a novel lighting environment as a linear combination of the images of its reflectance field, the appearance of the subject lit by each incident lighting direction. Traditionally, a tristimulus color camera records the reflectance field as the subject is sequentially illuminated by broad-spectrum white light sources from each direction. Using a multispectral LED sphere and either a tristimulus (RGB) or monochrome camera, we photograph a still life scene to acquire its multispectral reflectance field – its appearance for every lighting direction for multiple incident illumination spectra. For the tristimulus camera, we demonstrate improved color rendition for IBRL when using the multispectral reflectance field, producing a closer match to the scene's actual appearance in a real-world illumination environment. For the monochrome camera, we also show close visual matches. We additionally propose an efficient method for acquiring such multispectral reflectance fields, augmenting the traditional broad-spectrum lighting basis capture with only a few additional images equal to the desired number of spectral channels. In these additional images, we illuminate the subject by a complete sphere of each available narrow-band LED light source, in our case: red, amber, green, cyan, and blue. From the full-sphere illumination images, we promote the white-light reflectance functions for every direction to multispectral, effectively hallucinating the appearance of the subject under each LED spectrum for each lighting direction. We also use polarization imaging to separate the diffuse and specular components of the reflectance functions, spectrally promoting these components according to different models. We validate that the approximated multispectral reflectance functions closely match those generated by a fully multispectral omnidirectional lighting basis, suggesting a rapid multispectral reflectance field capture method which could be applied for live subjects.},
keywords = {Graphics, UARC},
pubstate = {published},
tppubtype = {inproceedings}
}
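The basic image-based relighting sum the abstract above builds on, a linear combination of reflectance-field images weighted by the novel environment's intensity per incident direction, reduces to a single weighted sum. The array shapes and directional sampling below are assumptions for illustration.
import numpy as np

def relight(reflectance_field, environment_weights):
    """
    reflectance_field:   (L, H, W, 3) image of the subject lit from each of L directions
    environment_weights: (L, 3)       RGB intensity of the novel lighting per direction
    returns:             (H, W, 3)    relit image (sum over lighting directions)
    """
    return np.einsum("lhwc,lc->hwc", reflectance_field, environment_weights)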
Saito, Shunsuke; Li, Tianye; Li, Hao
Real-Time Facial Segmentation and Performance Capture from RGB Input Proceedings Article
In: Proceedings of the 14th European Conference on Computer Vision (ECCV 2016), pp. 244–261, Springer International Publishing, Amsterdam, The Netherlands, 2016, ISBN: 978-3-319-46483-1 978-3-319-46484-8.
@inproceedings{saito_real-time_2016,
title = {Real-Time Facial Segmentation and Performance Capture from RGB Input},
author = {Shunsuke Saito and Tianye Li and Hao Li},
url = {https://link.springer.com/chapter/10.1007/978-3-319-46484-8_15},
isbn = {978-3-319-46483-1 978-3-319-46484-8},
year = {2016},
date = {2016-10-01},
booktitle = {Proceedings of the 14th European Conference on Computer Vision (ECCV 2016)},
pages = {244–261},
publisher = {Springer International Publishing},
address = {Amsterdam, The Netherlands},
abstract = {We introduce the concept of unconstrained real-time 3D facial performance capture through explicit semantic segmentation in the RGB input. To ensure robustness, cutting edge supervised learning approaches rely on large training datasets of face images captured in the wild. While impressive tracking quality has been demonstrated for faces that are largely visible, any occlusion due to hair, accessories, or hand-to-face gestures would result in significant visual artifacts and loss of tracking accuracy. The modeling of occlusions has been mostly avoided due to its immense space of appearance variability. To address this curse of high dimensionality, we perform tracking in unconstrained images assuming non-face regions can be fully masked out. Along with recent breakthroughs in deep learning, we demonstrate that pixel-level facial segmentation is possible in real-time by repurposing convolutional neural networks designed originally for general semantic segmentation. We develop an efficient architecture based on a two-stream deconvolution network with complementary characteristics, and introduce carefully designed training samples and data augmentation strategies for improved segmentation accuracy and robustness. We adopt a state-of-the-art regression-based facial tracking framework with segmented face images as training, and demonstrate accurate and uninterrupted facial performance capture in the presence of extreme occlusion and even side views. Furthermore, the resulting segmentation can be directly used to composite partial 3D face models on the input images and enable seamless facial manipulation tasks, such as virtual make-up or face replacement.},
keywords = {Graphics},
pubstate = {published},
tppubtype = {inproceedings}
}
Wang, Ruizhe; Wei, Lingyu; Vouga, Etienne; Huang, Qixing; Ceylan, Duygu; Medioni, Gerard; Li, Hao
Capturing Dynamic Textured Surfaces of Moving Targets Proceedings Article
In: Proceedings of the 14th European Conference on Computer Vision (ECCV 2016 Spotlight Presentation), Springer International Publishing, Amsterdam, The Netherlands, 2016, ISBN: 978-3-319-46477-0 978-3-319-46478-7.
@inproceedings{wang_capturing_2016,
title = {Capturing Dynamic Textured Surfaces of Moving Targets},
author = {Ruizhe Wang and Lingyu Wei and Etienne Vouga and Qixing Huang and Duygu Ceylan and Gerard Medioni and Hao Li},
url = {https://link.springer.com/chapter/10.1007/978-3-319-46478-7_17},
isbn = {978-3-319-46477-0 978-3-319-46478-7},
year = {2016},
date = {2016-10-01},
booktitle = {Proceedings of the 14th European Conference on Computer Vision (ECCV 2016 Spotlight Presentation)},
publisher = {Springer International Publishing},
address = {Amsterdam, The Netherlands},
abstract = {We present an end-to-end system for reconstructing complete watertight and textured models of moving subjects such as clothed humans and animals, using only three or four handheld sensors. The heart of our framework is a new pairwise registration algorithm that minimizes, using a particle swarm strategy, an alignment error metric based on mutual visibility and occlusion. We show that this algorithm reliably registers partial scans with as little as 15% overlap without requiring any initial correspondences, and outperforms alternative global registration algorithms. This registration algorithm allows us to reconstruct moving subjects from free-viewpoint video produced by consumer-grade sensors, without extensive sensor calibration, constrained capture volume, expensive arrays of cameras, or templates of the subject geometry.},
keywords = {Graphics},
pubstate = {published},
tppubtype = {inproceedings}
}
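The particle-swarm search over rigid alignments mentioned in the abstract above can be illustrated with a compact sketch. The paper's error metric is based on mutual visibility and occlusion; here a simple mean closest-point distance stands in for it, so this shows only the search strategy, not the actual metric, and the parameterization and bounds are assumptions.
import numpy as np
from scipy.spatial import cKDTree

def euler_to_R(rx, ry, rz):
    cx, sx, cy, sy, cz, sz = np.cos(rx), np.sin(rx), np.cos(ry), np.sin(ry), np.cos(rz), np.sin(rz)
    Rx = np.array([[1, 0, 0], [0, cx, -sx], [0, sx, cx]])
    Ry = np.array([[cy, 0, sy], [0, 1, 0], [-sy, 0, cy]])
    Rz = np.array([[cz, -sz, 0], [sz, cz, 0], [0, 0, 1]])
    return Rz @ Ry @ Rx

def pso_register(src, dst, n_particles=64, iters=100):
    """Search for (rx, ry, rz, tx, ty, tz) roughly aligning src (N,3) to dst (M,3)."""
    tree = cKDTree(dst)
    def cost(p):
        aligned = src @ euler_to_R(*p[:3]).T + p[3:]
        return tree.query(aligned)[0].mean()        # stand-in alignment error
    rng = np.random.default_rng(0)
    X = rng.uniform(-1, 1, (n_particles, 6))        # radians / scene units; assumes normalized data
    V = np.zeros_like(X)
    pbest, pbest_cost = X.copy(), np.array([cost(p) for p in X])
    gbest = pbest[np.argmin(pbest_cost)]
    for _ in range(iters):
        r1, r2 = rng.random((2, n_particles, 1))
        V = 0.7 * V + 1.5 * r1 * (pbest - X) + 1.5 * r2 * (gbest - X)
        X = X + V
        costs = np.array([cost(p) for p in X])
        improved = costs < pbest_cost
        pbest[improved], pbest_cost[improved] = X[improved], costs[improved]
        gbest = pbest[np.argmin(pbest_cost)]
    return gbest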
LeGendre, Chloe; Yu, Xueming; Debevec, Paul
Optimal LED selection for multispectral lighting reproduction Proceedings Article
In: Proceedings of the SIGGRAPH '16 ACM SIGGRAPH 2016, ACM, New York, NY, 2016, ISBN: 978-1-4503-4371-8.
@inproceedings{legendre_optimal_2016,
title = {Optimal LED selection for multispectral lighting reproduction},
author = {Chloe LeGendre and Xueming Yu and Paul Debevec},
url = {http://dl.acm.org/citation.cfm?id=2945150},
doi = {10.1145/2945078.2945150},
isbn = {978-1-4503-4371-8},
year = {2016},
date = {2016-07-01},
booktitle = {Proceedings of the SIGGRAPH '16 ACM SIGGRAPH 2016},
publisher = {ACM},
address = {New York, NY},
abstract = {We demonstrate the sufficiency of using as few as five LEDs of distinct spectra for multispectral lighting reproduction and solve for the optimal set of five from 11 such commercially available LEDs. We leverage published spectral reflectance, illuminant, and camera spectral sensitivity datasets to show that two approaches of lighting reproduction, matching illuminant spectra directly and matching material color appearance observed by one or more cameras or a human observer, yield the same LED selections. Our proposed optimal set of five LEDs includes red, green, and blue with narrow emission spectra, along with white and amber with broader spectra.},
keywords = {Graphics, UARC},
pubstate = {published},
tppubtype = {inproceedings}
}
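In the spirit of the LED-selection problem above, a brute-force search over 5-LED subsets can be sketched as follows: fit each candidate subset to every target illuminant spectrum with non-negative least squares and keep the subset with the lowest total residual. This covers only the direct spectral-matching criterion; the paper also evaluates color appearance through cameras and a human observer. Shapes and names are assumptions.
import itertools
import numpy as np
from scipy.optimize import nnls

def best_led_subset(led_spectra, target_illuminants, k=5):
    """
    led_spectra:        (n_leds, n_wavelengths) LED emission spectra
    target_illuminants: (n_targets, n_wavelengths) illuminant spectra to reproduce
    """
    best, best_err = None, np.inf
    for subset in itertools.combinations(range(len(led_spectra)), k):
        A = led_spectra[list(subset)].T                # (n_wavelengths, k)
        err = sum(nnls(A, t)[1] for t in target_illuminants)
        if err < best_err:
            best, best_err = subset, err
    return best, best_err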
LeGendre, Chloe; Yu, Xueming; Liu, Dai; Busch, Jay; Jones, Andrew; Pattanaik, Sumanta; Debevec, Paul
Practical Multispectral Lighting Reproduction Journal Article
In: ACM Transactions on Graphics, vol. 35, no. 4, pp. 1–11, 2016, ISSN: 0730-0301.
@article{legendre_practical_2016,
title = {Practical Multispectral Lighting Reproduction},
author = {Chloe LeGendre and Xueming Yu and Dai Liu and Jay Busch and Andrew Jones and Sumanta Pattanaik and Paul Debevec},
url = {http://dl.acm.org/citation.cfm?id=2925934},
doi = {10.1145/2897824.2925934},
issn = {0730-0301},
year = {2016},
date = {2016-07-01},
journal = {ACM Transactions on Graphics},
volume = {35},
number = {4},
pages = {1–11},
abstract = {We present a practical framework for reproducing omnidirectional incident illumination conditions with complex spectra using a light stage with multispectral LED lights. For lighting acquisition, we augment standard RGB panoramic photography with one or more observations of a color chart with numerous reflectance spectra. We then solve for how to drive the multispectral light sources so that they best reproduce the appearance of the color charts in the original lighting. Even when solving for non-negative intensities, we show that accurate lighting reproduction is achievable using just four or six distinct LED spectra for a wide range of incident illumination spectra. A significant benefit of our approach is that it does not require the use of specialized equipment (other than the light stage) such as monochromators, spectroradiometers, or explicit knowledge of the LED power spectra, camera spectral response functions, or color chart reflectance spectra. We describe two simple devices for multispectral lighting capture, one for slow measurements of detailed angular spectral detail, and one for fast measurements with coarse angular detail. We validate the approach by realistically compositing real subjects into acquired lighting environments, showing accurate matches to how the subject would actually look within the environments, even for those including complex multispectral illumination. We also demonstrate dynamic lighting capture and playback using the technique.},
keywords = {Graphics, UARC},
pubstate = {published},
tppubtype = {article}
}
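The core solve described in the abstract above, finding non-negative LED drive levels so the color chart's predicted appearance (a linear blend of its appearance under each LED spectrum) best matches its photographed appearance in the original lighting, maps directly onto non-negative least squares. The array shapes below are assumptions for illustration.
import numpy as np
from scipy.optimize import nnls

def solve_led_intensities(chart_under_leds, chart_in_scene):
    """
    chart_under_leds: (n_leds, n_patches, 3) chart RGBs lit by each LED at unit intensity
    chart_in_scene:   (n_patches, 3)         chart RGBs photographed in the original lighting
    returns:          (n_leds,)              non-negative drive levels
    """
    A = chart_under_leds.reshape(len(chart_under_leds), -1).T   # (n_patches*3, n_leds)
    b = chart_in_scene.ravel()
    intensities, _ = nnls(A, b)
    return intensities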
Jones, Andrew; Nagano, Koki; Busch, Jay; Yu, Xueming; Peng, Hsuan-Yueh; Barreto, Joseph; Alexander, Oleg; Bolas, Mark; Debevec, Paul; Unger, Jonas
Time-Offset Conversations on a Life-Sized Automultiscopic Projector Array Proceedings Article
In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops, pp. 18–26, Las Vegas, NV, 2016.
@inproceedings{jones_time-offset_2016,
title = {Time-Offset Conversations on a Life-Sized Automultiscopic Projector Array},
author = {Andrew Jones and Koki Nagano and Jay Busch and Xueming Yu and Hsuan-Yueh Peng and Joseph Barreto and Oleg Alexander and Mark Bolas and Paul Debevec and Jonas Unger},
url = {http://www.cv-foundation.org//openaccess/content_cvpr_2016_workshops/w16/papers/Jones_Time-Offset_Conversations_on_CVPR_2016_paper.pdf},
year = {2016},
date = {2016-07-01},
booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops},
pages = {18–26},
address = {Las Vegas, NV},
abstract = {We present a system for creating and displaying interactive life-sized 3D digital humans based on pre-recorded interviews. We use 30 cameras and an extensive list of questions to record a large set of video responses. Users access videos through a natural conversation interface that mimics face-to-face interaction. Recordings of answers, listening and idle behaviors are linked together to create a persistent visual image of the person throughout the interaction. The interview subjects are rendered using flowed light fields and shown life-size on a special rear-projection screen with an array of 216 video projectors. The display allows multiple users to see different 3D perspectives of the subject in proper relation to their viewpoints, without the need for stereo glasses. The display is effective for interactive conversations since it provides 3D cues such as eye gaze and spatial hand gestures.},
keywords = {Graphics, MxR, UARC},
pubstate = {published},
tppubtype = {inproceedings}
}