Publications
2025
Jalal-Kamali, Ali; Gurney, Nikolos; Pynadath, David
Predicting Team Performance from Communications in Simulated Search-and-Rescue Miscellaneous
2025, (arXiv:2503.03791 [cs]).
@misc{jalal-kamali_predicting_2025,
title = {Predicting Team Performance from Communications in Simulated Search-and-Rescue},
author = {Ali Jalal-Kamali and Nikolos Gurney and David Pynadath},
url = {http://arxiv.org/abs/2503.03791},
doi = {10.48550/arXiv.2503.03791},
year = {2025},
date = {2025-03-01},
urldate = {2025-03-18},
publisher = {arXiv},
abstract = {Understanding how individual traits influence team performance is valuable, but these traits are not always directly observable. Prior research has inferred traits like trust from behavioral data. We analyze conversational data to identify team traits and their correlation with teaming outcomes. Using transcripts from a Minecraft-based search-and-rescue experiment, we apply topic modeling and clustering to uncover key interaction patterns. Our findings show that variations in teaming outcomes can be explained through these inferences, with different levels of predictive power derived from individual traits and team dynamics.},
note = {arXiv:2503.03791 [cs]},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
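The abstract above describes inferring team traits by applying topic modeling and clustering to chat transcripts. A minimal sketch of that generic pipeline, assuming scikit-learn, toy transcripts, and arbitrary topic/cluster counts (the paper's actual features and model choices are not specified here):

```python
# Minimal sketch of a topic-modeling + clustering pipeline of the kind the
# abstract describes. Transcripts, topic counts, and cluster counts are toy
# placeholders, not the paper's actual configuration.
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.cluster import KMeans

transcripts = [
    "medic needed in room three",
    "i will search the east wing for victims",
    "rubble is blocking the hallway, rerouting to the north entrance",
]

# Bag-of-words features per team transcript.
X = CountVectorizer(stop_words="english").fit_transform(transcripts)

# Infer a topic mixture for each transcript.
lda = LatentDirichletAllocation(n_components=2, random_state=0)
topic_mix = lda.fit_transform(X)

# Cluster teams by topic mixture to surface interaction patterns that can
# then be correlated with teaming outcomes.
labels = KMeans(n_clusters=2, n_init=10, random_state=0).fit_predict(topic_mix)
print(labels)
```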
Kwon, Deuksin; Hae, Jiwon; Clift, Emma; Shamsoddini, Daniel; Gratch, Jonathan; Lucas, Gale M.
ASTRA: A Negotiation Agent with Adaptive and Strategic Reasoning through Action in Dynamic Offer Optimization Miscellaneous
2025, (arXiv:2503.07129 [cs]).
@misc{kwon_astra_2025,
title = {ASTRA: A Negotiation Agent with Adaptive and Strategic Reasoning through Action in Dynamic Offer Optimization},
author = {Deuksin Kwon and Jiwon Hae and Emma Clift and Daniel Shamsoddini and Jonathan Gratch and Gale M. Lucas},
url = {http://arxiv.org/abs/2503.07129},
doi = {10.48550/arXiv.2503.07129},
year = {2025},
date = {2025-03-01},
urldate = {2025-03-18},
publisher = {arXiv},
abstract = {Negotiation requires dynamically balancing self-interest and cooperation to maximize one's own utility. Yet, existing agents struggle due to bounded rationality in human data, low adaptability to counterpart behavior, and limited strategic reasoning. To address this, we introduce principle-driven negotiation agents, powered by ASTRA, a novel framework for turn-level offer optimization grounded in two core principles: opponent modeling and Tit-for-Tat reciprocity. ASTRA operates in three stages: (1) interpreting counterpart behavior, (2) optimizing counteroffers via a linear programming (LP) solver, and (3) selecting offers based on negotiation tactics and the partner's acceptance probability. Through simulations and human evaluations, our agent effectively adapts to an opponent's shifting stance and achieves favorable outcomes through enhanced adaptability and strategic reasoning. Beyond improving negotiation performance, it also serves as a powerful coaching tool, offering interpretable strategic feedback and optimal offer recommendations.},
note = {arXiv:2503.07129 [cs]},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
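ASTRA's second stage is described as optimizing counteroffers with a linear programming (LP) solver. The sketch below shows one generic way such a turn-level LP could look, assuming scipy; the item utilities, totals, and concession target are illustrative placeholders, not the paper's formulation:

```python
# Hedged sketch of turn-level counteroffer optimization as a linear program,
# assuming scipy. The per-item utilities, totals, and the reciprocity-derived
# concession target are illustrative placeholders, not ASTRA's formulation.
import numpy as np
from scipy.optimize import linprog

my_value = np.array([3.0, 2.0, 1.0])   # my utility per item type
opp_value = np.array([1.0, 2.0, 3.0])  # estimated opponent utilities (opponent modeling)
totals = np.array([4.0, 4.0, 4.0])     # items of each type on the table
opp_target = 12.0                      # minimum opponent utility owed under Tit-for-Tat

# x[i] = items of type i that I keep; linprog minimizes, so negate my utility.
# Opponent receives totals - x, so opp_value @ (totals - x) >= opp_target
# rearranges to opp_value @ x <= opp_value @ totals - opp_target.
res = linprog(
    c=-my_value,
    A_ub=opp_value.reshape(1, -1),
    b_ub=[opp_value @ totals - opp_target],
    bounds=list(zip(np.zeros(3), totals)),
)
print(res.x, -res.fun)  # my side of the counteroffer and its utility to me
```

Here the agent maximizes its own utility while guaranteeing the opponent a reciprocity-derived minimum, which is the flavor of constraint the Tit-for-Tat principle suggests.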
Fonseca, Henrique Correia Da; Melo, Celso M. De; Terada, Kazunori; Gratch, Jonathan; Paiva, Ana S.; Santos, Francisco C.
Evolution of indirect reciprocity under emotion expression Journal Article
In: Sci Rep, vol. 15, no. 1, pp. 9151, 2025, ISSN: 2045-2322.
@article{correia_da_fonseca_evolution_2025,
title = {Evolution of indirect reciprocity under emotion expression},
author = {Henrique Correia Da Fonseca and Celso M. De Melo and Kazunori Terada and Jonathan Gratch and Ana S. Paiva and Francisco C. Santos},
url = {https://www.nature.com/articles/s41598-025-89588-8},
doi = {10.1038/s41598-025-89588-8},
issn = {2045-2322},
year = {2025},
date = {2025-03-01},
urldate = {2025-03-20},
journal = {Sci Rep},
volume = {15},
number = {1},
pages = {9151},
abstract = {Do emotion expressions impact the evolution of cooperation? Indirect Reciprocity offers a solution to the cooperation dilemma with prior work focusing on the role of social norms in propagating others’ reputations and contributing to evolutionarily stable cooperation. Recent experimental studies, however, show that emotion expressions shape pro-social behaviour, communicate one’s intentions to others, and serve an error-correcting function; yet, the role of emotion signals in the evolution of cooperation remains unexplored. We present the first model of IR based on evolutionary game theory that exposes how emotion expressions positively influence the evolution of cooperation, particularly in scenarios of frequent errors. Our findings provide evolutionary support for the existence of emotion-based social norms, which help foster cooperation among unrelated individuals.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
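The abstract above builds on evolutionary game theory. As context only, a textbook replicator-dynamics sketch for the two-strategy donation game (standard payoffs, not the paper's reputation- and emotion-based model) shows the baseline dilemma that indirect-reciprocity mechanisms are meant to resolve:

```python
# Textbook replicator-dynamics sketch for a two-strategy donation game,
# showing the baseline dilemma (defection takes over without reciprocity).
# Payoffs are standard placeholders; the paper's reputation and emotion
# dynamics are not reproduced here.
import numpy as np

b, c = 3.0, 1.0
# Row player's payoffs for strategies [Cooperate, Defect].
A = np.array([[b - c, -c],
              [b,      0.0]])

x = np.array([0.6, 0.4])  # initial population shares
dt = 0.01
for _ in range(5000):
    f = A @ x                      # expected payoff of each strategy
    x = x + dt * x * (f - x @ f)   # replicator equation: dx_i = x_i (f_i - mean f)
print(x)  # converges toward [0, 1]: all-defect, absent reciprocity mechanisms
```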
Jin, Zhangyu; Feng, Andrew; Chemburkar, Ankur; Melo, Celso M. De
PromptGAR: Flexible Promptive Group Activity Recognition Miscellaneous
2025, (arXiv:2503.08933 [cs]).
@misc{jin_promptgar_2025,
title = {PromptGAR: Flexible Promptive Group Activity Recognition},
author = {Zhangyu Jin and Andrew Feng and Ankur Chemburkar and Celso M. De Melo},
url = {http://arxiv.org/abs/2503.08933},
doi = {10.48550/arXiv.2503.08933},
year = {2025},
date = {2025-03-01},
urldate = {2025-03-20},
publisher = {arXiv},
abstract = {We present PromptGAR, a novel framework that addresses the limitations of current Group Activity Recognition (GAR) approaches by leveraging multi-modal prompts to achieve both input flexibility and high recognition accuracy. The existing approaches suffer from limited real-world applicability due to their reliance on full prompt annotations, the lack of long-term actor consistency, and under-exploration of multi-group scenarios. To bridge the gap, we proposed PromptGAR, which is the first GAR model to provide input flexibility across prompts, frames, and instances without the need for retraining. Specifically, we unify bounding boxes, skeletal keypoints, and areas as point prompts and employ a recognition decoder for cross-updating class and prompt tokens. To ensure long-term consistency for extended activity durations, we also introduce a relative instance attention mechanism that directly encodes instance IDs. Finally, PromptGAR explores the use of area prompts to enable the selective recognition of the particular group activity within videos that contain multiple concurrent groups. Comprehensive evaluations demonstrate that PromptGAR achieves competitive performances both on full prompts and diverse prompt inputs, establishing its effectiveness on input flexibility and generalization ability for real-world applications.},
note = {arXiv:2503.08933 [cs]},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
Liu, Ruying; Becerik-Gerber, Burçin; Lucas, Gale M.
Investigating Role of Personal Factors in Shaping Responses to Active Shooter Incident using Machine Learning Miscellaneous
2025, (arXiv:2503.05719 [cs]).
@misc{liu_investigating_2025,
title = {Investigating Role of Personal Factors in Shaping Responses to Active Shooter Incident using Machine Learning},
author = {Ruying Liu and Burçin Becerik-Gerber and Gale M. Lucas},
url = {http://arxiv.org/abs/2503.05719},
doi = {10.48550/arXiv.2503.05719},
year = {2025},
date = {2025-02-01},
urldate = {2025-03-18},
publisher = {arXiv},
abstract = {This study bridges the knowledge gap on how personal factors affect building occupants' responses in active shooter situations by applying interpretable machine learning methods to data from 107 participants. The personal factors studied are training methods, prior training experience, sense of direction, and gender. The response performance measurements consist of decisions (run, hide, multiple), vulnerability (corresponding to the time a participant is visible to a shooter), and pre-evacuation time. The results indicate that the propensity to run significantly determines overall response strategies, overshadowing vulnerability, and pre-evacuation time. The training method is a critical factor where VR-based training leads to better responses than video-based training. A better sense of direction and previous training experience are correlated with a greater propensity to run and less vulnerability. Gender slightly influences decisions and vulnerability but significantly impacts pre-evacuation time, with females evacuating slower, potentially due to higher risk perception. This study underscores the importance of personal factors in shaping responses to active shooter incidents.},
note = {arXiv:2503.05719 [cs]},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
Huang, Huajian; Chen, Yingshu; Li, Longwei; Cheng, Hui; Braud, Tristan; Zhao, Yajie; Yeung, Sai-Kit
SC-OmniGS: Self-Calibrating Omnidirectional Gaussian Splatting Miscellaneous
2025, (arXiv:2502.04734 [cs]).
@misc{huang_sc-omnigs_2025,
title = {SC-OmniGS: Self-Calibrating Omnidirectional Gaussian Splatting},
author = {Huajian Huang and Yingshu Chen and Longwei Li and Hui Cheng and Tristan Braud and Yajie Zhao and Sai-Kit Yeung},
url = {http://arxiv.org/abs/2502.04734},
doi = {10.48550/arXiv.2502.04734},
year = {2025},
date = {2025-02-01},
urldate = {2025-03-18},
publisher = {arXiv},
abstract = {360-degree cameras streamline data collection for radiance field 3D reconstruction by capturing comprehensive scene data. However, traditional radiance field methods do not address the specific challenges inherent to 360-degree images. We present SC-OmniGS, a novel self-calibrating omnidirectional Gaussian splatting system for fast and accurate omnidirectional radiance field reconstruction using 360-degree images. Rather than converting 360-degree images to cube maps and performing perspective image calibration, we treat 360-degree images as a whole sphere and derive a mathematical framework that enables direct omnidirectional camera pose calibration accompanied by 3D Gaussians optimization. Furthermore, we introduce a differentiable omnidirectional camera model in order to rectify the distortion of real-world data for performance enhancement. Overall, the omnidirectional camera intrinsic model, extrinsic poses, and 3D Gaussians are jointly optimized by minimizing weighted spherical photometric loss. Extensive experiments have demonstrated that our proposed SC-OmniGS is able to recover a high-quality radiance field from noisy camera poses or even no pose prior in challenging scenarios characterized by wide baselines and non-object-centric configurations. The noticeable performance gain in the real-world dataset captured by consumer-grade omnidirectional cameras verifies the effectiveness of our general omnidirectional camera model in reducing the distortion of 360-degree images.},
note = {arXiv:2502.04734 [cs]},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
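SC-OmniGS is said to minimize a weighted spherical photometric loss. A plausible minimal sketch, assuming the common cos-latitude solid-angle weighting for equirectangular images (the paper's exact weighting is not given here):

```python
# Plausible sketch of a weighted spherical photometric loss for
# equirectangular 360-degree images, assuming the common cos-latitude
# solid-angle weighting; the paper's exact weighting is not given here.
import numpy as np

def spherical_photometric_loss(rendered, target):
    h, w, c = rendered.shape
    # Latitude of each pixel row: +pi/2 at the top row, -pi/2 at the bottom.
    lat = (0.5 - (np.arange(h) + 0.5) / h) * np.pi
    weight = np.cos(lat)[:, None, None]  # pixel solid angle shrinks toward the poles
    err = (rendered - target) ** 2
    return (weight * err).sum() / (weight.sum() * w * c)

rendered = np.random.rand(64, 128, 3)
target = np.random.rand(64, 128, 3)
print(spherical_photometric_loss(rendered, target))
```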
Roth, Holger R.; Xu, Ziyue; Chen, Chester; Xu, Daguang; Dogra, Prerna; Flores, Mona; Cheng, Yan; Feng, Andrew
Overview of real-world applications of federated learning with NVIDIA FLARE Journal Article
In: Journal of Biopharmaceutical Statistics, pp. 1–11, 2025, ISSN: 1054-3406, 1520-5711.
@article{roth_overview_2025,
title = {Overview of real-world applications of federated learning with NVIDIA FLARE},
author = {Holger R. Roth and Ziyue Xu and Chester Chen and Daguang Xu and Prerna Dogra and Mona Flores and Yan Cheng and Andrew Feng},
url = {https://www.tandfonline.com/doi/full/10.1080/10543406.2025.2456174},
doi = {10.1080/10543406.2025.2456174},
issn = {1054-3406, 1520-5711},
year = {2025},
date = {2025-02-01},
urldate = {2025-03-20},
journal = {Journal of Biopharmaceutical Statistics},
pages = {1–11},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
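For readers unfamiliar with the federated learning that NVIDIA FLARE orchestrates, a minimal plain-numpy federated-averaging (FedAvg) sketch follows; this is a generic illustration of the aggregation step, not FLARE's API:

```python
# Minimal plain-numpy federated averaging (FedAvg) sketch: the server-side
# aggregation idea behind federated learning frameworks such as NVIDIA FLARE.
# This is a generic illustration, not FLARE's API; clients are toy data.
import numpy as np

def fedavg(client_weights, client_sizes):
    """Average client model weights, weighted by local dataset size."""
    sizes = np.asarray(client_sizes, dtype=float)
    coefs = sizes / sizes.sum()
    return sum(c * w for c, w in zip(coefs, client_weights))

clients = [np.array([1.0, 2.0]), np.array([3.0, 4.0]), np.array([5.0, 6.0])]
sizes = [100, 50, 50]  # local training-set sizes
print(fedavg(clients, sizes))  # global model for the next round
```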
Tak, Ala N.; Banayeeanzade, Amin; Bolourani, Anahita; Kian, Mina; Jia, Robin; Gratch, Jonathan
Mechanistic Interpretability of Emotion Inference in Large Language Models Miscellaneous
2025, (arXiv:2502.05489 [cs]).
@misc{tak_mechanistic_2025,
title = {Mechanistic Interpretability of Emotion Inference in Large Language Models},
author = {Ala N. Tak and Amin Banayeeanzade and Anahita Bolourani and Mina Kian and Robin Jia and Jonathan Gratch},
url = {http://arxiv.org/abs/2502.05489},
doi = {10.48550/arXiv.2502.05489},
year = {2025},
date = {2025-02-01},
urldate = {2025-02-20},
publisher = {arXiv},
abstract = {Large language models (LLMs) show promising capabilities in predicting human emotions from text. However, the mechanisms through which these models process emotional stimuli remain largely unexplored. Our study addresses this gap by investigating how autoregressive LLMs infer emotions, showing that emotion representations are functionally localized to specific regions in the model. Our evaluation includes diverse model families and sizes and is supported by robustness checks. We then show that the identified representations are psychologically plausible by drawing on cognitive appraisal theory, a well-established psychological framework positing that emotions emerge from evaluations (appraisals) of environmental stimuli. By causally intervening on construed appraisal concepts, we steer the generation and show that the outputs align with theoretical and intuitive expectations. This work highlights a novel way to causally intervene and precisely shape emotional text generation, potentially benefiting safety and alignment in sensitive affective domains.},
note = {arXiv:2502.05489 [cs]},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
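The abstract describes causally intervening on appraisal representations to steer emotional text generation. Below is a generic activation-steering sketch with a PyTorch forward hook, assuming a small GPT-2 stand-in, an arbitrary layer, and a random placeholder direction rather than the paper's learned appraisal directions:

```python
# Generic activation-steering sketch with a PyTorch forward hook: add a
# direction vector to one layer's hidden states and observe the generation.
# GPT-2, layer 6, and the random vector are placeholders; the paper's learned
# appraisal directions are not reproduced here.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

name = "gpt2"  # small stand-in model
tok = AutoTokenizer.from_pretrained(name)
model = AutoModelForCausalLM.from_pretrained(name)

steer = 0.5 * torch.randn(model.config.hidden_size)  # placeholder direction

def add_direction(module, inputs, output):
    # A GPT-2 block returns a tuple whose first element is the hidden states.
    return (output[0] + steer,) + output[1:]

handle = model.transformer.h[6].register_forward_hook(add_direction)
ids = tok("The news made me feel", return_tensors="pt")
out = model.generate(**ids, max_new_tokens=20, do_sample=False)
print(tok.decode(out[0]))
handle.remove()  # restore the unmodified model
```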
Liu, Ruying; Becerik-Gerber, Burcin; Lucas, Gale M.; Busta, Kelly
Impact of behavior-based virtual training on active shooter incident preparedness in healthcare facilities Journal Article
In: International Journal of Disaster Risk Reduction, vol. 118, pp. 105225, 2025, ISSN: 2212-4209.
@article{liu_impact_2025,
title = {Impact of behavior-based virtual training on active shooter incident preparedness in healthcare facilities},
author = {Ruying Liu and Burcin Becerik-Gerber and Gale M. Lucas and Kelly Busta},
url = {https://linkinghub.elsevier.com/retrieve/pii/S2212420925000494},
doi = {10.1016/j.ijdrr.2025.105225},
issn = {2212-4209},
year = {2025},
date = {2025-02-01},
urldate = {2025-02-20},
journal = {International Journal of Disaster Risk Reduction},
volume = {118},
pages = {105225},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Brun, Antonin; Liu, Ruying; Shukla, Aryan; Watson, Frances; Gratch, Jonathan
Exploring Emotion-Sensitive LLM-Based Conversational AI Miscellaneous
2025, (arXiv:2502.08920 [cs]).
@misc{brun_exploring_2025,
title = {Exploring Emotion-Sensitive LLM-Based Conversational AI},
author = {Antonin Brun and Ruying Liu and Aryan Shukla and Frances Watson and Jonathan Gratch},
url = {http://arxiv.org/abs/2502.08920},
doi = {10.48550/arXiv.2502.08920},
year = {2025},
date = {2025-02-01},
urldate = {2025-02-20},
publisher = {arXiv},
abstract = {Conversational AI chatbots have become increasingly common within the customer service industry. Despite improvements in their emotional development, they often lack the authenticity of real customer service interactions or the competence of service providers. By comparing emotion-sensitive and emotion-insensitive LLM-based chatbots across 30 participants, we aim to explore how emotional sensitivity in chatbots influences perceived competence and overall customer satisfaction in service interactions. Additionally, we employ sentiment analysis techniques to analyze and interpret the emotional content of user inputs. We highlight that perceptions of chatbot trustworthiness and competence were higher in the case of the emotion-sensitive chatbot, even if issue resolution rates were not affected. We discuss implications of improved user satisfaction from emotion-sensitive chatbots and potential applications in support services.},
note = {arXiv:2502.08920 [cs]},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
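The study is described as applying sentiment analysis to user inputs. A minimal sketch using the Hugging Face transformers sentiment pipeline with its default model (the paper's actual technique is not specified here):

```python
# Minimal sketch of sentiment-scoring user inputs, the kind of analysis the
# abstract mentions, using the transformers pipeline with its default model;
# the paper's actual technique is not specified here.
from transformers import pipeline

sentiment = pipeline("sentiment-analysis")
user_turns = [
    "I've been waiting forty minutes and nobody has helped me.",
    "Thanks, that actually fixed my issue!",
]
for turn in user_turns:
    print(turn, "->", sentiment(turn)[0])  # e.g. {'label': 'NEGATIVE', 'score': 0.99}
```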
Cai, Yunxuan; Xiang, Sitao; Li, Zongjian; Chen, Haiwei; Zhao, Yajie
Bringing Diversity from Diffusion Models to Semantic-Guided Face Asset Generation Miscellaneous
2025, (Version Number: 1).
@misc{cai_bringing_2025,
title = {Bringing Diversity from Diffusion Models to Semantic-Guided Face Asset Generation},
author = {Yunxuan Cai and Sitao Xiang and Zongjian Li and Haiwei Chen and Yajie Zhao},
url = {https://arxiv.org/abs/2504.15259},
doi = {10.48550/ARXIV.2504.15259},
year = {2025},
date = {2025-01-01},
urldate = {2025-06-25},
publisher = {arXiv},
abstract = {Digital modeling and reconstruction of human faces serve various applications. However, its availability is often hindered by the requirements of data capturing devices, manual labor, and suitable actors. This situation restricts the diversity, expressiveness, and control over the resulting models. This work aims to demonstrate that a semantically controllable generative network can provide enhanced control over the digital face modeling process. To enhance diversity beyond the limited human faces scanned in a controlled setting, we introduce a novel data generation pipeline that creates a high-quality 3D face database using a pre-trained diffusion model. Our proposed normalization module converts synthesized data from the diffusion model into high-quality scanned data. Using the 44,000 face models we obtained, we further developed an efficient GAN-based generator. This generator accepts semantic attributes as input, and generates geometry and albedo. It also allows continuous post-editing of attributes in the latent space. Our asset refinement component subsequently creates physically-based facial assets. We introduce a comprehensive system designed for creating and editing high-quality face assets. Our proposed model has undergone extensive experiment, comparison and evaluation. We also integrate everything into a web-based interactive tool. We aim to make this tool publicly available with the release of the paper.},
note = {Version Number: 1},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
Kang, Seoyoung; Yoon, Boram; Kim, Kangsoo; Gratch, Jonathan; Woo, Woontack
How Collaboration Context and Personality Traits Shape the Social Norms of Human-to-Avatar Identity Representation Journal Article
In: IEEE Trans. Visual. Comput. Graphics, pp. 1–10, 2025, ISSN: 1077-2626, 1941-0506, 2160-9306.
@article{kang_how_2025,
title = {How Collaboration Context and Personality Traits Shape the Social Norms of Human-to-Avatar Identity Representation},
author = {Seoyoung Kang and Boram Yoon and Kangsoo Kim and Jonathan Gratch and Woontack Woo},
url = {https://ieeexplore.ieee.org/document/10935702/},
doi = {10.1109/TVCG.2025.3549904},
issn = {1077-2626, 1941-0506, 2160-9306},
year = {2025},
date = {2025-01-01},
urldate = {2025-04-17},
journal = {IEEE Trans. Visual. Comput. Graphics},
pages = {1–10},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Tran, Minh; Yin, Yufeng; Soleymani, Mohammad
SetPeER: Set-Based Personalized Emotion Recognition With Weak Supervision Journal Article
In: IEEE Trans. Affective Comput., pp. 1–15, 2025, ISSN: 1949-3045, 2371-9850.
@article{tran_setpeer_2025,
title = {SetPeER: Set-Based Personalized Emotion Recognition With Weak Supervision},
author = {Minh Tran and Yufeng Yin and Mohammad Soleymani},
url = {https://ieeexplore.ieee.org/document/10993348/},
doi = {10.1109/TAFFC.2025.3568024},
issn = {1949-3045, 2371-9850},
year = {2025},
date = {2025-01-01},
urldate = {2025-05-20},
journal = {IEEE Trans. Affective Comput.},
pages = {1–15},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Wang, Ning; Hurt, Timothy; Krakowski, Ari; Greenwald, Eric; Hammerman, Jim; Santos, Sabrina De Los; Masur, Omkar; Fu, Boxi; Merchant, Chirag
Virtually Human: An Exhibit for Public AI Education Book Section
In: Stephanidis, Constantine; Antona, Margherita; Ntoa, Stavroula; Salvendy, Gavriel (Ed.): HCI International 2025 Posters, vol. 2529, pp. 436–443, Springer Nature Switzerland, Cham, 2025, ISBN: 978-3-031-94170-2 978-3-031-94171-9, (Series Title: Communications in Computer and Information Science).
@incollection{stephanidis_virtually_2025,
title = {Virtually Human: An Exhibit for Public AI Education},
author = {Ning Wang and Timothy Hurt and Ari Krakowski and Eric Greenwald and Jim Hammerman and Sabrina De Los Santos and Omkar Masur and Boxi Fu and Chirag Merchant},
editor = {Constantine Stephanidis and Margherita Antona and Stavroula Ntoa and Gavriel Salvendy},
url = {https://link.springer.com/10.1007/978-3-031-94171-9_42},
doi = {10.1007/978-3-031-94171-9_42},
isbn = {978-3-031-94170-2 978-3-031-94171-9},
year = {2025},
date = {2025-01-01},
urldate = {2025-06-17},
booktitle = {HCI International 2025 Posters},
volume = {2529},
pages = {436–443},
publisher = {Springer Nature Switzerland},
address = {Cham},
note = {Series Title: Communications in Computer and Information Science},
keywords = {},
pubstate = {published},
tppubtype = {incollection}
}
Hu, Yue; Liu, Rong; Chen, Meida; Beerel, Peter; Feng, Andrew
SplatMAP: Online Dense Monocular SLAM with 3D Gaussian Splatting Miscellaneous
2025, (arXiv:2501.07015 [cs]).
@misc{hu_splatmap_2025,
title = {SplatMAP: Online Dense Monocular SLAM with 3D Gaussian Splatting},
author = {Yue Hu and Rong Liu and Meida Chen and Peter Beerel and Andrew Feng},
url = {http://arxiv.org/abs/2501.07015},
doi = {10.48550/arXiv.2501.07015},
year = {2025},
date = {2025-01-01},
urldate = {2025-01-16},
publisher = {arXiv},
abstract = {Achieving high-fidelity 3D reconstruction from monocular video remains challenging due to the inherent limitations of traditional methods like Structure-from-Motion (SfM) and monocular SLAM in accurately capturing scene details. While differentiable rendering techniques such as Neural Radiance Fields (NeRF) address some of these challenges, their high computational costs make them unsuitable for real-time applications. Additionally, existing 3D Gaussian Splatting (3DGS) methods often focus on photometric consistency, neglecting geometric accuracy and failing to exploit SLAM's dynamic depth and pose updates for scene refinement. We propose a framework integrating dense SLAM with 3DGS for real-time, high-fidelity dense reconstruction. Our approach introduces SLAM-Informed Adaptive Densification, which dynamically updates and densifies the Gaussian model by leveraging dense point clouds from SLAM. Additionally, we incorporate Geometry-Guided Optimization, which combines edge-aware geometric constraints and photometric consistency to jointly optimize the appearance and geometry of the 3DGS scene representation, enabling detailed and accurate SLAM mapping reconstruction. Experiments on the Replica and TUM-RGBD datasets demonstrate the effectiveness of our approach, achieving state-of-the-art results among monocular systems. Specifically, our method achieves a PSNR of 36.864, SSIM of 0.985, and LPIPS of 0.040 on Replica, representing improvements of 10.7%, 6.4%, and 49.4%, respectively, over the previous SOTA. On TUM-RGBD, our method outperforms the closest baseline by 10.2%, 6.6%, and 34.7% in the same metrics. These results highlight the potential of our framework in bridging the gap between photometric and geometric dense 3D scene representations, paving the way for practical and efficient monocular dense reconstruction.},
note = {arXiv:2501.07015 [cs]},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
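The abstract reports PSNR alongside SSIM and LPIPS. For reference, PSNR in decibels is 10 · log10(MAX² / MSE); a small sketch with toy images:

```python
# PSNR, one of the metrics reported above, in its standard form for images
# scaled to [0, 1]; the images here are toy placeholders.
import numpy as np

def psnr(img, ref, max_val=1.0):
    mse = np.mean((img - ref) ** 2)
    return 10.0 * np.log10(max_val**2 / mse)

ref = np.random.rand(32, 32, 3)
img = np.clip(ref + np.random.normal(0.0, 0.01, ref.shape), 0.0, 1.0)
print(psnr(img, ref))  # higher is better; roughly 40 dB for sigma = 0.01
```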
Rizzo, Albert “Skip”; Giosan, Cezar; Deac, George; Zaporozhets, Olya; Syvak, Oksana; Dragayeva, Svetlana; Bodner, Ehud; Mann, Shel; Stone, Jessica
The Virtual Ukraine Project: Trauma Therapy in Warzones with Virtual Reality Book Section
In: Stone, Jessica (Ed.): Mental Health Virtual Reality, pp. 159–180, Wiley, 2025, ISBN: 978-1-394-27845-9 978-1-394-27848-0.
@incollection{stone_virtual_2025,
title = {The Virtual Ukraine Project: Trauma Therapy in Warzones with Virtual Reality},
author = {Albert “Skip” Rizzo and Cezar Giosan and George Deac and Olya Zaporozhets and Oksana Syvak and Svetlana Dragayeva and Ehud Bodner and Shel Mann and Jessica Stone},
editor = {Jessica Stone},
url = {https://onlinelibrary.wiley.com/doi/10.1002/9781394278480.ch12},
doi = {10.1002/9781394278480.ch12},
isbn = {978-1-394-27845-9 978-1-394-27848-0},
year = {2025},
date = {2025-01-01},
urldate = {2025-01-16},
booktitle = {Mental Health Virtual Reality},
pages = {159–180},
publisher = {Wiley},
edition = {1},
keywords = {},
pubstate = {published},
tppubtype = {incollection}
}
Liu, Rong; Sun, Dylan; Chen, Meida; Wang, Yue; Feng, Andrew
Deformable Beta Splatting Miscellaneous
2025, (arXiv:2501.18630 [cs]).
@misc{liu_deformable_2025,
title = {Deformable Beta Splatting},
author = {Rong Liu and Dylan Sun and Meida Chen and Yue Wang and Andrew Feng},
url = {http://arxiv.org/abs/2501.18630},
doi = {10.48550/arXiv.2501.18630},
year = {2025},
date = {2025-01-01},
urldate = {2025-02-20},
publisher = {arXiv},
abstract = {3D Gaussian Splatting (3DGS) has advanced radiance field reconstruction by enabling real-time rendering. However, its reliance on Gaussian kernels for geometry and low-order Spherical Harmonics (SH) for color encoding limits its ability to capture complex geometries and diverse colors. We introduce Deformable Beta Splatting (DBS), a deformable and compact approach that enhances both geometry and color representation. DBS replaces Gaussian kernels with deformable Beta Kernels, which offer bounded support and adaptive frequency control to capture fine geometric details with higher fidelity while achieving better memory efficiency. In addition, we extended the Beta Kernel to color encoding, which facilitates improved representation of diffuse and specular components, yielding superior results compared to SH-based methods. Furthermore, unlike prior densification techniques that depend on Gaussian properties, we mathematically prove that adjusting regularized opacity alone ensures distribution-preserved Markov chain Monte Carlo (MCMC), independent of the splatting kernel type. Experimental results demonstrate that DBS achieves state-of-the-art visual quality while utilizing only 45% of the parameters and rendering 1.5x faster than 3DGS-based methods. Notably, for the first time, splatting-based methods outperform state-of-the-art Neural Radiance Fields, highlighting the superior performance and efficiency of DBS for real-time radiance field rendering.},
note = {arXiv:2501.18630 [cs]},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
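DBS replaces Gaussian kernels with bounded-support Beta kernels. As a hedged illustration of that idea only, the sketch below builds a generic beta-shaped radial kernel with scipy; it is a stand-in, not the paper's parameterization:

```python
# Generic bounded, beta-shaped radial kernel, illustrating only the broad
# idea named in the abstract (bounded support, shape-controlled frequency);
# this is a stand-in, not the paper's parameterization.
import numpy as np
from scipy.stats import beta as beta_dist

def beta_kernel(r, a=2.0, b=2.0):
    """Kernel on r in [0, 1] with peak normalized to 1; zero outside."""
    r = np.clip(r, 0.0, 1.0)
    peak = beta_dist.pdf((a - 1.0) / (a + b - 2.0), a, b)  # mode, for a, b > 1
    return beta_dist.pdf(r, a, b) / peak

r = np.linspace(0.0, 1.0, 5)
print(beta_kernel(r))            # gentle bell on [0, 1]
print(beta_kernel(r, 5.0, 5.0))  # larger a, b -> sharper, higher-frequency kernel
```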
Chang, Di; Xu, Hongyi; Xie, You; Gao, Yipeng; Kuang, Zhengfei; Cai, Shengqu; Zhang, Chenxu; Song, Guoxian; Wang, Chao; Shi, Yichun; Chen, Zeyuan; Zhou, Shijie; Luo, Linjie; Wetzstein, Gordon; Soleymani, Mohammad
X-Dyna: Expressive Dynamic Human Image Animation Miscellaneous
2025, (arXiv:2501.10021 [cs]).
@misc{chang_x-dyna_2025,
title = {X-Dyna: Expressive Dynamic Human Image Animation},
author = {Di Chang and Hongyi Xu and You Xie and Yipeng Gao and Zhengfei Kuang and Shengqu Cai and Chenxu Zhang and Guoxian Song and Chao Wang and Yichun Shi and Zeyuan Chen and Shijie Zhou and Linjie Luo and Gordon Wetzstein and Mohammad Soleymani},
url = {http://arxiv.org/abs/2501.10021},
doi = {10.48550/arXiv.2501.10021},
year = {2025},
date = {2025-01-01},
urldate = {2025-02-20},
publisher = {arXiv},
abstract = {We introduce X-Dyna, a novel zero-shot, diffusion-based pipeline for animating a single human image using facial expressions and body movements derived from a driving video, that generates realistic, context-aware dynamics for both the subject and the surrounding environment. Building on prior approaches centered on human pose control, X-Dyna addresses key shortcomings causing the loss of dynamic details, enhancing the lifelike qualities of human video animations. At the core of our approach is the Dynamics-Adapter, a lightweight module that effectively integrates reference appearance context into the spatial attentions of the diffusion backbone while preserving the capacity of motion modules in synthesizing fluid and intricate dynamic details. Beyond body pose control, we connect a local control module with our model to capture identity-disentangled facial expressions, facilitating accurate expression transfer for enhanced realism in animated scenes. Together, these components form a unified framework capable of learning physical human motion and natural scene dynamics from a diverse blend of human and scene videos. Comprehensive qualitative and quantitative evaluations demonstrate that X-Dyna outperforms state-of-the-art methods, creating highly lifelike and expressive animations. The code is available at https://github.com/bytedance/X-Dyna.},
note = {arXiv:2501.10021 [cs]},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
Rodrigues, Patrick Borges; Becerik-Gerber, Burcin; Soibelman, Lucio; Lucas, Gale M.; Roll, Shawn C.
Impact of selective environmental sound attenuation on operator performance, stress, attention, and task engagement in teleoperated demolition Journal Article
In: Automation in Construction, vol. 169, pp. 105876, 2025, ISSN: 0926-5805.
@article{rodrigues_impact_2025,
title = {Impact of selective environmental sound attenuation on operator performance, stress, attention, and task engagement in teleoperated demolition},
author = {Patrick Borges Rodrigues and Burcin Becerik-Gerber and Lucio Soibelman and Gale M. Lucas and Shawn C. Roll},
url = {https://linkinghub.elsevier.com/retrieve/pii/S0926580524006125},
doi = {10.1016/j.autcon.2024.105876},
issn = {0926-5805},
year = {2025},
date = {2025-01-01},
urldate = {2024-12-20},
journal = {Automation in Construction},
volume = {169},
pages = {105876},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Siniukov, Maksim; Xing, Ellie; Isfahani, Sanaz Attaripour; Soleymani, Mohammad
Towards a Generalizable Speech Marker for Parkinson's Disease Diagnosis Miscellaneous
2025, (Version Number: 1).
@misc{siniukov_towards_2025,
title = {Towards a Generalizable Speech Marker for Parkinson's Disease Diagnosis},
author = {Maksim Siniukov and Ellie Xing and Sanaz Attaripour Isfahani and Mohammad Soleymani},
url = {https://arxiv.org/abs/2501.03581},
doi = {10.48550/ARXIV.2501.03581},
year = {2025},
date = {2025-01-01},
urldate = {2025-01-14},
publisher = {arXiv},
abstract = {Parkinson's Disease (PD) is a neurodegenerative disorder characterized by motor symptoms, including altered voice production in the early stages. Early diagnosis is crucial not only to improve PD patients' quality of life but also to enhance the efficacy of potential disease-modifying therapies during early neurodegeneration, a window often missed by current diagnostic tools. In this paper, we propose a more generalizable approach to PD recognition through domain adaptation and self-supervised learning. We demonstrate the generalization capabilities of the proposed approach across diverse datasets in different languages. Our approach leverages HuBERT, a large deep neural network originally trained for speech recognition and further trains it on unlabeled speech data from a population that is similar to the target group, i.e., the elderly, in a self-supervised manner. The model is then fine-tuned and adapted for use across different datasets in multiple languages, including English, Italian, and Spanish. Evaluations on four publicly available PD datasets demonstrate the model's efficacy, achieving an average specificity of 92.1% and an average sensitivity of 91.2%. This method offers objective and consistent evaluations across large populations, addressing the variability inherent in human assessments and providing a non-invasive, cost-effective and accessible diagnostic option.},
note = {Version Number: 1},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
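The abstract reports average specificity and sensitivity. A small reference sketch computing both from a confusion matrix with toy labels (1 = Parkinson's, 0 = control):

```python
# Sensitivity and specificity, the metrics the abstract reports, computed
# from a confusion matrix with scikit-learn; labels are toy placeholders
# (1 = Parkinson's, 0 = control).
from sklearn.metrics import confusion_matrix

y_true = [1, 1, 1, 1, 0, 0, 0, 0]
y_pred = [1, 1, 1, 0, 0, 0, 1, 0]
tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
print("sensitivity:", tp / (tp + fn))  # true positive rate
print("specificity:", tn / (tn + fp))  # true negative rate
```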
2024
Chen, Meida; Han, Kangle; Yu, Zifan; Feng, Andrew; Hou, Yu; You, Suya; Soibelman, Lucio
An Aerial Photogrammetry Benchmark Dataset for Point Cloud Segmentation and Style Translation Journal Article
In: Remote Sensing, vol. 16, no. 22, pp. 4240, 2024, ISSN: 2072-4292.
@article{chen_aerial_2024,
title = {An Aerial Photogrammetry Benchmark Dataset for Point Cloud Segmentation and Style Translation},
author = {Meida Chen and Kangle Han and Zifan Yu and Andrew Feng and Yu Hou and Suya You and Lucio Soibelman},
url = {https://www.mdpi.com/2072-4292/16/22/4240},
doi = {10.3390/rs16224240},
issn = {2072-4292},
year = {2024},
date = {2024-11-01},
urldate = {2024-12-05},
journal = {Remote Sensing},
volume = {16},
number = {22},
pages = {4240},
abstract = {The recent surge in diverse 3D datasets spanning various scales and applications marks a significant advancement in the field. However, the comprehensive process of data acquisition, refinement, and annotation at a large scale poses a formidable challenge, particularly for individual researchers and small teams. To this end, we present a novel synthetic 3D point cloud generation framework that can produce detailed outdoor aerial photogrammetric 3D datasets with accurate ground truth annotations without the labor-intensive and time-consuming data collection/annotation processes. Our pipeline procedurally generates synthetic environments, mirroring real-world data collection and 3D reconstruction processes. A key feature of our framework is its ability to replicate consistent quality, noise patterns, and diversity similar to real-world datasets. This is achieved by adopting UAV flight patterns that resemble those used in real-world data collection processes (e.g., the cross-hatch flight pattern) across various synthetic terrains that are procedurally generated, thereby ensuring data consistency akin to real-world scenarios. Moreover, the generated datasets are enriched with precise semantic and instance annotations, eliminating the need for manual labeling. Our approach has led to the development and release of the Semantic Terrain Points Labeling—Synthetic 3D (STPLS3D) benchmark, an extensive outdoor 3D dataset encompassing over 16 km2, featuring up to 19 semantic labels. We also collected, reconstructed, and annotated four real-world datasets for validation purposes. Extensive experiments on these datasets demonstrate our synthetic datasets’ effectiveness, superior quality, and their value as a benchmark dataset for further point cloud research.},
keywords = {DTIC, VGL},
pubstate = {published},
tppubtype = {article}
}
Bonial, Claire; Lukin, Stephanie M.; Abrams, Mitchell; Baker, Anthony; Donatelli, Lucia; Foots, Ashley; Hayes, Cory J.; Henry, Cassidy; Hudson, Taylor; Marge, Matthew; Pollard, Kimberly A.; Artstein, Ron; Traum, David; Voss, Clare R.
Human–robot dialogue annotation for multi-modal common ground Journal Article
In: Lang Resources & Evaluation, 2024, ISSN: 1574-020X, 1574-0218.
@article{bonial_humanrobot_2024,
title = {Human–robot dialogue annotation for multi-modal common ground},
author = {Claire Bonial and Stephanie M. Lukin and Mitchell Abrams and Anthony Baker and Lucia Donatelli and Ashley Foots and Cory J. Hayes and Cassidy Henry and Taylor Hudson and Matthew Marge and Kimberly A. Pollard and Ron Artstein and David Traum and Clare R. Voss},
url = {https://link.springer.com/10.1007/s10579-024-09784-2},
doi = {10.1007/s10579-024-09784-2},
issn = {1574-020X, 1574-0218},
year = {2024},
date = {2024-11-01},
urldate = {2024-12-05},
journal = {Lang Resources & Evaluation},
keywords = {DTIC, Virtual Humans},
pubstate = {published},
tppubtype = {article}
}
Marti, Deniz; Budathoki, Anjila; Ding, Yi; Lucas, Gale; Nelson, David
How Does Acknowledging Users’ Preferences Impact AI’s Ability to Make Conflicting Recommendations? Journal Article
In: International Journal of Human–Computer Interaction, pp. 1–12, 2024, ISSN: 1044-7318, 1532-7590.
@article{marti_how_2024,
title = {How Does Acknowledging Users’ Preferences Impact AI’s Ability to Make Conflicting Recommendations?},
author = {Deniz Marti and Anjila Budathoki and Yi Ding and Gale Lucas and David Nelson},
url = {https://www.tandfonline.com/doi/full/10.1080/10447318.2024.2426035},
doi = {10.1080/10447318.2024.2426035},
issn = {1044-7318, 1532-7590},
year = {2024},
date = {2024-11-01},
urldate = {2024-12-05},
journal = {International Journal of Human–Computer Interaction},
pages = {1–12},
keywords = {DTIC, Virtual Humans},
pubstate = {published},
tppubtype = {article}
}
Vlake, Johan H; Drop, Denzel L Q; Bommel, Jasper Van; Riva, Giuseppe; Wiederhold, Brenda K; Cipresso, Pietro; Rizzo, Albert S; Rothbaum, Barbara O; Botella, Cristina; Hooft, Lotty; Bienvenu, Oscar J; Jung, Christian; Geerts, Bart; Wils, Evert-Jan; Gommers, Diederik; Genderen, Michel E Van; Group, RATE-XR Expert
Reporting Guidelines for the Early-Phase Clinical Evaluation of Applications Using Extended Reality: RATE-XR Qualitative Study Guideline Journal Article
In: J Med Internet Res, vol. 26, pp. e56790, 2024, ISSN: 1438-8871.
@article{vlake_reporting_2024,
title = {Reporting Guidelines for the Early-Phase Clinical Evaluation of Applications Using Extended Reality: RATE-XR Qualitative Study Guideline},
author = {Johan H Vlake and Denzel L Q Drop and Jasper Van Bommel and Giuseppe Riva and Brenda K Wiederhold and Pietro Cipresso and Albert S Rizzo and Barbara O Rothbaum and Cristina Botella and Lotty Hooft and Oscar J Bienvenu and Christian Jung and Bart Geerts and Evert-Jan Wils and Diederik Gommers and Michel E Van Genderen and RATE-XR Expert Group},
url = {https://www.jmir.org/2024/1/e56790},
doi = {10.2196/56790},
issn = {1438-8871},
year = {2024},
date = {2024-11-01},
urldate = {2024-12-05},
journal = {J Med Internet Res},
volume = {26},
pages = {e56790},
abstract = {Background: Extended reality (XR), encompassing technologies such as virtual reality, augmented reality, and mixed reality, has rapidly gained prominence in health care. However, existing XR research often lacks rigor, proper controls, and standardization. Objective: To address this and to enhance the transparency and quality of reporting in early-phase clinical evaluations of XR applications, we present the “Reporting for the early-phase clinical evaluation of applications using extended reality” (RATE-XR) guideline. Methods: We conducted a 2-round modified Delphi process involving experts from diverse stakeholder categories, and the RATE-XR is therefore the result of a consensus-based, multistakeholder effort. Results: The guideline comprises 17 XR-specific (composed of 18 subitems) and 14 generic reporting items, each with a complementary Explanation & Elaboration section. Conclusions: The items encompass critical aspects of XR research, from clinical utility and safety to human factors and ethics. By offering a comprehensive checklist for reporting, the RATE-XR guideline facilitates robust assessment and replication of early-stage clinical XR studies. It underscores the need for transparency, patient-centeredness, and balanced evaluation of the applications of XR in health care. By providing an actionable checklist of minimal reporting items, this guideline will facilitate the responsible development and integration of XR technologies into health care and related fields.},
keywords = {MedVR},
pubstate = {published},
tppubtype = {article}
}
Roemmele, Melissa; Gordon, Andrew S.
From Test-Taking to Test-Making: Examining LLM Authoring of Commonsense Assessment Items Miscellaneous
2024, (Version Number: 1).
@misc{roemmele_test-taking_2024-1,
title = {From Test-Taking to Test-Making: Examining LLM Authoring of Commonsense Assessment Items},
author = {Melissa Roemmele and Andrew S. Gordon},
url = {https://arxiv.org/abs/2410.14897},
doi = {10.48550/ARXIV.2410.14897},
year = {2024},
date = {2024-10-01},
urldate = {2024-12-05},
publisher = {arXiv},
abstract = {LLMs can now perform a variety of complex writing tasks. They also excel in answering questions pertaining to natural language inference and commonsense reasoning. Composing these questions is itself a skilled writing task, so in this paper we consider LLMs as authors of commonsense assessment items. We prompt LLMs to generate items in the style of a prominent benchmark for commonsense reasoning, the Choice of Plausible Alternatives (COPA). We examine the outcome according to analyses facilitated by the LLMs and human annotation. We find that LLMs that succeed in answering the original COPA benchmark are also more successful in authoring their own items.},
note = {Version Number: 1},
keywords = {DTIC, Learning Sciences},
pubstate = {published},
tppubtype = {misc}
}
Lin, Spencer; Rizk, Basem; Jun, Miru; Artze, Andy; Sullivan, Caitlin; Mozgai, Sharon; Fisher, Scott
Estuary: A Framework For Building Multimodal Low-Latency Real-Time Socially Interactive Agents Miscellaneous
2024, (arXiv:2410.20116 [cs]).
@misc{lin_estuary_2024,
title = {Estuary: A Framework For Building Multimodal Low-Latency Real-Time Socially Interactive Agents},
author = {Spencer Lin and Basem Rizk and Miru Jun and Andy Artze and Caitlin Sullivan and Sharon Mozgai and Scott Fisher},
url = {http://arxiv.org/abs/2410.20116},
doi = {10.1145/3652988.3696198},
year = {2024},
date = {2024-10-01},
urldate = {2024-12-06},
abstract = {The rise in capability and ubiquity of generative artificial intelligence (AI) technologies has enabled its application to the field of Socially Interactive Agents (SIAs). Despite rising interest in modern AI-powered components used for real-time SIA research, substantial friction remains due to the absence of a standardized and universal SIA framework. To target this absence, we developed Estuary: a multimodal (text, audio, and soon video) framework which facilitates the development of low-latency, real-time SIAs. Estuary seeks to reduce repeat work between studies and to provide a flexible platform that can be run entirely off-cloud to maximize configurability, controllability, reproducibility of studies, and speed of agent response times. We are able to do this by constructing a robust multimodal framework which incorporates current and future components seamlessly into a modular and interoperable architecture.},
note = {arXiv:2410.20116 [cs]},
keywords = {Virtual Agents},
pubstate = {published},
tppubtype = {misc}
}
Tran, Minh; Kim, Yelin; Su, Che-Chun; Kuo, Cheng-Hao; Sun, Min; Soleymani, Mohammad
Ex2Eg-MAE: A Framework for Adaptation of Exocentric Video Masked Autoencoders for Egocentric Social Role Understanding Book Section
In: Leonardis, Aleš; Ricci, Elisa; Roth, Stefan; Russakovsky, Olga; Sattler, Torsten; Varol, Gül (Ed.): Computer Vision – ECCV 2024, vol. 15138, pp. 1–19, Springer Nature Switzerland, Cham, 2024, ISBN: 978-3-031-72988-1 978-3-031-72989-8, (Series Title: Lecture Notes in Computer Science).
@incollection{leonardis_ex2eg-mae_2024,
title = {Ex2Eg-MAE: A Framework for Adaptation of Exocentric Video Masked Autoencoders for Egocentric Social Role Understanding},
author = {Minh Tran and Yelin Kim and Che-Chun Su and Cheng-Hao Kuo and Min Sun and Mohammad Soleymani},
editor = {Aleš Leonardis and Elisa Ricci and Stefan Roth and Olga Russakovsky and Torsten Sattler and Gül Varol},
url = {https://link.springer.com/10.1007/978-3-031-72989-8_1},
doi = {10.1007/978-3-031-72989-8_1},
isbn = {978-3-031-72988-1 978-3-031-72989-8},
year = {2024},
date = {2024-10-01},
urldate = {2024-12-06},
booktitle = {Computer Vision – ECCV 2024},
volume = {15138},
pages = {1–19},
publisher = {Springer Nature Switzerland},
address = {Cham},
note = {Series Title: Lecture Notes in Computer Science},
keywords = {},
pubstate = {published},
tppubtype = {incollection}
}
Chen, Gonglin; Wu, Jinsen; Chen, Haiwei; Teng, Wenbin; Gao, Zhiyuan; Feng, Andrew; Qin, Rongjun; Zhao, Yajie
Geometry-aware Feature Matching for Large-Scale Structure from Motion Miscellaneous
2024, (Version Number: 3).
@misc{chen_geometry-aware_2024,
title = {Geometry-aware Feature Matching for Large-Scale Structure from Motion},
author = {Gonglin Chen and Jinsen Wu and Haiwei Chen and Wenbin Teng and Zhiyuan Gao and Andrew Feng and Rongjun Qin and Yajie Zhao},
url = {https://arxiv.org/abs/2409.02310},
doi = {10.48550/ARXIV.2409.02310},
year = {2024},
date = {2024-09-01},
urldate = {2025-01-16},
publisher = {arXiv},
abstract = {Establishing consistent and dense correspondences across multiple images is crucial for Structure from Motion (SfM) systems. Significant view changes, such as air-to-ground with very sparse view overlap, pose an even greater challenge to the correspondence solvers. We present a novel optimization-based approach that significantly enhances existing feature matching methods by introducing geometry cues in addition to color cues. This helps fill gaps when there is less overlap in large-scale scenarios. Our method formulates geometric verification as an optimization problem, guiding feature matching within detector-free methods and using sparse correspondences from detector-based methods as anchor points. By enforcing geometric constraints via the Sampson Distance, our approach ensures that the denser correspondences from detector-free methods are geometrically consistent and more accurate. This hybrid strategy significantly improves correspondence density and accuracy, mitigates multi-view inconsistencies, and leads to notable advancements in camera pose accuracy and point cloud density. It outperforms state-of-the-art feature matching methods on benchmark datasets and enables feature matching in challenging extreme large-scale settings.},
note = {Version Number: 3},
keywords = {DTIC},
pubstate = {published},
tppubtype = {misc}
}
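The method above is described as enforcing geometric constraints via the Sampson distance. The sketch below implements the standard first-order Sampson approximation of reprojection error for a fundamental matrix F; the matrix and points are toy placeholders:

```python
# Standard first-order Sampson distance for a fundamental matrix F and a
# homogeneous point correspondence (x, x2); F and the points below are toy
# placeholders, not values from the paper.
import numpy as np

def sampson_distance(F, x, x2):
    Fx = F @ x          # epipolar line of x in the second image
    Ftx2 = F.T @ x2     # epipolar line of x2 in the first image
    num = (x2 @ F @ x) ** 2
    den = Fx[0] ** 2 + Fx[1] ** 2 + Ftx2[0] ** 2 + Ftx2[1] ** 2
    return num / den

F = np.array([[0.0, -1.0, 2.0],
              [1.0, 0.0, -3.0],
              [-2.0, 3.0, 0.0]])      # toy skew-symmetric stand-in
x = np.array([1.0, 2.0, 1.0])
x2 = np.array([1.1, 2.1, 1.0])
print(sampson_distance(F, x, x2))  # small value -> geometrically consistent match
```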
Hale, James; Schweitzer, Lindsey; Gratch, Jonathan
Pitfalls of Embodiment in Human-Agent Experiment Design Proceedings Article
In: Proceedings of the ACM International Conference on Intelligent Virtual Agents, pp. 1–9, ACM, GLASGOW United Kingdom, 2024, ISBN: 979-8-4007-0625-7.
@inproceedings{hale_pitfalls_2024,
title = {Pitfalls of Embodiment in Human-Agent Experiment Design},
author = {James Hale and Lindsey Schweitzer and Jonathan Gratch},
url = {https://dl.acm.org/doi/10.1145/3652988.3673958},
doi = {10.1145/3652988.3673958},
isbn = {979-8-4007-0625-7},
year = {2024},
date = {2024-09-01},
urldate = {2025-01-16},
booktitle = {Proceedings of the ACM International Conference on Intelligent Virtual Agents},
pages = {1–9},
publisher = {ACM},
address = {GLASGOW United Kingdom},
keywords = {DTIC},
pubstate = {published},
tppubtype = {inproceedings}
}
Gao, Zhiyuan; Teng, Wenbin; Chen, Gonglin; Wu, Jinsen; Xu, Ningli; Qin, Rongjun; Feng, Andrew; Zhao, Yajie
Skyeyes: Ground Roaming using Aerial View Images Miscellaneous
2024, (Version Number: 1).
@misc{gao_skyeyes_2024,
title = {Skyeyes: Ground Roaming using Aerial View Images},
author = {Zhiyuan Gao and Wenbin Teng and Gonglin Chen and Jinsen Wu and Ningli Xu and Rongjun Qin and Andrew Feng and Yajie Zhao},
url = {https://arxiv.org/abs/2409.16685},
doi = {10.48550/ARXIV.2409.16685},
year = {2024},
date = {2024-09-01},
urldate = {2025-01-16},
publisher = {arXiv},
abstract = {Integrating aerial imagery-based scene generation into applications like autonomous driving and gaming enhances realism in 3D environments, but challenges remain in creating detailed content for occluded areas and ensuring real-time, consistent rendering. In this paper, we introduce Skyeyes, a novel framework that can generate photorealistic sequences of ground view images using only aerial view inputs, thereby creating a ground roaming experience. More specifically, we combine a 3D representation with a view consistent generation model, which ensures coherence between generated images. This method allows for the creation of geometrically consistent ground view images, even with large view gaps. The images maintain improved spatial-temporal coherence and realism, enhancing scene comprehension and visualization from aerial perspectives. To the best of our knowledge, there are no publicly available datasets that contain pairwise geo-aligned aerial and ground view imagery. Therefore, we build a large, synthetic, and geo-aligned dataset using Unreal Engine. Both qualitative and quantitative analyses on this synthetic dataset display superior results compared to other leading synthesis approaches. See the project page for more results: https://chaoren2357.github.io/website-skyeyes/.},
note = {Version Number: 1},
keywords = {DTIC},
pubstate = {published},
tppubtype = {misc}
}
Hale, James; Schweitzer, Lindsey; Gratch, Jonathan
Integration of LLMs with Virtual Character Embodiment Proceedings Article
In: Proceedings of the ACM International Conference on Intelligent Virtual Agents, pp. 1–3, ACM, GLASGOW United Kingdom, 2024, ISBN: 979-8-4007-0625-7.
@inproceedings{hale_integration_2024,
title = {Integration of LLMs with Virtual Character Embodiment},
author = {James Hale and Lindsey Schweitzer and Jonathan Gratch},
url = {https://dl.acm.org/doi/10.1145/3652988.3696199},
doi = {10.1145/3652988.3696199},
isbn = {979-8-4007-0625-7},
year = {2024},
date = {2024-09-01},
urldate = {2025-01-16},
booktitle = {Proceedings of the ACM International Conference on Intelligent Virtual Agents},
pages = {1–3},
publisher = {ACM},
address = {GLASGOW United Kingdom},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Georgila, Kallirroi
Comparing Pre-Trained Embeddings and Domain-Independent Features for Regression-Based Evaluation of Task-Oriented Dialogue Systems Proceedings Article
In: Proceedings of the 25th Annual Meeting of the Special Interest Group on Discourse and Dialogue, pp. 610–623, Association for Computational Linguistics, Kyoto, Japan, 2024.
@inproceedings{georgila_comparing_2024,
title = {Comparing Pre-Trained Embeddings and Domain-Independent Features for Regression-Based Evaluation of Task-Oriented Dialogue Systems},
author = {Kallirroi Georgila},
url = {https://aclanthology.org/2024.sigdial-1.52},
doi = {10.18653/v1/2024.sigdial-1.52},
year = {2024},
date = {2024-09-01},
urldate = {2024-10-15},
booktitle = {Proceedings of the 25th Annual Meeting of the Special Interest Group on Discourse and Dialogue},
pages = {610–623},
publisher = {Association for Computational Linguistics},
address = {Kyoto, Japan},
keywords = {Dialogue, DTIC, Natural Language},
pubstate = {published},
tppubtype = {inproceedings}
}
Wang, Changzhao; Aguilar, Stephen J.; Bankard, Jennifer S.; Bui, Eric; Nye, Benjamin
Writing with AI: What College Students Learned from Utilizing ChatGPT for a Writing Assignment Journal Article
In: Education Sciences, vol. 14, no. 9, pp. 976, 2024, ISSN: 2227-7102, (Publisher: MDPI AG).
@article{wang_writing_2024,
title = {Writing with AI: What College Students Learned from Utilizing ChatGPT for a Writing Assignment},
author = {Changzhao Wang and Stephen J. Aguilar and Jennifer S. Bankard and Eric Bui and Benjamin Nye},
url = {https://www.mdpi.com/2227-7102/14/9/976},
doi = {10.3390/educsci14090976},
issn = {2227-7102},
year = {2024},
date = {2024-09-01},
urldate = {2024-09-17},
journal = {Education Sciences},
volume = {14},
number = {9},
pages = {976},
abstract = {To support the integration of AI in education, this empirical study investigated what lessons college students learned from using Generative AI for writing. We recruited 47 students in the United States from a university writing course. Students completed an assignment in which they used Generative AI tools (e.g., ChatGPT) to draft an application letter or personal statement. Data were collected using a survey of five open-ended questions about their writing process, what worked, what did not work, how to better write with AI, and general lessons learned. We applied thematic analysis and sentiment analysis methods to analyze students’ responses. Results show that (1) students went through multiple rounds of prompting; (2) students identified strengths of AI, such as connection to topic, template generation, and sentence quality; (3) the weaknesses of AI included general language, robotic tone and lacking emotion, lacking personal voice, and lacking critical thinking; (4) students wished to improve AI-generated writing by adding personal stories, connections to posting, feelings and thoughts, and deleting repetitive language; and (5) their overall attitudes toward AI tool were positive. We believe our findings can help relieve some concerns about cheating with AI. We also suggested strategies to regulate the use of AI.},
note = {Publisher: MDPI AG},
keywords = {Learning Sciences},
pubstate = {published},
tppubtype = {article}
}
Lucas, Gale M.; Becerik-Gerber, Burcin; Roll, Shawn C.
Calibrating workers’ trust in intelligent automated systems Journal Article
In: Patterns, vol. 5, no. 9, pp. 101045, 2024, ISSN: 2666-3899, (Publisher: Elsevier BV).
@article{lucas_calibrating_2024,
title = {Calibrating workers’ trust in intelligent automated systems},
author = {Gale M. Lucas and Burcin Becerik-Gerber and Shawn C. Roll},
url = {https://linkinghub.elsevier.com/retrieve/pii/S2666389924001879},
doi = {10.1016/j.patter.2024.101045},
issn = {2666-3899},
year = {2024},
date = {2024-09-01},
urldate = {2024-09-17},
journal = {Patterns},
volume = {5},
number = {9},
pages = {101045},
note = {Publisher: Elsevier BV},
keywords = {DTIC},
pubstate = {published},
tppubtype = {article}
}
Liu, Xiao; Lei, Xuanyu; Wang, Shengyuan; Huang, Yue; Feng, Zhuoer; Wen, Bosi; Cheng, Jiale; Ke, Pei; Xu, Yifan; Tam, Weng Lam; Zhang, Xiaohan; Sun, Lichao; Gu, Xiaotao; Wang, Hongning; Zhang, Jing; Huang, Minlie; Dong, Yuxiao; Tang, Jie
AlignBench: Benchmarking Chinese Alignment of Large Language Models Miscellaneous
2024, (arXiv:2311.18743 [cs]).
@misc{liu_alignbench_2024,
title = {AlignBench: Benchmarking Chinese Alignment of Large Language Models},
author = {Xiao Liu and Xuanyu Lei and Shengyuan Wang and Yue Huang and Zhuoer Feng and Bosi Wen and Jiale Cheng and Pei Ke and Yifan Xu and Weng Lam Tam and Xiaohan Zhang and Lichao Sun and Xiaotao Gu and Hongning Wang and Jing Zhang and Minlie Huang and Yuxiao Dong and Jie Tang},
url = {http://arxiv.org/abs/2311.18743},
doi = {10.48550/arXiv.2311.18743},
year = {2024},
date = {2024-08-01},
urldate = {2025-01-16},
publisher = {arXiv},
abstract = {Alignment has become a critical step for instruction-tuned Large Language Models (LLMs) to become helpful assistants. However, the effective evaluation of alignment for emerging Chinese LLMs is still largely unexplored. To fill in this gap, we introduce AlignBench, a comprehensive multi-dimensional benchmark for evaluating LLMs' alignment in Chinese. We design a human-in-the-loop data curation pipeline, containing eight main categories, 683 real-scenario rooted queries and corresponding human verified references. To ensure the correctness of references, each knowledge-intensive query is accompanied with evidences collected from reliable web sources (including URLs and quotations) by our annotators. For automatic evaluation, our benchmark employs a rule-calibrated multi-dimensional LLM-as-Judge approach with Chain-of-Thought to generate explanations and final ratings, ensuring high reliability and interpretability. All evaluation code, data, and LLM generations are available at https://github.com/THUDM/AlignBench. Since its release, AlignBench has been adopted by top (Chinese) LLMs for evaluating their alignment capabilities in Chinese, including ChatGLM, Qwen, DeepSeek, Yi, Baichuan, and Abab.},
note = {arXiv:2311.18743 [cs]},
keywords = {Machine Learning},
pubstate = {published},
tppubtype = {misc}
}
Fischer, Katrin; Velentza, Anna-Maria; Lucas, Gale; Williams, Dmitri
Seeing Eye to Eye with Robots: An Experimental Study Predicting Trust in Social Robots for Domestic Use Proceedings Article
In: 2024 33rd IEEE International Conference on Robot and Human Interactive Communication (RO-MAN), pp. 2162–2168, IEEE, Pasadena, CA, USA, 2024, ISBN: 979-8-3503-7502-2.
@inproceedings{fischer_seeing_2024,
title = {Seeing Eye to Eye with Robots: An Experimental Study Predicting Trust in Social Robots for Domestic Use},
author = {Katrin Fischer and Anna-Maria Velentza and Gale Lucas and Dmitri Williams},
url = {https://ieeexplore.ieee.org/document/10731371/},
doi = {10.1109/RO-MAN60168.2024.10731371},
isbn = {979-8-3503-7502-2},
year = {2024},
date = {2024-08-01},
urldate = {2024-12-05},
booktitle = {2024 33rd IEEE International Conference on Robot and Human Interactive Communication (RO-MAN)},
pages = {2162–2168},
publisher = {IEEE},
address = {Pasadena, CA, USA},
keywords = {DTIC, Virtual Humans},
pubstate = {published},
tppubtype = {inproceedings}
}
Zaizar, Eric D.; Gramlich, Michael A.; Rizzo, Albert “Skip”; Reger, Greg M.; Norr, Aaron M.
Exploration of the impact of baseline clinician learner characteristics on motivational interviewing skill improvement following training with a virtual standardized patient Journal Article
In: Training and Education in Professional Psychology, 2024, ISSN: 1931-3926, 1931-3918.
@article{zaizar_exploration_2024,
title = {Exploration of the impact of baseline clinician learner characteristics on motivational interviewing skill improvement following training with a virtual standardized patient.},
author = {Eric D. Zaizar and Michael A. Gramlich and Albert “Skip” Rizzo and Greg M. Reger and Aaron M. Norr},
url = {https://doi.apa.org/doi/10.1037/tep0000490},
doi = {10.1037/tep0000490},
issn = {1931-3926, 1931-3918},
year = {2024},
date = {2024-08-01},
urldate = {2024-08-13},
journal = {Training and Education in Professional Psychology},
keywords = {Virtual Humans},
pubstate = {published},
tppubtype = {article}
}
Bodner, Ehud; Mikulincer, Mario; McMahon, Elizabeth; Rizzo, Albert
Reviving life that has ceased on October the 7th: an attachment perspective on a virtual reality intervention Journal Article
In: Front. Virtual Real., vol. 5, pp. 1438663, 2024, ISSN: 2673-4192.
@article{bodner_reviving_2024,
title = {Reviving life that has ceased on October the 7th: an attachment perspective on a virtual reality intervention},
author = {Ehud Bodner and Mario Mikulincer and Elizabeth McMahon and Albert Rizzo},
url = {https://www.frontiersin.org/articles/10.3389/frvir.2024.1438663/full},
doi = {10.3389/frvir.2024.1438663},
issn = {2673-4192},
year = {2024},
date = {2024-08-01},
urldate = {2024-08-15},
journal = {Front. Virtual Real.},
volume = {5},
pages = {1438663},
abstract = {Unfortunately, in recent years, wars have forced many civilians to evacuate their homes and move to safe zones. The event of October the seventh that took place in many Kibbutzim near the Gaza strip, exposed families who were on a Jewish holiday, to the murder of family and community members. They had to leave their burned houses and move to hotels and apartment buildings in other parts of Israel. Many people, also from the Northen parts of the country, are still in new safe zones, and have huge difficulties in returning to their houses (and not only because of objective security reasons). In this “perspective” article we propose a Virtual Reality (VR) application, which is based on past and current research in the fields of attachment theory and traumatic grief. We propose that in addition to the use of exposure therapy, a VR simulation which will activate the attachment system, can reorganize the evacuees’ figure and place attachment representations. We suggest that such a simulation will revive the evacuees’ sense of safe-haven and secure base and enable them to return to their home place, or to adjust to a new place, thereby leading to optimal adjustment. We start with a presentation of the theory of attachment, place attachment, attachment and loss and the two-track model of bereavement. Then, we describe the design of our VR intervention that aims to address this challenge from the attachment theory perspective with the evacuees. Finally, we discuss the challenges that need to be dealt with to implement the VR interventions through resilience centers in Israel.},
keywords = {MedVR},
pubstate = {published},
tppubtype = {article}
}
Han, Bin; Yau, Cleo; Lei, Su; Gratch, Jonathan
Knowledge-based Emotion Recognition using Large Language Models Miscellaneous
2024, (arXiv:2408.04123 [cs]).
@misc{han_knowledge-based_2024,
title = {Knowledge-based Emotion Recognition using Large Language Models},
author = {Bin Han and Cleo Yau and Su Lei and Jonathan Gratch},
url = {http://arxiv.org/abs/2408.04123},
year = {2024},
date = {2024-08-01},
urldate = {2024-08-15},
publisher = {arXiv},
abstract = {Emotion recognition in social situations is a complex task that requires integrating information from both facial expressions and the situational context. While traditional approaches to automatic emotion recognition have focused on decontextualized signals, recent research emphasizes the importance of context in shaping emotion perceptions. This paper contributes to the emerging field of context-based emotion recognition by leveraging psychological theories of human emotion perception to inform the design of automated methods. We propose an approach that combines emotion recognition methods with Bayesian Cue Integration (BCI) to integrate emotion inferences from decontextualized facial expressions and contextual knowledge inferred via Large-language Models. We test this approach in the context of interpreting facial expressions during a social task, the prisoner's dilemma. Our results provide clear support for BCI across a range of automatic emotion recognition methods. The best automated method achieved results comparable to human observers, suggesting the potential for this approach to advance the field of affective computing.},
note = {arXiv:2408.04123 [cs]},
keywords = {DTIC, Emotions},
pubstate = {published},
tppubtype = {misc}
}
Parga, Madeline R.; Roll, Shawn C.; Lucas, Gale M.; Becerik-Gerber, Burcin; Narayanan, Shrikanth
Differences in Self-Rated Worker Outcomes Across Stress States: An Interim Analysis of Hybrid Worker Data Journal Article
In: Proceedings of the Human Factors and Ergonomics Society Annual Meeting, 2024, ISSN: 1071-1813, 2169-5067, (Publisher: SAGE Publications).
@article{parga_differences_2024,
title = {Differences in Self-Rated Worker Outcomes Across Stress States: An Interim Analysis of Hybrid Worker Data},
author = {Madeline R. Parga and Shawn C. Roll and Gale M. Lucas and Burcin Becerik-Gerber and Shrikanth Narayanan},
url = {https://journals.sagepub.com/doi/10.1177/10711813241275500},
doi = {10.1177/10711813241275500},
issn = {1071-1813, 2169-5067},
year = {2024},
date = {2024-08-01},
urldate = {2024-09-17},
journal = {Proceedings of the Human Factors and Ergonomics Society Annual Meeting},
abstract = {Stress experiences can have dire consequences for worker performance and well-being, and the social environment of the workplace is a key contributor to worker experience. This study investigated the relationship between hybrid workers’ self-ratings of productivity, mood, and stress with perceptions of positive (eustress) and negative (distress) stress states. We hypothesized that self-ratings would vary across combinations of eustress and distress experiences and that these differences would differ based on the social context. Ecological momentary assessments (EMA) were used to obtain ecologically valid data at four data points each workday across a 4-month study period in a cohort of seven office workers. Findings aligned with the Yerkes–Dodson law, such that higher states of arousal were associated with greater self-perceived productivity, and higher stress magnitudes were found when distress existed. Compared to other states, eustress was associated with higher productivity in work-related activities and better mood across all activity types.},
note = {Publisher: SAGE Publications},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Tak, Ala N.; Gratch, Jonathan
GPT-4 Emulates Average-Human Emotional Cognition from a Third-Person Perspective Miscellaneous
2024, (arXiv:2408.13718 [cs]).
@misc{tak_gpt-4_2024,
title = {GPT-4 Emulates Average-Human Emotional Cognition from a Third-Person Perspective},
author = {Ala N. Tak and Jonathan Gratch},
url = {http://arxiv.org/abs/2408.13718},
year = {2024},
date = {2024-08-01},
urldate = {2024-09-17},
publisher = {arXiv},
abstract = {This paper extends recent investigations on the emotional reasoning abilities of Large Language Models (LLMs). Current research on LLMs has not directly evaluated the distinction between how LLMs predict the self-attribution of emotions and the perception of others' emotions. We first look at carefully crafted emotion-evoking stimuli, originally designed to find patterns of brain neural activity representing fine-grained inferred emotional attributions of others. We show that GPT-4 is especially accurate in reasoning about such stimuli. This suggests LLMs agree with humans' attributions of others' emotions in stereotypical scenarios remarkably more than self-attributions of emotions in idiosyncratic situations. To further explore this, our second study utilizes a dataset containing annotations from both the author and a third-person perspective. We find that GPT-4's interpretations align more closely with human judgments about the emotions of others than with self-assessments. Notably, conventional computational models of emotion primarily rely on self-reported ground truth as the gold standard. However, an average observer's standpoint, which LLMs appear to have adopted, might be more relevant for many downstream applications, at least in the absence of individual information and adequate safety considerations.},
note = {arXiv:2408.13718 [cs]},
keywords = {DTIC, Emotions},
pubstate = {published},
tppubtype = {misc}
}
Roth, Holger R.; Beutel, Daniel J.; Cheng, Yan; Marques, Javier Fernandez; Pan, Heng; Chen, Chester; Zhang, Zhihong; Wen, Yuhong; Yang, Sean; Yang, Isaac; Hsieh, Yuan-Ting; Xu, Ziyue; Xu, Daguang; Lane, Nicholas D.; Feng, Andrew
Supercharging Federated Learning with Flower and NVIDIA FLARE Miscellaneous
2024, (arXiv:2407.00031 [cs]).
@misc{roth_supercharging_2024,
title = {Supercharging Federated Learning with Flower and NVIDIA FLARE},
author = {Holger R. Roth and Daniel J. Beutel and Yan Cheng and Javier Fernandez Marques and Heng Pan and Chester Chen and Zhihong Zhang and Yuhong Wen and Sean Yang and Isaac Yang and Yuan-Ting Hsieh and Ziyue Xu and Daguang Xu and Nicholas D. Lane and Andrew Feng},
url = {http://arxiv.org/abs/2407.00031},
doi = {10.48550/arXiv.2407.00031},
year = {2024},
date = {2024-07-01},
urldate = {2025-01-16},
publisher = {arXiv},
abstract = {Several open-source systems, such as Flower and NVIDIA FLARE, have been developed in recent years while focusing on different aspects of federated learning (FL). Flower is dedicated to implementing a cohesive approach to FL, analytics, and evaluation. Over time, Flower has cultivated extensive strategies and algorithms tailored for FL application development, fostering a vibrant FL community in research and industry. Conversely, FLARE has prioritized the creation of an enterprise-ready, resilient runtime environment explicitly designed for FL applications in production environments. In this paper, we describe our initial integration of both frameworks and show how they can work together to supercharge the FL ecosystem as a whole. Through the seamless integration of Flower and FLARE, applications crafted within the Flower framework can effortlessly operate within the FLARE runtime environment without necessitating any modifications. This initial integration streamlines the process, eliminating complexities and ensuring smooth interoperability between the two platforms, thus enhancing the overall efficiency and accessibility of FL applications.},
note = {arXiv:2407.00031 [cs]},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
Diaz-Pinto, Andres; Alle, Sachidanand; Nath, Vishwesh; Tang, Yucheng; Ihsani, Alvin; Asad, Muhammad; Pérez-García, Fernando; Mehta, Pritesh; Li, Wenqi; Flores, Mona; Roth, Holger R.; Vercauteren, Tom; Xu, Daguang; Dogra, Prerna; Ourselin, Sebastien; Feng, Andrew; Cardoso, M. Jorge
MONAI Label: A framework for AI-assisted interactive labeling of 3D medical images Journal Article
In: Medical Image Analysis, vol. 95, pp. 103207, 2024, ISSN: 1361-8415.
@article{diaz-pinto_monai_2024,
title = {MONAI Label: A framework for AI-assisted interactive labeling of 3D medical images},
author = {Andres Diaz-Pinto and Sachidanand Alle and Vishwesh Nath and Yucheng Tang and Alvin Ihsani and Muhammad Asad and Fernando Pérez-García and Pritesh Mehta and Wenqi Li and Mona Flores and Holger R. Roth and Tom Vercauteren and Daguang Xu and Prerna Dogra and Sebastien Ourselin and Andrew Feng and M. Jorge Cardoso},
url = {https://linkinghub.elsevier.com/retrieve/pii/S1361841524001324},
doi = {10.1016/j.media.2024.103207},
issn = {1361-8415},
year = {2024},
date = {2024-07-01},
urldate = {2025-01-16},
journal = {Medical Image Analysis},
volume = {95},
pages = {103207},
keywords = {MedVR},
pubstate = {published},
tppubtype = {article}
}
Huang, Shuo; Jones, Fred; Gurney, Nikolos; Pynadath, David; Srivastava, Kunal; Trent, Stoney; Wu, Peggy; Zhu, Quanyan
PsybORG+: Modeling and Simulation for Detecting Cognitive Biases in Advanced Persistent Threats Miscellaneous
2024, (Version Number: 3).
@misc{huang_psyborg_2024,
title = {PsybORG+: Modeling and Simulation for Detecting Cognitive Biases in Advanced Persistent Threats},
author = {Shuo Huang and Fred Jones and Nikolos Gurney and David Pynadath and Kunal Srivastava and Stoney Trent and Peggy Wu and Quanyan Zhu},
url = {https://arxiv.org/abs/2408.01310},
doi = {10.48550/ARXIV.2408.01310},
year = {2024},
date = {2024-07-01},
urldate = {2024-12-05},
publisher = {arXiv},
abstract = {Advanced Persistent Threats (APTs) bring significant challenges to cybersecurity due to their sophisticated and stealthy nature. Traditional cybersecurity measures fail to defend against APTs. Cognitive vulnerabilities can significantly influence attackers' decision-making processes, which presents an opportunity for defenders to exploit. This work introduces PsybORG+, a multi-agent cybersecurity simulation environment designed to model APT behaviors influenced by cognitive vulnerabilities. A classification model is built for cognitive vulnerability inference and a simulator is designed for synthetic data generation. Results show that PsybORG+ can effectively model APT attackers with different loss aversion and confirmation bias levels. The classification model has at least a 0.83 accuracy rate in predicting cognitive vulnerabilities.},
note = {Version Number: 3},
keywords = {DTIC},
pubstate = {published},
tppubtype = {misc}
}
Owayyed, Mohammed Al; Tielman, Myrthe; Hartholt, Arno; Specht, Marcus; Brinkman, Willem-Paul
Agent-based social skills training systems: the ARTES architecture, interaction characteristics, learning theories and future outlooks Journal Article
In: Behaviour & Information Technology, pp. 1–28, 2024, ISSN: 0144-929X, 1362-3001.
@article{al_owayyed_agent-based_2024,
title = {Agent-based social skills training systems: the ARTES architecture, interaction characteristics, learning theories and future outlooks},
author = {Mohammed Al Owayyed and Myrthe Tielman and Arno Hartholt and Marcus Specht and Willem-Paul Brinkman},
url = {https://www.tandfonline.com/doi/full/10.1080/0144929X.2024.2374891},
doi = {10.1080/0144929X.2024.2374891},
issn = {0144-929X, 1362-3001},
year = {2024},
date = {2024-07-01},
urldate = {2024-08-15},
journal = {Behaviour & Information Technology},
pages = {1–28},
keywords = {Virtual Agents, Virtual Humans},
pubstate = {published},
tppubtype = {article}
}
Bell, Imogen H.; Pot-Kolder, Roos; Rizzo, Albert; Rus-Calafell, Mar; Cardi, Valentina; Cella, Matteo; Ward, Thomas; Riches, Simon; Reinoso, Martin; Thompson, Andrew; Alvarez-Jimenez, Mario; Valmaggia, Lucia
Advances in the use of virtual reality to treat mental health conditions Journal Article
In: Nat Rev Psychol, 2024, ISSN: 2731-0574.
@article{bell_advances_2024,
title = {Advances in the use of virtual reality to treat mental health conditions},
author = {Imogen H. Bell and Roos Pot-Kolder and Albert Rizzo and Mar Rus-Calafell and Valentina Cardi and Matteo Cella and Thomas Ward and Simon Riches and Martin Reinoso and Andrew Thompson and Mario Alvarez-Jimenez and Lucia Valmaggia},
url = {https://www.nature.com/articles/s44159-024-00334-9},
doi = {10.1038/s44159-024-00334-9},
issn = {2731-0574},
year = {2024},
date = {2024-07-01},
urldate = {2024-07-11},
journal = {Nat Rev Psychol},
keywords = {MedVR},
pubstate = {published},
tppubtype = {article}
}
Gunasekara, Chulaka; Kim, Seokhwan; D'Haro, Luis Fernando; Rastogi, Abhinav; Chen, Yun-Nung; Eric, Mihail; Hedayatnia, Behnam; Gopalakrishnan, Karthik; Liu, Yang; Huang, Chao-Wei; Hakkani-Tür, Dilek; Li, Jinchao; Zhu, Qi; Luo, Lingxiao; Liden, Lars; Huang, Kaili; Shayandeh, Shahin; Liang, Runze; Peng, Baolin; Zhang, Zheng; Shukla, Swadheen; Huang, Minlie; Gao, Jianfeng; Mehri, Shikib; Feng, Yulan; Gordon, Carla; Alavi, Seyed Hossein; Traum, David; Eskenazi, Maxine; Beirami, Ahmad; Cho, Eunjoon; Crook, Paul A.; De, Ankita; Geramifard, Alborz; Kottur, Satwik; Moon, Seungwhan; Poddar, Shivani; Subba, Rajen
Overview of the Ninth Dialog System Technology Challenge: DSTC9 Journal Article
In: IEEE/ACM Trans. Audio Speech Lang. Process., pp. 1–10, 2024, ISSN: 2329-9290, 2329-9304.
@article{gunasekara_overview_2024,
title = {Overview of the Ninth Dialog System Technology Challenge: DSTC9},
author = {Chulaka Gunasekara and Seokhwan Kim and Luis Fernando D'Haro and Abhinav Rastogi and Yun-Nung Chen and Mihail Eric and Behnam Hedayatnia and Karthik Gopalakrishnan and Yang Liu and Chao-Wei Huang and Dilek Hakkani-Tür and Jinchao Li and Qi Zhu and Lingxiao Luo and Lars Liden and Kaili Huang and Shahin Shayandeh and Runze Liang and Baolin Peng and Zheng Zhang and Swadheen Shukla and Minlie Huang and Jianfeng Gao and Shikib Mehri and Yulan Feng and Carla Gordon and Seyed Hossein Alavi and David Traum and Maxine Eskenazi and Ahmad Beirami and Eunjoon Cho and Paul A. Crook and Ankita De and Alborz Geramifard and Satwik Kottur and Seungwhan Moon and Shivani Poddar and Rajen Subba},
url = {https://ieeexplore.ieee.org/document/10595468/},
doi = {10.1109/TASLP.2024.3426331},
issn = {2329-9290, 2329-9304},
year = {2024},
date = {2024-07-01},
urldate = {2024-08-15},
journal = {IEEE/ACM Trans. Audio Speech Lang. Process.},
pages = {1–10},
keywords = {Natural Language},
pubstate = {published},
tppubtype = {article}
}
Han, Bin; Yau, Cleo; Lei, Su; Gratch, Jonathan
In-Depth Analysis of Emotion Recognition through Knowledge-Based Large Language Models Miscellaneous
2024, (arXiv:2408.00780 [cs]).
@misc{han_-depth_2024,
title = {In-Depth Analysis of Emotion Recognition through Knowledge-Based Large Language Models},
author = {Bin Han and Cleo Yau and Su Lei and Jonathan Gratch},
url = {http://arxiv.org/abs/2408.00780},
year = {2024},
date = {2024-07-01},
urldate = {2024-08-15},
publisher = {arXiv},
abstract = {Emotion recognition in social situations is a complex task that requires integrating information from both facial expressions and the situational context. While traditional approaches to automatic emotion recognition have focused on decontextualized signals, recent research emphasizes the importance of context in shaping emotion perceptions. This paper contributes to the emerging field of context-based emotion recognition by leveraging psychological theories of human emotion perception to inform the design of automated methods. We propose an approach that combines emotion recognition methods with Bayesian Cue Integration (BCI) to integrate emotion inferences from decontextualized facial expressions and contextual knowledge inferred via Large-language Models. We test this approach in the context of interpreting facial expressions during a social task, the prisoner's dilemma. Our results provide clear support for BCI across a range of automatic emotion recognition methods. The best automated method achieved results comparable to human observers, suggesting the potential for this approach to advance the field of affective computing.},
note = {arXiv:2408.00780 [cs]},
keywords = {Virtual Humans},
pubstate = {published},
tppubtype = {misc}
}
Xiao, Hanyuan; Chen, Yingshu; Huang, Huajian; Xiong, Haolin; Yang, Jing; Prasad, Pratusha; Zhao, Yajie
Localized Gaussian Splatting Editing with Contextual Awareness Miscellaneous
2024, (arXiv:2408.00083 [cs]).
@misc{xiao_localized_2024,
title = {Localized Gaussian Splatting Editing with Contextual Awareness},
author = {Hanyuan Xiao and Yingshu Chen and Huajian Huang and Haolin Xiong and Jing Yang and Pratusha Prasad and Yajie Zhao},
url = {http://arxiv.org/abs/2408.00083},
year = {2024},
date = {2024-07-01},
urldate = {2024-08-16},
publisher = {arXiv},
abstract = {Recent text-guided generation of individual 3D object has achieved great success using diffusion priors. However, these methods are not suitable for object insertion and replacement tasks as they do not consider the background, leading to illumination mismatches within the environment. To bridge the gap, we introduce an illumination-aware 3D scene editing pipeline for 3D Gaussian Splatting (3DGS) representation. Our key observation is that inpainting by the state-of-the-art conditional 2D diffusion model is consistent with background in lighting. To leverage the prior knowledge from the well-trained diffusion models for 3D object generation, our approach employs a coarse-to-fine objection optimization pipeline with inpainted views. In the first coarse step, we achieve image-to-3D lifting given an ideal inpainted view. The process employs 3D-aware diffusion prior from a view-conditioned diffusion model, which preserves illumination present in the conditioning image. To acquire an ideal inpainted image, we introduce an Anchor View Proposal (AVP) algorithm to find a single view that best represents the scene illumination in target region. In the second Texture Enhancement step, we introduce a novel Depth-guided Inpainting Score Distillation Sampling (DI-SDS), which enhances geometry and texture details with the inpainting diffusion prior, beyond the scope of the 3D-aware diffusion prior knowledge in the first coarse step. DI-SDS not only provides fine-grained texture enhancement, but also urges optimization to respect scene lighting. Our approach efficiently achieves local editing with global illumination consistency without explicitly modeling light transport. We demonstrate robustness of our method by evaluating editing in real scenes containing explicit highlight and shadows, and compare against the state-of-the-art text-to-3D editing methods.},
note = {arXiv:2408.00083 [cs]},
keywords = {DTIC, VGL},
pubstate = {published},
tppubtype = {misc}
}
Liu, Ruying; Wu, Wanjing; Becerik-Gerber, Burcin; Lucas, Gale M.
Enhancing Building Safety Design for Active Shooter Incidents: Exploration of Building Exit Parameters using Reinforcement Learning-Based Simulations Miscellaneous
2024, (arXiv:2407.10441 [cs]).
@misc{liu_enhancing_2024,
title = {Enhancing Building Safety Design for Active Shooter Incidents: Exploration of Building Exit Parameters using Reinforcement Learning-Based Simulations},
author = {Ruying Liu and Wanjing Wu and Burcin Becerik-Gerber and Gale M. Lucas},
url = {http://arxiv.org/abs/2407.10441},
year = {2024},
date = {2024-07-01},
urldate = {2024-09-17},
publisher = {arXiv},
abstract = {With the alarming rise in active shooter incidents (ASIs) in the United States, enhancing public safety through building design has become a pressing need. This study proposes a reinforcement learning-based simulation approach addressing gaps in existing research that has neglected the dynamic behaviours of shooters. We developed an autonomous agent to simulate an active shooter within a realistic office environment, aiming to offer insights into the interactions between building design parameters and ASI outcomes. A case study is conducted to quantitatively investigate the impact of building exit numbers (total count of accessible exits) and configuration (arrangement of which exits are available or not) on evacuation and harm rates. Findings demonstrate that greater exit availability significantly improves evacuation outcomes and reduces harm. Exits nearer to the shooter's initial position hold greater importance for accessibility than those farther away. By encompassing dynamic shooter behaviours, this study offers preliminary insights into effective building safety design against evolving threats.},
note = {arXiv:2407.10441 [cs]},
keywords = {DTIC, Virtual Worlds},
pubstate = {published},
tppubtype = {misc}
}
Ke, Pei; Wen, Bosi; Feng, Andrew; Liu, Xiao; Lei, Xuanyu; Cheng, Jiale; Wang, Shengyuan; Zeng, Aohan; Dong, Yuxiao; Wang, Hongning; Tang, Jie; Huang, Minlie
CritiqueLLM: Towards an Informative Critique Generation Model for Evaluation of Large Language Model Generation Proceedings Article
In: Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 13034–13054, Association for Computational Linguistics, Bangkok, Thailand, 2024.
@inproceedings{ke_critiquellm_2024,
title = {CritiqueLLM: Towards an Informative Critique Generation Model for Evaluation of Large Language Model Generation},
author = {Pei Ke and Bosi Wen and Andrew Feng and Xiao Liu and Xuanyu Lei and Jiale Cheng and Shengyuan Wang and Aohan Zeng and Yuxiao Dong and Hongning Wang and Jie Tang and Minlie Huang},
url = {https://aclanthology.org/2024.acl-long.704},
doi = {10.18653/v1/2024.acl-long.704},
year = {2024},
date = {2024-06-01},
urldate = {2025-01-16},
booktitle = {Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
pages = {13034–13054},
publisher = {Association for Computational Linguistics},
address = {Bangkok, Thailand},
keywords = {Natural Language},
pubstate = {published},
tppubtype = {inproceedings}
}
Lu, Shuhong; Jin, Zhangyu; Rajendran, Vickram; Harari, Michal; Feng, Andrew; Melo, Celso M. De
Synthetic-to-real adaptation for complex action recognition in surveillance applications Proceedings Article
In: Manser, Kimberly E.; Melo, Celso De; Rao, Raghuveer M.; Howell, Christopher L. (Ed.): Synthetic Data for Artificial Intelligence and Machine Learning: Tools, Techniques, and Applications II, pp. 14, SPIE, National Harbor, United States, 2024, ISBN: 978-1-5106-7388-5 978-1-5106-7389-2.
@inproceedings{lu_synthetic-to-real_2024,
title = {Synthetic-to-real adaptation for complex action recognition in surveillance applications},
author = {Shuhong Lu and Zhangyu Jin and Vickram Rajendran and Michal Harari and Andrew Feng and Celso M. De Melo},
editor = {Kimberly E. Manser and Celso De Melo and Raghuveer M. Rao and Christopher L. Howell},
url = {https://www.spiedigitallibrary.org/conference-proceedings-of-spie/13035/3012393/Synthetic-to-real-adaptation-for-complex-action-recognition-in-surveillance/10.1117/12.3012393.full},
doi = {10.1117/12.3012393},
isbn = {978-1-5106-7388-5 978-1-5106-7389-2},
year = {2024},
date = {2024-06-01},
urldate = {2024-07-11},
booktitle = {Synthetic Data for Artificial Intelligence and Machine Learning: Tools, Techniques, and Applications II},
pages = {14},
publisher = {SPIE},
address = {National Harbor, United States},
keywords = {DTIC},
pubstate = {published},
tppubtype = {inproceedings}
}
Nurunnabi, Abdul; Teferle, Felicia; Laefer, Debra F.; Chen, Meida; Ali, Mir Masoom
Development of a Precise Tree Structure from LiDAR Point Clouds Journal Article
In: Int. Arch. Photogramm. Remote Sens. Spatial Inf. Sci., vol. XLVIII-2-2024, pp. 301–308, 2024, ISSN: 2194-9034.
@article{nurunnabi_development_2024,
title = {Development of a Precise Tree Structure from LiDAR Point Clouds},
author = {Abdul Nurunnabi and Felicia Teferle and Debra F. Laefer and Meida Chen and Mir Masoom Ali},
url = {https://isprs-archives.copernicus.org/articles/XLVIII-2-2024/301/2024/},
doi = {10.5194/isprs-archives-XLVIII-2-2024-301-2024},
issn = {2194-9034},
year = {2024},
date = {2024-06-01},
urldate = {2024-07-11},
journal = {Int. Arch. Photogramm. Remote Sens. Spatial Inf. Sci.},
volume = {XLVIII-2-2024},
pages = {301–308},
abstract = {Abstract. A precise tree structure that represents the distribution of tree stem, branches, and leaves is crucial for accurately capturing the full representation of a tree. Light Detection and Ranging (LiDAR)-based three-dimensional (3D) point clouds (PCs) capture the geometry of scanned objects including forests stands and individual trees. PCs are irregular, unstructured, often noisy, and contaminated by outliers. Researchers have struggled to develop methods to separate leaves and wood without losing the tree geometry. This paper proposes a solution that employs only the spatial coordinates (x, y, z) of the PC. The new algorithm works as a filtering approach, utilizing multi-scale neighborhood-based geometric features (GFs) e.g., linearity, planarity, and verticality to classify linear (wood) and non-linear (leaf) points. This involves finding potential wood points and coupling them with an octree-based segmentation to develop a tree architecture. The main contributions of this paper are (i) investigating the potential of different GFs to split linear and non-linear points, (ii) introducing a novel method that pointwise classifies leaf and wood points, and (iii) developing a precise 3D tree structure. The performance of the new algorithm has been demonstrated through terrestrial laser scanning PCs. For a Scots pine tree, the new method classifies leaf and wood points with an overall accuracy of 97.9%.},
keywords = {Narrative, VGL},
pubstate = {published},
tppubtype = {article}
}
Zhang, Mingyuan; Cai, Zhongang; Pan, Liang; Hong, Fangzhou; Guo, Xinying; Yang, Lei; Liu, Ziwei
MotionDiffuse: Text-Driven Human Motion Generation With Diffusion Model Journal Article
In: IEEE Trans. Pattern Anal. Mach. Intell., vol. 46, no. 6, pp. 4115–4128, 2024, ISSN: 0162-8828, 2160-9292, 1939-3539.
@article{zhang_motiondiffuse_2024,
title = {MotionDiffuse: Text-Driven Human Motion Generation With Diffusion Model},
author = {Mingyuan Zhang and Zhongang Cai and Liang Pan and Fangzhou Hong and Xinying Guo and Lei Yang and Ziwei Liu},
url = {https://ieeexplore.ieee.org/document/10416192/},
doi = {10.1109/TPAMI.2024.3355414},
issn = {0162-8828, 2160-9292, 1939-3539},
year = {2024},
date = {2024-06-01},
urldate = {2024-07-18},
journal = {IEEE Trans. Pattern Anal. Mach. Intell.},
volume = {46},
number = {6},
pages = {4115–4128},
keywords = {VGL},
pubstate = {published},
tppubtype = {article}
}
Yin, Yinxuan; Nayyar, Mollik; Holman, Daniel; Lucas, Gale; Holbrook, Colin; Wagner, Alan
Validation and Evacuee Modeling of Virtual Robot-guided Emergency Evacuation Experiments Miscellaneous
2024.
@misc{yin_validation_2024,
title = {Validation and Evacuee Modeling of Virtual Robot-guided Emergency Evacuation Experiments},
author = {Yinxuan Yin and Mollik Nayyar and Daniel Holman and Gale Lucas and Colin Holbrook and Alan Wagner},
url = {https://osf.io/mr78s},
doi = {10.31234/osf.io/mr78s},
year = {2024},
date = {2024-06-01},
urldate = {2024-09-17},
publisher = {Center for Open Science},
abstract = {Virtual Reality (VR) is an increasingly common tool for investigating human responses to emergency situations. Nonetheless, studies validating and comparing human subject behavior during real world emergencies to their responses in VR are notably rare, and no prior studies have validated whether human emergency responses to guidance from a robot are comparable in VR versus the real world. In the present pre-registered study, we used VR to replicate a previous robot-guided emergency evacuation study conducted in the real world and compared human subject behavior in matched physical and virtual environments. In both environments, human subjects were asked to follow a robot to a location and to then read an article. While reading, a fire alarm sounds. The robot then attempted to guide them to a distant, unfamiliar exit rather than nearby and familiar exits. We observed close correspondences between evacuee exit choice (the robot’s distant exit versus closer exits), evacuation time, and trust in the robot between the VR and physical environments. We further demonstrate that data collected in virtual reality can be used to create accurate motion models (mean error of 0.42 centimeters) predicting evacuee trajectories and locations in real life. Taken together, the results provide evidence for the ecological validity of VR approaches to studying human-robot interaction, particularly robot-guided emergency evacuation.},
keywords = {DTIC, Virtual Humans},
pubstate = {published},
tppubtype = {misc}
}
Saxon, Leslie; Faulk, Robert T; Boberg, Jill; Barrett, Trevor; McLelland, Steve
Continuous Assessment of Active-Duty Army Special Operations and Reconnaissance Marines Using Digital Devices and Custom Software: The Digital Comprehensive Operator Readiness Assessment (DcORA) Study Journal Article
In: J. Spec. Oper. Med., 2024, ISSN: 1553-9768.
@article{saxon_continuous_2024,
title = {Continuous Assessment of Active-Duty Army Special Operations and Reconnaissance Marines Using Digital Devices and Custom Software: The Digital Comprehensive Operator Readiness Assessment (DcORA) Study},
author = {Leslie Saxon and Robert T Faulk and Jill Boberg and Trevor Barrett and Steve McLelland},
url = {https://www.jsomonline.org/Citations/PXKK-I23D.php},
doi = {10.55460/PXKK-I23D},
issn = {1553-9768},
year = {2024},
date = {2024-06-01},
urldate = {2024-06-25},
journal = {J. Spec. Oper. Med.},
keywords = {CBC, DTIC},
pubstate = {published},
tppubtype = {article}
}
Greenwald, Eric; Krakowski, Ari; Hurt, Timothy; Grindstaff, Kelly; Wang, Ning
It's like I'm the AI: Youth Sensemaking About AI through Metacognitive Embodiment Proceedings Article
In: Proceedings of the 23rd Annual ACM Interaction Design and Children Conference, pp. 789–793, ACM, Delft Netherlands, 2024, ISBN: 979-8-4007-0442-0.
@inproceedings{greenwald_its_2024,
title = {It's like I'm the AI: Youth Sensemaking About AI through Metacognitive Embodiment},
author = {Eric Greenwald and Ari Krakowski and Timothy Hurt and Kelly Grindstaff and Ning Wang},
url = {https://dl.acm.org/doi/10.1145/3628516.3659395},
doi = {10.1145/3628516.3659395},
isbn = {979-8-4007-0442-0},
year = {2024},
date = {2024-06-01},
urldate = {2024-06-25},
booktitle = {Proceedings of the 23rd Annual ACM Interaction Design and Children Conference},
pages = {789–793},
publisher = {ACM},
address = {Delft Netherlands},
keywords = {AI, Machine Learning},
pubstate = {published},
tppubtype = {inproceedings}
}
Chen, Meida; Lal, Devashish; Yu, Zifan; Xu, Jiuyi; Feng, Andrew; You, Suya; Nurunnabi, Abdul; Shi, Yangming
Large-Scale 3D Terrain Reconstruction Using 3D Gaussian Splatting for Visualization and Simulation Journal Article
In: Int. Arch. Photogramm. Remote Sens. Spatial Inf. Sci., vol. XLVIII-2-2024, pp. 49–54, 2024, ISSN: 2194-9034.
@article{chen_large-scale_2024,
title = {Large-Scale 3D Terrain Reconstruction Using 3D Gaussian Splatting for Visualization and Simulation},
author = {Meida Chen and Devashish Lal and Zifan Yu and Jiuyi Xu and Andrew Feng and Suya You and Abdul Nurunnabi and Yangming Shi},
url = {https://isprs-archives.copernicus.org/articles/XLVIII-2-2024/49/2024/},
doi = {10.5194/isprs-archives-XLVIII-2-2024-49-2024},
issn = {2194-9034},
year = {2024},
date = {2024-06-01},
urldate = {2024-06-20},
journal = {Int. Arch. Photogramm. Remote Sens. Spatial Inf. Sci.},
volume = {XLVIII-2-2024},
pages = {49–54},
abstract = {Abstract. The fusion of low-cost unmanned aerial systems (UAS) with advanced photogrammetric techniques has revolutionized 3D terrain reconstruction, enabling the automated creation of detailed models. Concurrently, the advent of 3D Gaussian Splatting has introduced a paradigm shift in 3D data representation, offering visually realistic renditions distinct from traditional polygon-based models. Our research builds upon this foundation, aiming to integrate Gaussian Splatting into interactive simulations for immersive virtual environments. We address challenges such as collision detection by adopting a hybrid approach, combining Gaussian Splatting with photogrammetry-derived meshes. Through comprehensive experimentation covering varying terrain sizes and Gaussian densities, we evaluate scalability, performance, and limitations. Our findings contribute to advancing the use of advanced computer graphics techniques for enhanced 3D terrain visualization and simulation.},
keywords = {DTIC, Graphics, VGL},
pubstate = {published},
tppubtype = {article}
}
Nye, Benjamin D.; Core, Mark G.; Chereddy, Sai V. R.; Young, Vivian; Auerbach, Daniel
Bootstrapping Assessments for Team Simulations: Transfer Learning Between First-Person-Shooter Game Maps Book Section
In: Sottilare, Robert A.; Schwarz, Jessica (Ed.): Adaptive Instructional Systems, vol. 14727, pp. 261–271, Springer Nature Switzerland, Cham, 2024, ISBN: 978-3-031-60608-3 978-3-031-60609-0, (Series Title: Lecture Notes in Computer Science).
@incollection{sottilare_bootstrapping_2024,
title = {Bootstrapping Assessments for Team Simulations: Transfer Learning Between First-Person-Shooter Game Maps},
author = {Benjamin D. Nye and Mark G. Core and Sai V. R. Chereddy and Vivian Young and Daniel Auerbach},
editor = {Robert A. Sottilare and Jessica Schwarz},
url = {https://link.springer.com/10.1007/978-3-031-60609-0_19},
doi = {10.1007/978-3-031-60609-0_19},
isbn = {978-3-031-60608-3 978-3-031-60609-0},
year = {2024},
date = {2024-06-01},
urldate = {2024-06-18},
booktitle = {Adaptive Instructional Systems},
volume = {14727},
pages = {261–271},
publisher = {Springer Nature Switzerland},
address = {Cham},
note = {Series Title: Lecture Notes in Computer Science},
keywords = {DTIC, Learning Sciences, Machine Learning, UARC},
pubstate = {published},
tppubtype = {incollection}
}
Core, Mark G.; Nye, Benjamin D.; Fegley, Brent D.
Trend-Aware Scenario Authoring: Adapting Training Toward Patterns from Real Operations Book Section
In: Sottilare, Robert A.; Schwarz, Jessica (Ed.): Adaptive Instructional Systems, vol. 14727, pp. 15–24, Springer Nature Switzerland, Cham, 2024, ISBN: 978-3-031-60608-3 978-3-031-60609-0, (Series Title: Lecture Notes in Computer Science).
@incollection{sottilare_trend-aware_2024,
title = {Trend-Aware Scenario Authoring: Adapting Training Toward Patterns from Real Operations},
author = {Mark G. Core and Benjamin D. Nye and Brent D. Fegley},
editor = {Robert A. Sottilare and Jessica Schwarz},
url = {https://link.springer.com/10.1007/978-3-031-60609-0_2},
doi = {10.1007/978-3-031-60609-0_2},
isbn = {978-3-031-60608-3 978-3-031-60609-0},
year = {2024},
date = {2024-06-01},
urldate = {2024-06-18},
booktitle = {Adaptive Instructional Systems},
volume = {14727},
pages = {15–24},
publisher = {Springer Nature Switzerland},
address = {Cham},
note = {Series Title: Lecture Notes in Computer Science},
keywords = {DTIC, Learning Sciences, UARC},
pubstate = {published},
tppubtype = {incollection}
}
Artstein, Ron; Chen, Elizabeth
Augmenting Training Data for a Virtual Character Using GPT-3.5 Proceedings Article
In: The Florida Artificial Intelligence Research Society, 2024.
@inproceedings{artstein_augmenting_2024,
title = {Augmenting Training Data for a Virtual Character Using GPT-3.5},
author = {Ron Artstein and Elizabeth Chen},
url = {https://journals.flvc.org/FLAIRS/article/view/135552},
year = {2024},
date = {2024-05-01},
volume = {37},
publisher = {The Florida Artificial Intelligence Research Society},
abstract = {This paper compares different methods of using a large language model (GPT-3.5) for creating synthetic training data for a retrieval-based conversational character. The training data are in the form of linked questions and answers, which allow a classifier to retrieve a pre-recorded answer to an unseen question; the intuition is that a large language model could predict what human users might ask, thus saving the effort of collecting real user questions as training data. Results show small improvements in test performance for all synthetic datasets. However, a classifier trained on only small amounts of collected user data resulted in a higher F-score than the classifiers trained on much larger amounts of synthetic data generated using GPT-3.5. Based on these results, we see a potential in using large language models for generating training data, but at this point it is not as valuable as collecting actual user data for training.},
keywords = {Dialogue, DTIC, Natural Language},
pubstate = {published},
tppubtype = {inproceedings}
}
Bohy, Hugo; Tran, Minh; Haddad, Kevin El; Dutoit, Thierry; Soleymani, Mohammad
Social-MAE: A Transformer-Based Multimodal Autoencoder for Face and Voice Proceedings Article
In: 2024 IEEE 18th International Conference on Automatic Face and Gesture Recognition (FG), pp. 1–5, IEEE, Istanbul, Turkiye, 2024, ISBN: 979-8-3503-9494-8.
@inproceedings{bohy_social-mae_2024,
title = {Social-MAE: A Transformer-Based Multimodal Autoencoder for Face and Voice},
author = {Hugo Bohy and Minh Tran and Kevin El Haddad and Thierry Dutoit and Mohammad Soleymani},
url = {https://ieeexplore.ieee.org/document/10581940/},
doi = {10.1109/FG59268.2024.10581940},
isbn = {979-8-3503-9494-8},
year = {2024},
date = {2024-05-01},
urldate = {2024-07-18},
booktitle = {2024 IEEE 18th International Conference on Automatic Face and Gesture Recognition (FG)},
pages = {1–5},
publisher = {IEEE},
address = {Istanbul, Turkiye},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Liu, Rong; Xu, Rui; Hu, Yue; Chen, Meida; Feng, Andrew
AtomGS: Atomizing Gaussian Splatting for High-Fidelity Radiance Field Miscellaneous
2024, (Version Number: 2).
@misc{liu_atomgs_2024,
title = {AtomGS: Atomizing Gaussian Splatting for High-Fidelity Radiance Field},
author = {Rong Liu and Rui Xu and Yue Hu and Meida Chen and Andrew Feng},
url = {https://arxiv.org/abs/2405.12369},
doi = {10.48550/ARXIV.2405.12369},
year = {2024},
date = {2024-05-01},
urldate = {2024-07-11},
publisher = {arXiv},
abstract = {3D Gaussian Splatting (3DGS) has recently advanced radiance field reconstruction by offering superior capabilities for novel view synthesis and real-time rendering speed. However, its strategy of blending optimization and adaptive density control might lead to sub-optimal results; it can sometimes yield noisy geometry and blurry artifacts due to prioritizing optimizing large Gaussians at the cost of adequately densifying smaller ones. To address this, we introduce AtomGS, consisting of Atomized Proliferation and Geometry-Guided Optimization. The Atomized Proliferation constrains ellipsoid Gaussians of various sizes into more uniform-sized Atom Gaussians. The strategy enhances the representation of areas with fine features by placing greater emphasis on densification in accordance with scene details. In addition, we proposed a Geometry-Guided Optimization approach that incorporates an Edge-Aware Normal Loss. This optimization method effectively smooths flat surfaces while preserving intricate details. Our evaluation shows that AtomGS outperforms existing state-of-the-art methods in rendering quality. Additionally, it achieves competitive accuracy in geometry reconstruction and offers a significant improvement in training speed over other SDF-based methods. More interactive demos can be found in our website (https://rongliu-leo.github.io/AtomGS/).},
note = {Version Number: 2},
keywords = {Graphics, VGL},
pubstate = {published},
tppubtype = {misc}
}
Chang, Di; Shi, Yichun; Gao, Quankai; Fu, Jessica; Xu, Hongyi; Song, Guoxian; Yan, Qing; Zhu, Yizhe; Yang, Xiao; Soleymani, Mohammad
MagicPose: Realistic Human Poses and Facial Expressions Retargeting with Identity-aware Diffusion Miscellaneous
2024, (arXiv:2311.12052 [cs]).
@misc{chang_magicpose_2024,
title = {MagicPose: Realistic Human Poses and Facial Expressions Retargeting with Identity-aware Diffusion},
author = {Di Chang and Yichun Shi and Quankai Gao and Jessica Fu and Hongyi Xu and Guoxian Song and Qing Yan and Yizhe Zhu and Xiao Yang and Mohammad Soleymani},
url = {http://arxiv.org/abs/2311.12052},
year = {2024},
date = {2024-05-01},
urldate = {2024-07-18},
publisher = {arXiv},
abstract = {In this work, we propose MagicPose, a diffusion-based model for 2D human pose and facial expression retargeting. Specifically, given a reference image, we aim to generate a person's new images by controlling the poses and facial expressions while keeping the identity unchanged. To this end, we propose a two-stage training strategy to disentangle human motions and appearance (e.g., facial expressions, skin tone and dressing), consisting of (1) the pre-training of an appearance-control block and (2) learning appearance-disentangled pose control. Our novel design enables robust appearance control over generated human images, including body, facial attributes, and even background. By leveraging the prior knowledge of image diffusion models, MagicPose generalizes well to unseen human identities and complex poses without the need for additional fine-tuning. Moreover, the proposed model is easy to use and can be considered as a plug-in module/extension to Stable Diffusion. The code is available at: https://github.com/Boese0601/MagicDance},
note = {arXiv:2311.12052 [cs]},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
Koresh, Caleb; Ustun, Volkan; Kumar, Rajay; Aris, Tim
Improving Reinforcement Learning Experiments in Unity through Waypoint Utilization Journal Article
In: FLAIRS, vol. 37, 2024, ISSN: 2334-0762.
@article{koresh_improving_2024,
title = {Improving Reinforcement Learning Experiments in Unity through Waypoint Utilization},
author = {Caleb Koresh and Volkan Ustun and Rajay Kumar and Tim Aris},
url = {https://journals.flvc.org/FLAIRS/article/view/135571},
doi = {10.32473/flairs.37.1.135571},
issn = {2334-0762},
year = {2024},
date = {2024-05-01},
urldate = {2024-08-13},
journal = {FLAIRS},
volume = {37},
abstract = {Multi-agent Reinforcement Learning (MARL) models teams of agents that learn by dynamically interacting with an environment and each other, presenting opportunities to train adaptive models for team-based scenarios. However, MARL algorithms pose substantial challenges due to their immense computational requirements. This paper introduces an automatically generated waypoint-based movement system to abstract and simplify complex environments in Unity while allowing agents to learn strategic cooperation. To demonstrate the effectiveness of our approach, we utilized a simple scenario with heterogeneous roles in each team. We trained this scenario on variations of realistic terrains and compared learning between fine-grained (almost) continuous and waypoint-based movement systems. Our results indicate efficiency in learning and improved performance with waypoint-based navigation. Furthermore, our results show that waypoint-based movement systems can effectively learn differentiated behavior policies for heterogeneous roles in these experiments. These early exploratory results point out the potential of waypoint-based navigation for reducing the computational costs of developing and training MARL models in complex environments. The complete project with all scenarios and results is available on GitHub: https://github.com/HATS-ICT/ml-agents-dodgeball-env-ICT.},
keywords = {Machine Learning},
pubstate = {published},
tppubtype = {article}
}
Aris, Timothy; Ustun, Volkan; Kumar, Rajay
Training Reinforcement Learning Agents to React to an Ambush for Military Simulations Journal Article
In: FLAIRS, vol. 37, 2024, ISSN: 2334-0762.
@article{aris_training_2024,
title = {Training Reinforcement Learning Agents to React to an Ambush for Military Simulations},
author = {Timothy Aris and Volkan Ustun and Rajay Kumar},
url = {https://journals.flvc.org/FLAIRS/article/view/135578},
doi = {10.32473/flairs.37.1.135578},
issn = {2334-0762},
year = {2024},
date = {2024-05-01},
urldate = {2024-08-13},
journal = {FLAIRS},
volume = {37},
abstract = {There is a need for realistic Opposing Forces (OPFOR) behavior in military training simulations. Current training simulations generally only have simple, non-adaptive behaviors, requiring human instructors to play the role of OPFOR in any complicated scenario. This poster addresses this need by focusing on a specific scenario: training reinforcement learning agents to react to an ambush. It proposes a novel way to check for occlusion algorithmically. It shows vector fields showing the agent’s actions through the course of a training run. It shows that a single agent switching between multiple goals is possible, at least in a simplified environment. Such an approach could reduce the need to develop different agents for different scenarios. Finally, it shows a competent agent trained on a simplified React to Ambush scenario, demonstrating the plausibility of a scaled-up version.},
keywords = {Simulation, VR},
pubstate = {published},
tppubtype = {article}
}
Liu, Lixing; Ustun, Volkan; Kumar, Rajay
Leveraging Organizational Hierarchy to Simplify Reward Design in Cooperative Multi-agent Reinforcement Learning Journal Article
In: FLAIRS, vol. 37, 2024, ISSN: 2334-0762.
@article{liu_leveraging_2024,
title = {Leveraging Organizational Hierarchy to Simplify Reward Design in Cooperative Multi-agent Reinforcement Learning},
author = {Lixing Liu and Volkan Ustun and Rajay Kumar},
url = {https://journals.flvc.org/FLAIRS/article/view/135588},
doi = {10.32473/flairs.37.1.135588},
issn = {2334-0762},
year = {2024},
date = {2024-05-01},
urldate = {2024-08-13},
journal = {FLAIRS},
volume = {37},
abstract = {The effectiveness of multi-agent reinforcement learning (MARL) hinges largely on the meticulous arrangement of objectives. Yet, conventional MARL methods might not completely harness the inherent structures present in environmental states and agent relationships for goal organization. This study is conducted within the domain of military training simulations, which are typically characterized by complexity, heterogeneity, non-stationary and doctrine-driven environments with a clear organizational hierarchy and a top-down chain of command. This research investigates the approximation and integration of the organizational hierarchy into MARL for cooperative training scenarios, with the goal of streamlining the processes of reward engineering and enhancing team coordination. In the preliminary experiments, we employed two-tiered commander-subordinate feudal hierarchical (CSFH) networks to separate the prioritized team goal and individual goals. The empirical results demonstrate that the proposed framework enhances learning efficiency. It guarantees the learning of a prioritized policy for the commander agent and encourages subordinate agents to explore areas of interest more frequently, guided by appropriate soft constraints imposed by the commander.},
keywords = {Machine Learning},
pubstate = {published},
tppubtype = {article}
}
Lukin, Stephanie M; Bonial, Claire; Marge, Matthew; Hudson, Taylor; Hayes, Cory J.; Pollard, Kimberly; Baker, Anthony L.; Foots, Ashley; Artstein, Ron; Gervits, Felix; Abrams, Mitchell; Cassidy, Henry; Donatelli, Lucia; Leuski, Anton; Hill, Susan G.; Traum, David; Voss, Clare
SCOUT: A Situated and Multi-Modal Human-Robot Dialogue Corpus Journal Article
In: pp. 14445–14458, 2024.
@article{lukin-etal-2024-scout-situated,
title = {SCOUT: A Situated and Multi-Modal Human-Robot Dialogue Corpus},
author = {Stephanie M Lukin and Claire Bonial and Matthew Marge and Taylor Hudson and Cory J. Hayes and Kimberly Pollard and Anthony L. Baker and Ashley Foots and Ron Artstein and Felix Gervits and Mitchell Abrams and Henry Cassidy and Lucia Donatelli and Anton Leuski and Susan G. Hill and David Traum and Clare Voss},
url = {https://aclanthology.org/2024.lrec-main.1259},
year = {2024},
date = {2024-05-01},
pages = {14445–14458},
abstract = {We introduce the Situated Corpus Of Understanding Transactions (SCOUT), a multi-modal collection of human-robot dialogue in the task domain of collaborative exploration. The corpus was constructed from multiple Wizard-of-Oz experiments where human participants gave verbal instructions to a remotely-located robot to move and gather information about its surroundings. SCOUT contains 89,056 utterances and 310,095 words from 278 dialogues averaging 320 utterances per dialogue. The dialogues are aligned with the multi-modal data streams available during the experiments: 5,785 images and 30 maps. The corpus has been annotated with Abstract Meaning Representation and Dialogue-AMR to identify the speaker’s intent and meaning within an utterance, and with Transactional Units and Relations to track relationships between utterances to reveal patterns of the Dialogue Structure. We describe how the corpus and its annotations have been used to develop autonomous human-robot systems and enable research in open questions of how humans speak to robots. We release this corpus to accelerate progress in autonomous, situated, human-robot dialogue, especially in the context of navigation tasks where details about the environment need to be discovered.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
West, Taylor Nicole; Prinzing, Michael; Garton, Catherine; Berman, Catherine J.; Zhou, Jieni; Hale, James; Gratch, Jonathan; Fredrickson, Barbara
Improving Social Connection with Weak Ties and Strangers: Effects of a New Micro-Intervention on Interaction Quality and Social Behavior Miscellaneous
2024.
@misc{west_improving_2024,
title = {Improving Social Connection with Weak Ties and Strangers: Effects of a New Micro-Intervention on Interaction Quality and Social Behavior},
author = {Taylor Nicole West and Michael Prinzing and Catherine Garton and Catherine J. Berman and Jieni Zhou and James Hale and Jonathan Gratch and Barbara Fredrickson},
url = {https://osf.io/ytjr6},
doi = {10.31234/osf.io/ytjr6},
year = {2024},
date = {2024-05-01},
urldate = {2024-06-25},
abstract = {We propose that the emotional quality of people’s interactions with acquaintances (i.e., weak ties) and strangers contributes to well-being. We test whether a new micro-intervention can raise the quality of these interactions. We randomized young adults (N = 335) to this connectedness micro-intervention or a control intervention. Both interventions were delivered via a psychoeducational video followed by a brief conversation with a virtual human, with whom participants developed if-then plans to carry out their assigned behavioral goal. Pre-intervention, high-quality weak-tie and stranger interactions were associated with lower loneliness and greater mental health independent of strong-tie interaction quality. Experimental data showed the connectedness intervention improved the emotional quality of participants' interactions with weak ties and strangers over two days, evident in participants’ episodic self-reports and faster in-lab conversational response time. Discussion centers on implications for developing scalable behavioral interventions to improve well-being.},
keywords = {Emotions, Virtual Humans},
pubstate = {published},
tppubtype = {misc}
}
Zhang, Hao; Chang, Di; Li, Fang; Soleymani, Mohammad; Ahuja, Narendra
MagicPose4D: Crafting Articulated Models with Appearance and Motion Control Miscellaneous
2024, (Version Number: 1).
@misc{zhang_magicpose4d_2024,
title = {MagicPose4D: Crafting Articulated Models with Appearance and Motion Control},
author = {Hao Zhang and Di Chang and Fang Li and Mohammad Soleymani and Narendra Ahuja},
url = {https://arxiv.org/abs/2405.14017},
doi = {10.48550/ARXIV.2405.14017},
year = {2024},
date = {2024-05-01},
urldate = {2024-06-25},
publisher = {arXiv},
abstract = {With the success of 2D and 3D visual generative models, there is growing interest in generating 4D content. Existing methods primarily rely on text prompts to produce 4D content, but they often fall short of accurately defining complex or rare motions. To address this limitation, we propose MagicPose4D, a novel framework for refined control over both appearance and motion in 4D generation. Unlike traditional methods, MagicPose4D accepts monocular videos as motion prompts, enabling precise and customizable motion generation. MagicPose4D comprises two key modules:
i) Dual-Phase 4D Reconstruction Module, which operates in two phases. The first phase focuses on capturing the model's shape using accurate 2D supervision and less accurate but geometrically informative 3D pseudo-supervision without imposing skeleton constraints. The second phase refines the model using the more accurate pseudo-3D supervision obtained in the first phase, and introduces kinematic chain-based skeleton constraints to ensure physical plausibility. Additionally, we propose a Global-local Chamfer loss that aligns the overall distribution of predicted mesh vertices with the supervision while maintaining part-level alignment without extra annotations.
ii) Cross-category Motion Transfer Module, which leverages the predictions from the 4D reconstruction module and uses a kinematic-chain-based skeleton to achieve cross-category motion transfer. It ensures smooth transitions between frames through dynamic rigidity, facilitating robust generalization without additional training.
Through extensive experiments, we demonstrate that MagicPose4D significantly improves the accuracy and consistency of 4D content generation, outperforming existing methods in various benchmarks.},
note = {Version Number: 1},
keywords = {VGL, Virtual Humans},
pubstate = {published},
tppubtype = {misc}
}
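The abstract's Global-local Chamfer loss can be read as a global Chamfer term over all vertices plus per-part Chamfer terms keyed to the kinematic chain. The PyTorch sketch below follows that plain reading and is not the released MagicPose4D code; the part-label convention and the w_local weighting are assumptions.

import torch

def chamfer(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
    """Symmetric Chamfer distance between point sets a (N, 3) and b (M, 3)."""
    d = torch.cdist(a, b)  # (N, M) pairwise Euclidean distances
    return d.min(dim=1).values.mean() + d.min(dim=0).values.mean()

def global_local_chamfer(pred, target, pred_parts, target_parts, w_local=1.0):
    """Global term aligns the overall vertex distributions; local terms
    align each part (e.g., one label per kinematic-chain segment)."""
    loss = chamfer(pred, target)
    for p in torch.unique(pred_parts):
        pa, tb = pred[pred_parts == p], target[target_parts == p]
        if len(pa) > 0 and len(tb) > 0:
            loss = loss + w_local * chamfer(pa, tb)
    return loss

# Toy usage with random vertex sets and two part labels.
pred = torch.rand(100, 3); target = torch.rand(120, 3)
pp = torch.randint(0, 2, (100,)); tp = torch.randint(0, 2, (120,))
print(global_local_chamfer(pred, target, pp, tp).item())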