Publications
Gurney, Nikolos; Miller, John H.; Pynadath, David V.
Exploring the choice landscape: Anchoring and framing effects on search behavior in complex choices Journal Article
In: Journal of Choice Modelling, vol. 55, pp. 100549, 2025, ISSN: 1755-5345.
@article{gurney_exploring_2025,
title = {Exploring the choice landscape: Anchoring and framing effects on search behavior in complex choices},
author = {Nikolos Gurney and John H. Miller and David V. Pynadath},
url = {https://linkinghub.elsevier.com/retrieve/pii/S1755534525000120},
doi = {10.1016/j.jocm.2025.100549},
issn = {1755-5345},
year = {2025},
date = {2025-06-01},
urldate = {2025-04-15},
journal = {Journal of Choice Modelling},
volume = {55},
pages = {100549},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Klumpe, Stella; Mitchell, Kelsey C.; Cox, Emma; Katz, Jeffrey S.; Lazarowski, Lucia; Deshpande, Gopikrishna; Gratch, Jonathan; Visser, Ewart J. De; Ayaz, Hasan; Li, Xingnan; Franke, Adrian A.; Krueger, Frank
Social bonding between humans, animals, and robots: Dogs outperform AIBOs, their robotic replicas, as social companions Journal Article
In: PLoS One, vol. 20, no. 6, pp. e0324312, 2025, ISSN: 1932-6203.
@article{klumpe_social_2025,
title = {Social bonding between humans, animals, and robots: Dogs outperform AIBOs, their robotic replicas, as social companions},
author = {Stella Klumpe and Kelsey C. Mitchell and Emma Cox and Jeffrey S. Katz and Lucia Lazarowski and Gopikrishna Deshpande and Jonathan Gratch and Ewart J. De Visser and Hasan Ayaz and Xingnan Li and Adrian A. Franke and Frank Krueger},
editor = {Casey R. Lynch},
url = {https://dx.plos.org/10.1371/journal.pone.0324312},
doi = {10.1371/journal.pone.0324312},
issn = {1932-6203},
year = {2025},
date = {2025-06-01},
urldate = {2025-06-12},
journal = {PLoS One},
volume = {20},
number = {6},
pages = {e0324312},
abstract = {In the evolving landscape of technology, robots have emerged as social companions, prompting an investigation into social bonding between humans and robots. While human-animal interactions are well-studied, human-robot interactions (HRI) remain comparatively underexplored. Ethorobotics, a field of social robotic engineering based on ecology and ethology, suggests designing companion robots modeled on animal companions, which are simpler to emulate than humans. However, it is unclear whether these robots can match the social companionship provided by their original models. This study examined social bonding between humans and AIBOs, dog-inspired companion robots, compared to real dogs. Nineteen female participants engaged in 12 affiliative interactions with dogs and AIBOs across two counter-balanced, one-month bonding phases. Social bonding was assessed through urinary oxytocin (OXT) level change over an interaction, self-reported attachment using an adapted version of the Lexington Attachment to Pets Scale, and social companionship evaluations administering the Robot-Dog Questionnaire. To examine OXT level changes and self-reported attachment by comparing the two social companions, we conducted mixed-effects model analyses and planned follow-up comparisons. Frequency comparison, binary logistic regression, and thematic analysis were performed to analyze social companionship evaluations. Results revealed significant differences between dogs and AIBOs in fostering social bonds. OXT level change increased during interactions with dogs but decreased with AIBOs. Participants reported stronger attachment to dogs and rated them as better social companions. These findings highlight the current limitations of AIBOs in fostering social bonding immediately compared to dogs. Our study contributes to the growing HRI research by demonstrating an existing gap between AIBOs and dogs as social companions. It highlights the need for further investigation to understand the complexities of social bonding with companion robots, which is essential to implement successful applications for social robots in diverse domains such as the elderly and health care, education, and entertainment.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Chang, Di; Cao, Mingdeng; Shi, Yichun; Liu, Bo; Cai, Shengqu; Zhou, Shijie; Huang, Weilin; Wetzstein, Gordon; Soleymani, Mohammad; Wang, Peng
ByteMorph: Benchmarking Instruction-Guided Image Editing with Non-Rigid Motions Miscellaneous
2025, (arXiv:2506.03107 [cs]).
@misc{chang_bytemorph_2025,
title = {ByteMorph: Benchmarking Instruction-Guided Image Editing with Non-Rigid Motions},
author = {Di Chang and Mingdeng Cao and Yichun Shi and Bo Liu and Shengqu Cai and Shijie Zhou and Weilin Huang and Gordon Wetzstein and Mohammad Soleymani and Peng Wang},
url = {http://arxiv.org/abs/2506.03107},
doi = {10.48550/arXiv.2506.03107},
year = {2025},
date = {2025-06-01},
urldate = {2025-06-17},
publisher = {arXiv},
abstract = {Editing images with instructions to reflect non-rigid motions, camera viewpoint shifts, object deformations, human articulations, and complex interactions, poses a challenging yet underexplored problem in computer vision. Existing approaches and datasets predominantly focus on static scenes or rigid transformations, limiting their capacity to handle expressive edits involving dynamic motion. To address this gap, we introduce ByteMorph, a comprehensive framework for instruction-based image editing with an emphasis on non-rigid motions. ByteMorph comprises a large-scale dataset, ByteMorph-6M, and a strong baseline model built upon the Diffusion Transformer (DiT), named ByteMorpher. ByteMorph-6M includes over 6 million high-resolution image editing pairs for training, along with a carefully curated evaluation benchmark ByteMorph-Bench. Both capture a wide variety of non-rigid motion types across diverse environments, human figures, and object categories. The dataset is constructed using motion-guided data generation, layered compositing techniques, and automated captioning to ensure diversity, realism, and semantic coherence. We further conduct a comprehensive evaluation of recent instruction-based image editing methods from both academic and commercial domains.},
note = {arXiv:2506.03107 [cs]},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
Hale, James; Kim, Peter H.; Gratch, Jonathan
“Provably fair” algorithms may perpetuate racial and gender bias: a study of salary dispute resolution Journal Article
In: Auton Agent Multi-Agent Syst, vol. 39, no. 1, pp. 20, 2025, ISSN: 1387-2532, 1573-7454.
@article{hale_provably_2025,
title = {“Provably fair” algorithms may perpetuate racial and gender bias: a study of salary dispute resolution},
author = {James Hale and Peter H. Kim and Jonathan Gratch},
url = {https://link.springer.com/10.1007/s10458-025-09703-x},
doi = {10.1007/s10458-025-09703-x},
issn = {1387-2532, 1573-7454},
year = {2025},
date = {2025-06-01},
urldate = {2025-03-18},
journal = {Auton Agent Multi-Agent Syst},
volume = {39},
number = {1},
pages = {20},
abstract = {Prior work suggests automated dispute resolution tools using “provably fair” algorithms can address disparities between demographic groups. These methods use multi-criteria elicited preferences from all disputants and satisfy constraints to generate “fair” solutions. However, we analyze the potential for inequity to permeate proposals through the preference elicitation stage. This possibility arises if differences in dispositional attitudes differ between demographics, and those dispositions affect elicited preferences. Specifically, risk aversion plays a prominent role in predicting preferences. Risk aversion predicts a weaker relative preference for salary and a softer within-issue utility for each issue; this leads to worse compensation packages for risk-averse groups. These results raise important questions in AI-value alignment about whether an AI mediator should take explicit preferences at face value.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Traum, David; Brixey, Jacqueline
Does a code-switching dialogue system help users learn conversational fluency in Choctaw? Journal Article
In: Proceedings of the Fifth Workshop on NLP for Indigenous Languages of the Americas (AmericasNLP), pp. 8–17, 2025, ISBN: 979-8-89176-236-7.
@article{brixey-traum-2025-code,
title = {Does a code-switching dialogue system help users learn conversational fluency in Choctaw?},
author = {David Traum and Jacqueline Brixey},
url = {https://aclanthology.org/2025.americasnlp-1.2/},
doi = {10.18653/v1/2025.americasnlp-1.2},
isbn = {979-8-89176-236-7},
year = {2025},
date = {2025-05-05},
urldate = {2025-05-05},
journal = {Proceedings of the Fifth Workshop on NLP for Indigenous Languages of the Americas (AmericasNLP)},
pages = {8–17},
publisher = {Association for Computational Linguistics},
address = {Albuquerque, New Mexico},
abstract = {We investigate the learning outcomes and user response to a chatbot for practicing conversational Choctaw, an endangered American Indigenous language. Conversational fluency is a goal for many language learners, however, for learners of endangered languages in North America, access to fluent speakers may be limited. Chatbots are potentially ideal dialogue partners as this kind of dialogue system fulfills a non-authoritative role by focusing on carrying on a conversation as an equal conversational partner. The goal of the chatbot investigated in this work is to serve as a conversational partner in the absence of a fluent Choctaw-speaking human interlocutor. We investigate the impact of code-switching in the interaction, comparing a bilingual chatbot against a monolingual Choctaw version. We evaluate the systems for user engagement and enjoyment, as well as gains in conversational fluency from interacting with the system.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Hale, James; Kim, HanMoe; Choi, Ahyoung; Gratch, Jonathan
AI-Mediated Dispute Resolution Journal Article
In: AAAI-SS, vol. 5, no. 1, pp. 67–70, 2025, ISSN: 2994-4317.
@article{hale_ai-mediated_2025,
title = {AI-Mediated Dispute Resolution},
author = {James Hale and HanMoe Kim and Ahyoung Choi and Jonathan Gratch},
url = {https://ojs.aaai.org/index.php/AAAI-SS/article/view/35558},
doi = {10.1609/aaaiss.v5i1.35558},
issn = {2994-4317},
year = {2025},
date = {2025-05-01},
urldate = {2025-08-19},
journal = {AAAI-SS},
volume = {5},
number = {1},
pages = {67–70},
abstract = {We examine the effectiveness of large language model (LLM) mediations in the under-studied dispute resolution domain. We first used a new corpus of dispute resolutions, KODIS, to investigate if LLMs can correctly identify whether to intervene. We find evidence that GPT as a mediator picks up on salient aspects of a dispute, such as Frustration and whether the disputants ultimately come to a resolution or stall at an impasse — intervening significantly more so in cases of high frustration and impasse. Afterward, we ran a user study to compare GPT mediations against those of novice human mediators. We find participants agreed GPT's mediations were more likely to lead to resolution; were better positioned in the dialog; had better justification than human-crafted ones; and, on a forced choice, were generally more effective than novice human mediations.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Han, Bin; Gratch, Jonathan
Salience Adjustment for Context-Based Emotion Recognition Proceedings Article
In: 2025 IEEE 19th International Conference on Automatic Face and Gesture Recognition (FG), pp. 1–6, IEEE, Tampa/Clearwater, FL, USA, 2025, ISBN: 979-8-3315-5341-8.
@inproceedings{han_salience_2025,
title = {Salience Adjustment for Context-Based Emotion Recognition},
author = {Bin Han and Jonathan Gratch},
url = {https://ieeexplore.ieee.org/document/11099210/},
doi = {10.1109/FG61629.2025.11099210},
isbn = {979-8-3315-5341-8},
year = {2025},
date = {2025-05-01},
urldate = {2025-08-19},
booktitle = {2025 IEEE 19th International Conference on Automatic Face and Gesture Recognition (FG)},
pages = {1–6},
publisher = {IEEE},
address = {Tampa/Clearwater, FL, USA},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Okado, Yuko; Nye, Benjamin D.; Aguirre, Angelica; Swartout, William
How Can Virtual Agents Scale Up Mentoring?: Insights from College Students’ Experiences Using the CareerFair.ai Platform at an American Hispanic-Serving Institution Journal Article
In: Int J Artif Intell Educ, 2025, ISSN: 1560-4292, 1560-4306.
@article{okado_how_2025,
title = {How Can Virtual Agents Scale Up Mentoring?: Insights from College Students’ Experiences Using the CareerFair.ai Platform at an American Hispanic-Serving Institution},
author = {Yuko Okado and Benjamin D. Nye and Angelica Aguirre and William Swartout},
url = {https://link.springer.com/10.1007/s40593-025-00482-w},
doi = {10.1007/s40593-025-00482-w},
issn = {1560-4292, 1560-4306},
year = {2025},
date = {2025-05-01},
urldate = {2025-06-24},
journal = {Int J Artif Intell Educ},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Core, Mark; Nye, Benjamin; Carr, Kayla; Li, Shirley; Shiel, Aaron; Auerbach, Daniel; Leeds, Andrew; Swartout, William
Usability and Preferences for a Personalized Adaptive Learning System for AI Upskilling Journal Article
In: FLAIRS, vol. 38, 2025, ISSN: 2334-0762, 2334-0754.
@article{core_usability_2025,
title = {Usability and Preferences for a Personalized Adaptive Learning System for AI Upskilling},
author = {Mark Core and Benjamin Nye and Kayla Carr and Shirley Li and Aaron Shiel and Daniel Auerbach and Andrew Leeds and William Swartout},
url = {https://journals.flvc.org/FLAIRS/article/view/138996},
doi = {10.32473/flairs.38.1.138996},
issn = {2334-0762, 2334-0754},
year = {2025},
date = {2025-05-01},
urldate = {2025-05-20},
journal = {FLAIRS},
volume = {38},
abstract = {As AI tools become common across jobs and industries, it is critical to broaden education about AI beyond teaching computer scientists how to build AI systems. To expand AI education, we are researching AI for AI learning: a personalized and adaptive learning system that integrates dialog-based tutoring and gamified programming activities. To study this problem, we adapted and expanded an existing smartphone adaptive coach to develop the Game-if-AI system. Using a design-based research approach, Game-if-AI was iteratively tested and improved across four semesters of optional use in a course designed for technician-level understanding of AI: mastering programming skills to apply AI libraries and established models. In this study, we measured the interests and needs of these technical learners, based on both survey data and on how they engaged with topics in the system. Based on this data, new topics were added and the system was refined. In this paper, we report students' usability ratings for system components and student preferences based on completion rates of AI topics available each semester. Students rated the adaptive system positively overall (93% rated as a "good idea"), but more complex learning activities (tutoring dialogs, programming) were rated lower than traditional ones (e.g., multiple choice, reading). Students were most likely to master topics highly aligned to the course materials, as well as self-directed learning toward easier high-interest topics (e.g., LLM Prompting).},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Wang, Ning; Fu, Boxi; Dincer, Betul; Masur, Omkar; Faizi, David; Ravindran, Harshul; Wang, Julia; Lai, Devashish; Merchant, Chirag
Becoming Fei: An Educational Game for AI and Data Science Education for Novice Learners Book Section
In: Smith, Brian K.; Borge, Marcela (Ed.): Learning and Collaboration Technologies, vol. 15808, pp. 69–79, Springer Nature Switzerland, Cham, 2025, ISBN: 978-3-031-93745-3 978-3-031-93746-0, (Series Title: Lecture Notes in Computer Science).
@incollection{smith_becoming_2025,
title = {Becoming Fei: An Educational Game for AI and Data Science Education for Novice Learners},
author = {Ning Wang and Boxi Fu and Betul Dincer and Omkar Masur and David Faizi and Harshul Ravindran and Julia Wang and Devashish Lai and Chirag Merchant},
editor = {Brian K. Smith and Marcela Borge},
url = {https://link.springer.com/10.1007/978-3-031-93746-0_6},
doi = {10.1007/978-3-031-93746-0_6},
isbn = {978-3-031-93745-3 978-3-031-93746-0},
year = {2025},
date = {2025-05-01},
urldate = {2025-06-12},
booktitle = {Learning and Collaboration Technologies},
volume = {15808},
pages = {69–79},
publisher = {Springer Nature Switzerland},
address = {Cham},
note = {Series Title: Lecture Notes in Computer Science},
keywords = {},
pubstate = {published},
tppubtype = {incollection}
}
Awada, Mohamad; Gerber, Burcin Becerik; Lucas, Gale M.; Roll, Shawn C.
The Impact of Color Correlated Temperature and Illuminance Levels of Office Lighting on Stress and Cognitive Restoration Journal Article
In: Journal of Environmental Psychology, pp. 102628, 2025, ISSN: 0272-4944.
@article{awada_impact_2025,
title = {The Impact of Color Correlated Temperature and Illuminance Levels of Office Lighting on Stress and Cognitive Restoration},
author = {Mohamad Awada and Burcin Becerik Gerber and Gale M. Lucas and Shawn C. Roll},
url = {https://linkinghub.elsevier.com/retrieve/pii/S0272494425001112},
doi = {10.1016/j.jenvp.2025.102628},
issn = {0272-4944},
year = {2025},
date = {2025-05-01},
urldate = {2025-05-20},
journal = {Journal of Environmental Psychology},
pages = {102628},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Gordon, Andrew
Logical Abduction as a Computational Model of Narrative Proceedings Article
In: Geneva, Switzerland, 2025.
@inproceedings{gordon_andrew_logical_2025,
title = {Logical Abduction as a Computational Model of Narrative},
author = {Andrew Gordon},
url = {https://asgordon.github.io/publications/CMN2025.PDF},
year = {2025},
date = {2025-05-01},
address = {Geneva, Switzerland},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Chaubey, Ashutosh; Guan, Xulang; Soleymani, Mohammad
Face-LLaVA: Facial Expression and Attribute Understanding through Instruction Tuning Miscellaneous
2025, (Version Number: 1).
@misc{chaubey_face-llava_2025,
title = {Face-LLaVA: Facial Expression and Attribute Understanding through Instruction Tuning},
author = {Ashutosh Chaubey and Xulang Guan and Mohammad Soleymani},
url = {https://arxiv.org/abs/2504.07198},
doi = {10.48550/ARXIV.2504.07198},
year = {2025},
date = {2025-04-01},
urldate = {2025-04-15},
publisher = {arXiv},
abstract = {The human face plays a central role in social communication, necessitating the use of performant computer vision tools for human-centered applications. We propose Face-LLaVA, a multimodal large language model for face-centered, in-context learning, including facial expression and attribute recognition. Additionally, Face-LLaVA is able to generate natural language descriptions that can be used for reasoning. Leveraging existing visual databases, we first developed FaceInstruct-1M, a face-centered database for instruction tuning MLLMs for face processing. We then developed a novel face-specific visual encoder powered by Face-Region Guided Cross-Attention that integrates face geometry with local visual features. We evaluated the proposed method across nine different datasets and five different face processing tasks, including facial expression recognition, action unit detection, facial attribute detection, age estimation and deepfake detection. Face-LLaVA achieves superior results compared to existing open-source MLLMs and competitive performance compared to commercial solutions. Our model output also receives a higher reasoning rating by GPT under a zero-shot setting across all the tasks. Both our dataset and model will be released at https://face-llava.github.io to support future advancements in social AI and foundational vision-language research.},
note = {Version Number: 1},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
Hale, James; Rakshit, Sushrita; Chawla, Kushal; Brett, Jeanne M.; Gratch, Jonathan
KODIS: A Multicultural Dispute Resolution Dialogue Corpus Miscellaneous
2025, (arXiv:2504.12723 [cs]).
@misc{hale_kodis_2025,
title = {KODIS: A Multicultural Dispute Resolution Dialogue Corpus},
author = {James Hale and Sushrita Rakshit and Kushal Chawla and Jeanne M. Brett and Jonathan Gratch},
url = {http://arxiv.org/abs/2504.12723},
doi = {10.48550/arXiv.2504.12723},
year = {2025},
date = {2025-04-01},
urldate = {2025-05-20},
publisher = {arXiv},
abstract = {We present KODIS, a dyadic dispute resolution corpus containing thousands of dialogues from over 75 countries. Motivated by a theoretical model of culture and conflict, participants engage in a typical customer service dispute designed by experts to evoke strong emotions and conflict. The corpus contains a rich set of dispositional, process, and outcome measures. The initial analysis supports theories of how anger expressions lead to escalatory spirals and highlights cultural differences in emotional expression. We make this corpus and data collection framework available to the community.},
note = {arXiv:2504.12723 [cs]},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
Lin, Spencer; Jun, Miru; Rizk, Basem; Shieh, Karen; Fisher, Scott; Mozgai, Sharon
Optimizing SIA Development: A Case Study in User-Centered Design for Estuary, a Multimodal Socially Interactive Agent Framework Proceedings Article
In: Proceedings of the Extended Abstracts of the CHI Conference on Human Factors in Computing Systems, pp. 1–9, 2025, (arXiv:2504.14427 [cs]).
@inproceedings{lin_optimizing_2025,
title = {Optimizing SIA Development: A Case Study in User-Centered Design for Estuary, a Multimodal Socially Interactive Agent Framework},
author = {Spencer Lin and Miru Jun and Basem Rizk and Karen Shieh and Scott Fisher and Sharon Mozgai},
url = {http://arxiv.org/abs/2504.14427},
doi = {10.1145/3706599.3707399},
year = {2025},
date = {2025-04-01},
urldate = {2025-05-20},
booktitle = {Proceedings of the Extended Abstracts of the CHI Conference on Human Factors in Computing Systems},
pages = {1–9},
abstract = {This case study presents our user-centered design model for Socially Intelligent Agent (SIA) development frameworks through our experience developing Estuary, an open source multimodal framework for building low-latency real-time socially interactive agents. We leverage the Rapid Assessment Process (RAP) to collect the thoughts of leading researchers in the field of SIAs regarding the current state of the art for SIA development as well as their evaluation of how well Estuary may potentially address current research gaps. We achieve this through a series of end-user interviews conducted by a fellow researcher in the community. We hope that the findings of our work will not only assist the continued development of Estuary but also guide the development of other future frameworks and technologies for SIAs.},
note = {arXiv:2504.14427 [cs]},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Brun, Antonin; Lucas, Gale; Becerik-Gerber, Burçin
Under Pressure: Contextualizing Workplace Stress Towards User-Centered Interventions Proceedings Article
In: Proceedings of the Extended Abstracts of the CHI Conference on Human Factors in Computing Systems, pp. 1–9, ACM, Yokohama, Japan, 2025, ISBN: 979-8-4007-1395-8.
@inproceedings{brun_under_2025,
title = {Under Pressure: Contextualizing Workplace Stress Towards User-Centered Interventions},
author = {Antonin Brun and Gale Lucas and Burçin Becerik-Gerber},
url = {https://dl.acm.org/doi/10.1145/3706599.3719987},
doi = {10.1145/3706599.3719987},
isbn = {979-8-4007-1395-8},
year = {2025},
date = {2025-04-01},
urldate = {2025-06-12},
booktitle = {Proceedings of the Extended Abstracts of the CHI Conference on Human Factors in Computing Systems},
pages = {1–9},
publisher = {ACM},
address = {Yokohama, Japan},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Liu, Ziming; Xu, Jiuyi; Suen, Christine Wun Ki; Chen, Meida; Zou, Zhengbo; Shi, Yangming
Egocentric camera-based method for detecting static hazardous objects on construction sites Journal Article
In: Automation in Construction, vol. 172, pp. 106048, 2025, ISSN: 0926-5805.
@article{liu_egocentric_2025,
title = {Egocentric camera-based method for detecting static hazardous objects on construction sites},
author = {Ziming Liu and Jiuyi Xu and Christine Wun Ki Suen and Meida Chen and Zhengbo Zou and Yangming Shi},
url = {https://linkinghub.elsevier.com/retrieve/pii/S0926580525000883},
doi = {10.1016/j.autcon.2025.106048},
issn = {0926-5805},
year = {2025},
date = {2025-04-01},
urldate = {2025-03-18},
journal = {Automation in Construction},
volume = {172},
pages = {106048},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Xiong, Haolin; Muttukuru, Sairisheek; Xiao, Hanyuan; Upadhyay, Rishi; Chari, Pradyumna; Zhao, Yajie; Kadambi, Achuta
SparseGS: Sparse View Synthesis Using 3D Gaussian Splatting Proceedings Article
In: 2025 International Conference on 3D Vision (3DV), pp. 1032–1041, IEEE, Singapore, Singapore, 2025, ISBN: 979-8-3315-3851-4.
@inproceedings{xiong_sparsegs_2025,
title = {SparseGS: Sparse View Synthesis Using 3D Gaussian Splatting},
author = {Haolin Xiong and Sairisheek Muttukuru and Hanyuan Xiao and Rishi Upadhyay and Pradyumna Chari and Yajie Zhao and Achuta Kadambi},
url = {https://ieeexplore.ieee.org/document/11125578/},
doi = {10.1109/3DV66043.2025.00100},
isbn = {979-8-3315-3851-4},
year = {2025},
date = {2025-03-01},
urldate = {2025-09-25},
booktitle = {2025 International Conference on 3D Vision (3DV)},
pages = {1032–1041},
publisher = {IEEE},
address = {Singapore, Singapore},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Prasad, Pratusha B.; Hemmatyar, Omid; Zou, Caoyi; Zhao, Yajie
Bifocal polarization-sensitive metalens for rapid BRDF estimation Proceedings Article
In: Hua, Hong; Argaman, Naamah; Nikolov, Daniel K. (Ed.): Optical Architectures for Displays and Sensing in Augmented, Virtual, and Mixed Reality (AR, VR, MR) VI, pp. 44, SPIE, San Francisco, United States, 2025.
@inproceedings{prasad_bifocal_2025,
title = {Bifocal polarization-sensitive metalens for rapid BRDF estimation},
author = {Pratusha B. Prasad and Omid Hemmatyar and Caoyi Zou and Yajie Zhao},
editor = {Hong Hua and Naamah Argaman and Daniel K. Nikolov},
url = {https://www.spiedigitallibrary.org/conference-proceedings-of-spie/13414/3042449/Bifocal-polarization-sensitive-metalens-for-rapid-BRDF-estimation/10.1117/12.3042449.full},
doi = {10.1117/12.3042449},
year = {2025},
date = {2025-03-01},
urldate = {2025-07-17},
booktitle = {Optical Architectures for Displays and Sensing in Augmented, Virtual, and Mixed Reality (AR, VR, MR) VI},
pages = {44},
publisher = {SPIE},
address = {San Francisco, United States},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Siniukov, Maksim; Chang, Di; Tran, Minh; Gong, Hongkun; Chaubey, Ashutosh; Soleymani, Mohammad
DiTaiListener: Controllable High Fidelity Listener Video Generation with Diffusion Miscellaneous
2025, (Version Number: 1).
@misc{siniukov_ditailistener_2025,
title = {DiTaiListener: Controllable High Fidelity Listener Video Generation with Diffusion},
author = {Maksim Siniukov and Di Chang and Minh Tran and Hongkun Gong and Ashutosh Chaubey and Mohammad Soleymani},
url = {https://arxiv.org/abs/2504.04010},
doi = {10.48550/ARXIV.2504.04010},
year = {2025},
date = {2025-03-01},
urldate = {2025-04-15},
publisher = {arXiv},
abstract = {Generating naturalistic and nuanced listener motions for extended interactions remains an open problem. Existing methods often rely on low-dimensional motion codes for facial behavior generation followed by photorealistic rendering, limiting both visual fidelity and expressive richness. To address these challenges, we introduce DiTaiListener, powered by a video diffusion model with multimodal conditions. Our approach first generates short segments of listener responses conditioned on the speaker's speech and facial motions with DiTaiListener-Gen. It then refines the transitional frames via DiTaiListener-Edit for a seamless transition. Specifically, DiTaiListener-Gen adapts a Diffusion Transformer (DiT) for the task of listener head portrait generation by introducing a Causal Temporal Multimodal Adapter (CTM-Adapter) to process speakers' auditory and visual cues. CTM-Adapter integrates speakers' input in a causal manner into the video generation process to ensure temporally coherent listener responses. For long-form video generation, we introduce DiTaiListener-Edit, a transition refinement video-to-video diffusion model. The model fuses video segments into smooth and continuous videos, ensuring temporal consistency in facial expressions and image quality when merging short video segments produced by DiTaiListener-Gen. Quantitatively, DiTaiListener achieves the state-of-the-art performance on benchmark datasets in both photorealism (+73.8% in FID on RealTalk) and motion representation (+6.1% in FD metric on VICO) spaces. User studies confirm the superior performance of DiTaiListener, with the model being the clear preference in terms of feedback, diversity, and smoothness, outperforming competitors by a significant margin.},
note = {Version Number: 1},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
Tak, Ala N.; Banayeeanzade, Amin; Bolourani, Anahita; Kian, Mina; Jia, Robin; Gratch, Jonathan
Mechanistic Interpretability of Emotion Inference in Large Language Models Miscellaneous
2025, (arXiv:2502.05489 [cs]).
@misc{tak_mechanistic_2025,
title = {Mechanistic Interpretability of Emotion Inference in Large Language Models},
author = {Ala N. Tak and Amin Banayeeanzade and Anahita Bolourani and Mina Kian and Robin Jia and Jonathan Gratch},
url = {http://arxiv.org/abs/2502.05489},
doi = {10.48550/arXiv.2502.05489},
year = {2025},
date = {2025-02-01},
urldate = {2025-02-20},
publisher = {arXiv},
abstract = {Large language models (LLMs) show promising capabilities in predicting human emotions from text. However, the mechanisms through which these models process emotional stimuli remain largely unexplored. Our study addresses this gap by investigating how autoregressive LLMs infer emotions, showing that emotion representations are functionally localized to specific regions in the model. Our evaluation includes diverse model families and sizes and is supported by robustness checks. We then show that the identified representations are psychologically plausible by drawing on cognitive appraisal theory, a well-established psychological framework positing that emotions emerge from evaluations (appraisals) of environmental stimuli. By causally intervening on construed appraisal concepts, we steer the generation and show that the outputs align with theoretical and intuitive expectations. This work highlights a novel way to causally intervene and precisely shape emotional text generation, potentially benefiting safety and alignment in sensitive affective domains.},
note = {arXiv:2502.05489 [cs]},
keywords = {DTIC, LLM},
pubstate = {published},
tppubtype = {misc}
}
Liu, Ruying; Becerik-Gerber, Burcin; Lucas, Gale M.; Busta, Kelly
Impact of behavior-based virtual training on active shooter incident preparedness in healthcare facilities Journal Article
In: International Journal of Disaster Risk Reduction, vol. 118, pp. 105225, 2025, ISSN: 2212-4209.
@article{liu_impact_2025,
title = {Impact of behavior-based virtual training on active shooter incident preparedness in healthcare facilities},
author = {Ruying Liu and Burcin Becerik-Gerber and Gale M. Lucas and Kelly Busta},
url = {https://linkinghub.elsevier.com/retrieve/pii/S2212420925000494},
doi = {10.1016/j.ijdrr.2025.105225},
issn = {2212-4209},
year = {2025},
date = {2025-02-01},
urldate = {2025-02-20},
journal = {International Journal of Disaster Risk Reduction},
volume = {118},
pages = {105225},
keywords = {DTIC, Virtual Humans},
pubstate = {published},
tppubtype = {article}
}
Brun, Antonin; Liu, Ruying; Shukla, Aryan; Watson, Frances; Gratch, Jonathan
Exploring Emotion-Sensitive LLM-Based Conversational AI Miscellaneous
2025, (arXiv:2502.08920 [cs]).
@misc{brun_exploring_2025,
title = {Exploring Emotion-Sensitive LLM-Based Conversational AI},
author = {Antonin Brun and Ruying Liu and Aryan Shukla and Frances Watson and Jonathan Gratch},
url = {http://arxiv.org/abs/2502.08920},
doi = {10.48550/arXiv.2502.08920},
year = {2025},
date = {2025-02-01},
urldate = {2025-02-20},
publisher = {arXiv},
abstract = {Conversational AI chatbots have become increasingly common within the customer service industry. Despite improvements in their emotional development, they often lack the authenticity of real customer service interactions or the competence of service providers. By comparing emotion-sensitive and emotion-insensitive LLM-based chatbots across 30 participants, we aim to explore how emotional sensitivity in chatbots influences perceived competence and overall customer satisfaction in service interactions. Additionally, we employ sentiment analysis techniques to analyze and interpret the emotional content of user inputs. We highlight that perceptions of chatbot trustworthiness and competence were higher in the case of the emotion-sensitive chatbot, even if issue resolution rates were not affected. We discuss implications of improved user satisfaction from emotion-sensitive chatbots and potential applications in support services.},
note = {arXiv:2502.08920 [cs]},
keywords = {AI, LLM},
pubstate = {published},
tppubtype = {misc}
}
Terada, Kazunori; Melo, Celso De; Santos, Francisco C.; Gratch, Jonathan
A Bayesian Model of Mind Reading from Decisions and Emotions in Social Dilemmas Journal Article
In: Proceedings of the Annual Meeting of the Cognitive Science Society, vol. 47, 2025.
@article{terada_bayesian_2025,
title = {A Bayesian Model of Mind Reading from Decisions and Emotions in Social Dilemmas},
author = {Kazunori Terada and Celso De Melo and Francisco C. Santos and Jonathan Gratch},
url = {https://escholarship.org/uc/item/12f7f7f8#main},
year = {2025},
date = {2025-01-01},
journal = {Proceedings of the Annual Meeting of the Cognitive Science Society},
volume = {47},
keywords = {DTIC, Emotions},
pubstate = {published},
tppubtype = {article}
}
Tak, Ala N.; Gratch, Jonathan; Scherer, Klaus R.
Aware yet Biased: Investigating Emotional Reasoning and Appraisal Bias in Large Language Models Journal Article
In: IEEE Trans. Affective Comput., pp. 1–11, 2025, ISSN: 1949-3045, 2371-9850.
@article{tak_aware_2025,
title = {Aware yet Biased: Investigating Emotional Reasoning and Appraisal Bias in Large Language Models},
author = {Ala N. Tak and Jonathan Gratch and Klaus R. Scherer},
url = {https://ieeexplore.ieee.org/document/11045290/},
doi = {10.1109/TAFFC.2025.3581461},
issn = {1949-3045, 2371-9850},
year = {2025},
date = {2025-01-01},
urldate = {2025-08-19},
journal = {IEEE Trans. Affective Comput.},
pages = {1–11},
keywords = {Emotions, LLM},
pubstate = {published},
tppubtype = {article}
}
Walsh, Joel; Mamidanna, Siddarth; Nye, Benjamin; Core, Mark; Auerbach, Daniel
Fine-tuning for Better Few Shot Prompting: An Empirical Comparison for Short Answer Grading Miscellaneous
2025, (Version Number: 1).
@misc{walsh_fine-tuning_2025,
title = {Fine-tuning for Better Few Shot Prompting: An Empirical Comparison for Short Answer Grading},
author = {Joel Walsh and Siddarth Mamidanna and Benjamin Nye and Mark Core and Daniel Auerbach},
url = {https://arxiv.org/abs/2508.04063},
doi = {10.48550/ARXIV.2508.04063},
year = {2025},
date = {2025-01-01},
urldate = {2025-08-19},
publisher = {arXiv},
abstract = {Research to improve Automated Short Answer Grading has recently focused on Large Language Models (LLMs) with prompt engineering and no- or few-shot prompting to achieve best results. This is in contrast to the fine-tuning approach, which has historically required large-scale compute clusters inaccessible to most users. New closed-model approaches such as OpenAI's fine-tuning service promise results with as few as 100 examples, while methods using open weights such as quantized low-rank adaptive (QLORA) can be used to fine-tune models on consumer GPUs. We evaluate both of these fine-tuning methods, measuring their interaction with few-shot prompting for automated short answer grading (ASAG) with structured (JSON) outputs. Our results show that finetuning with small amounts of data has limited utility for Llama open-weight models, but that fine-tuning methods can outperform few-shot baseline instruction-tuned LLMs for OpenAI's closed models. While our evaluation set is limited, we find some evidence that the observed benefits of finetuning may be impacted by the domain subject matter. Lastly, we observed dramatic improvement with the LLama 3.1 8B-Instruct open-weight model by seeding the initial training examples with a significant amount of cheaply generated synthetic training data.},
note = {Version Number: 1},
keywords = {DTIC, Machine Learning},
pubstate = {published},
tppubtype = {misc}
}
Xue, Jintang; Zhao, Ganning; Yao, Jie-En; Chen, Hong-En; Hu, Yue; Chen, Meida; You, Suya; Kuo, C. -C. Jay
Descrip3D: Enhancing Large Language Model-based 3D Scene Understanding with Object-Level Text Descriptions Miscellaneous
2025, (Version Number: 1).
@misc{xue_descrip3d_2025,
title = {Descrip3D: Enhancing Large Language Model-based 3D Scene Understanding with Object-Level Text Descriptions},
author = {Jintang Xue and Ganning Zhao and Jie-En Yao and Hong-En Chen and Yue Hu and Meida Chen and Suya You and C. -C. Jay Kuo},
url = {https://arxiv.org/abs/2507.14555},
doi = {10.48550/ARXIV.2507.14555},
year = {2025},
date = {2025-01-01},
urldate = {2025-08-19},
publisher = {arXiv},
abstract = {Understanding 3D scenes goes beyond simply recognizing objects; it requires reasoning about the spatial and semantic relationships between them. Current 3D scene-language models often struggle with this relational understanding, particularly when visual embeddings alone do not adequately convey the roles and interactions of objects. In this paper, we introduce Descrip3D, a novel and powerful framework that explicitly encodes the relationships between objects using natural language. Unlike previous methods that rely only on 2D and 3D embeddings, Descrip3D enhances each object with a textual description that captures both its intrinsic attributes and contextual relationships. These relational cues are incorporated into the model through a dual-level integration: embedding fusion and prompt-level injection. This allows for unified reasoning across various tasks such as grounding, captioning, and question answering, all without the need for task-specific heads or additional supervision. When evaluated on five benchmark datasets, including ScanRefer, Multi3DRefer, ScanQA, SQA3D, and Scan2Cap, Descrip3D consistently outperforms strong baseline models, demonstrating the effectiveness of language-guided relational representation for understanding complex indoor scenes.},
note = {Version Number: 1},
keywords = {LLM},
pubstate = {published},
tppubtype = {misc}
}
Behzad, Tina; Gurney, Nikolos; Wang, Ning; Pynadath, David V.
Beyond Predictions: A Study of AI Strength and Weakness Transparency Communication on Human-AI Collaboration Miscellaneous
2025, (Version Number: 1).
@misc{behzad_beyond_2025,
title = {Beyond Predictions: A Study of AI Strength and Weakness Transparency Communication on Human-AI Collaboration},
author = {Tina Behzad and Nikolos Gurney and Ning Wang and David V. Pynadath},
url = {https://arxiv.org/abs/2508.09033},
doi = {10.48550/ARXIV.2508.09033},
year = {2025},
date = {2025-01-01},
urldate = {2025-08-19},
publisher = {arXiv},
abstract = {The promise of human-AI teaming lies in humans and AI working together to achieve performance levels neither could accomplish alone. Effective communication between AI and humans is crucial for teamwork, enabling users to efficiently benefit from AI assistance. This paper investigates how AI communication impacts human-AI team performance. We examine AI explanations that convey an awareness of its strengths and limitations. To achieve this, we train a decision tree on the model's mistakes, allowing it to recognize and explain where and why it might err. Through a user study on an income prediction task, we assess the impact of varying levels of information and explanations about AI predictions. Our results show that AI performance insights enhance task performance, and conveying AI awareness of its strengths and weaknesses improves trust calibration. These findings highlight the importance of considering how information delivery influences user trust and reliance in AI-assisted decision-making.},
note = {Version Number: 1},
keywords = {AI, DTIC},
pubstate = {published},
tppubtype = {misc}
}
Rizzo, Albert; Mozgai, Sharon; Sigaras, Alexandros; Rubin, John E.; Jotwani, Rohan
Expert Consensus Best Practices for the Safe, Ethical, and Effective Design and Implementation of Artificially Intelligent Conversational Agent (i.e., Chatbot/Virtual Human) Systems in Health Care Applications Journal Article
In: Journal of Medical Extended Reality, vol. 2, no. 1, pp. 209–222, 2025, (_eprint: https://www.liebertpub.com/doi/pdf/10.1177/29941520251369450).
@article{rizzo_expert_2025,
title = {Expert Consensus Best Practices for the Safe, Ethical, and Effective Design and Implementation of Artificially Intelligent Conversational Agent (i.e., Chatbot/Virtual Human) Systems in Health Care Applications},
author = {Albert Rizzo and Sharon Mozgai and Alexandros Sigaras and John E. Rubin and Rohan Jotwani},
url = {https://www.liebertpub.com/doi/abs/10.1177/29941520251369450},
doi = {10.1177/29941520251369450},
year = {2025},
date = {2025-01-01},
journal = {Journal of Medical Extended Reality},
volume = {2},
number = {1},
pages = {209–222},
abstract = {The integration of artificially intelligent conversational agents (AICAs), variously referred to as chatbots and virtual humans (VHs), is transforming health care delivery and education. This article explores our perspective on best practices for the evolution, potential, and ethical considerations of AICAs in clinical and educational contexts. Early applications of simulation technology in health care focused on productivity improvements, teletherapy, and virtual reality therapy applications. Recent technological advancements have enabled the development of high-fidelity extended reality systems and AICAs capable of engaging users in credible interactions. These systems leverage natural language processing, machine learning, large language models, and advanced VH authoring software to create interactive, personalized, and engaging experiences. Recent efforts in the creation of AICAs suggest significant potential benefits, including enhanced patient engagement, improved access to self-care resources, and low-stigma interaction environments. They have demonstrated promise in mental health support, providing a sense of safety and encouraging open disclosure. However, the rapid adoption of AICAs raises critical challenges, including safeguarding user privacy, ensuring system reliability, and addressing ethical concerns. Incidents of harm, such as inappropriate interactions and psychological distress, highlight the need for rigorous design and implementation best practices. This article outlines key principles for developing safe, effective, and equitable AICAs, emphasizing transparency in artificial intelligence (AI) identity, accountability, cultural sensitivity, and informed consent. Additionally, the authors advocate for robust privacy measures, adaptive learning capabilities, and evidence-based content validation to optimize user experience and maintain trust. To mitigate risks, a “human-in-the-loop” approach is recommended, ensuring health care professionals oversee AI-supported decisions. By adhering to these best practices, AICAs can enhance health care accessibility, support clinical training, and complement human professionals. This work aims to provide a foundation for the ethical and effective integration of AICAs, maximizing their potential while minimizing risks, ultimately advancing patient care and education in the digital age.},
note = {_eprint: https://www.liebertpub.com/doi/pdf/10.1177/29941520251369450},
keywords = {DTIC, MedVR},
pubstate = {published},
tppubtype = {article}
}
Hartholt, Arno; Fast, Ed; Kim, Kevin; Sookiassian, Edwin; Leeds, Andrew
TAC-Twin: A Rapid Framework for Personalized Doppelgänger Avatar Creation Using a Modular Virtual Human Pipeline Proceedings Article
In: 2025.
@inproceedings{hartholt_tac-twin_2025,
title = {TAC-Twin: A Rapid Framework for Personalized Doppelgänger Avatar Creation Using a Modular Virtual Human Pipeline},
author = {Arno Hartholt and Ed Fast and Kevin Kim and Edwin Sookiassian and Andrew Leeds},
url = {https://openaccess.cms-conferences.org/publications/book/978-1-964867-74-8/article/978-1-964867-74-8_51},
doi = {10.54941/ahfe1006807},
year = {2025},
date = {2025-01-01},
urldate = {2025-09-18},
abstract = {We present an end-to-end framework for rapidly creating interactive, personalized avatars for scalable training and simulation applications. Built as an extension of the Virtual Human Toolkit, the framework integrates technologies for audio-visual sensing, speech recognition, natural language processing, nonverbal behavior generation, and high-fidelity text-to-speech synthesis. A personalized avatar is defined here as a real-time, embodied digital representation of an actual individual rather than a generic character. The creation pipeline requires only a single facial photograph, processed through a photorealistic character generation workflow, then refined, customized, and deployed in a real-time 3D environment for integration with conversational AI and synthetic voice generation. The system also supports rapid generation of generic avatars from high-quality synthetic headshots produced by generative AI, enabling the creation of diverse, realistic or stylized cohorts within minutes. Our initial use case examines whether personalized avatars enhance engagement, motivation, and performance compared to generic avatars, with the hypothesis that personalization increases relevance, identification, and learning outcomes. We describe the architecture, avatar creation pipeline, and role of generative AI in accelerating development, and share early implementation insights.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Cai, Yunxuan; Xiang, Sitao; Li, Zongjian; Chen, Haiwei; Zhao, Yajie
Bringing Diversity from Diffusion Models to Semantic-Guided Face Asset Generation Miscellaneous
2025, (Version Number: 1).
@misc{cai_bringing_2025,
title = {Bringing Diversity from Diffusion Models to Semantic-Guided Face Asset Generation},
author = {Yunxuan Cai and Sitao Xiang and Zongjian Li and Haiwei Chen and Yajie Zhao},
url = {https://arxiv.org/abs/2504.15259},
doi = {10.48550/ARXIV.2504.15259},
year = {2025},
date = {2025-01-01},
urldate = {2025-06-25},
publisher = {arXiv},
abstract = {Digital modeling and reconstruction of human faces serve various applications. However, its availability is often hindered by the requirements of data capturing devices, manual labor, and suitable actors. This situation restricts the diversity, expressiveness, and control over the resulting models. This work aims to demonstrate that a semantically controllable generative network can provide enhanced control over the digital face modeling process. To enhance diversity beyond the limited human faces scanned in a controlled setting, we introduce a novel data generation pipeline that creates a high-quality 3D face database using a pre-trained diffusion model. Our proposed normalization module converts synthesized data from the diffusion model into high-quality scanned data. Using the 44,000 face models we obtained, we further developed an efficient GAN-based generator. This generator accepts semantic attributes as input, and generates geometry and albedo. It also allows continuous post-editing of attributes in the latent space. Our asset refinement component subsequently creates physically-based facial assets. We introduce a comprehensive system designed for creating and editing high-quality face assets. Our proposed model has undergone extensive experiment, comparison and evaluation. We also integrate everything into a web-based interactive tool. We aim to make this tool publicly available with the release of the paper.},
note = {Version Number: 1},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
Kang, Seoyoung; Yoon, Boram; Kim, Kangsoo; Gratch, Jonathan; Woo, Woontack
How Collaboration Context and Personality Traits Shape the Social Norms of Human-to-Avatar Identity Representation Journal Article
In: IEEE Trans. Visual. Comput. Graphics, pp. 1–10, 2025, ISSN: 1077-2626, 1941-0506, 2160-9306.
@article{kang_how_2025,
title = {How Collaboration Context and Personality Traits Shape the Social Norms of Human-to-Avatar Identity Representation},
author = {Seoyoung Kang and Boram Yoon and Kangsoo Kim and Jonathan Gratch and Woontack Woo},
url = {https://ieeexplore.ieee.org/document/10935702/},
doi = {10.1109/TVCG.2025.3549904},
issn = {1077-2626, 1941-0506, 2160-9306},
year = {2025},
date = {2025-01-01},
urldate = {2025-04-17},
journal = {IEEE Trans. Visual. Comput. Graphics},
pages = {1–10},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Tran, Minh; Yin, Yufeng; Soleymani, Mohammad
SetPeER: Set-Based Personalized Emotion Recognition With Weak Supervision Journal Article
In: IEEE Trans. Affective Comput., pp. 1–15, 2025, ISSN: 1949-3045, 2371-9850.
@article{tran_setpeer_2025,
title = {SetPeER: Set-Based Personalized Emotion Recognition With Weak Supervision},
author = {Minh Tran and Yufeng Yin and Mohammad Soleymani},
url = {https://ieeexplore.ieee.org/document/10993348/},
doi = {10.1109/TAFFC.2025.3568024},
issn = {1949-3045, 2371-9850},
year = {2025},
date = {2025-01-01},
urldate = {2025-05-20},
journal = {IEEE Trans. Affective Comput.},
pages = {1–15},
keywords = {DTIC, Emotion},
pubstate = {published},
tppubtype = {article}
}
Wang, Ning; Hurt, Timothy; Krakowski, Ari; Greenwald, Eric; Hammerman, Jim; Santos, Sabrina De Los; Masur, Omkar; Fu, Boxi; Merchant, Chirag
Virtually Human: An Exhibit for Public AI Education Book Section
In: Stephanidis, Constantine; Antona, Margherita; Ntoa, Stavroula; Salvendy, Gavriel (Ed.): HCI International 2025 Posters, vol. 2529, pp. 436–443, Springer Nature Switzerland, Cham, 2025, ISBN: 978-3-031-94170-2 978-3-031-94171-9, (Series Title: Communications in Computer and Information Science).
@incollection{stephanidis_virtually_2025,
title = {Virtually Human: An Exhibit for Public AI Education},
author = {Ning Wang and Timothy Hurt and Ari Krakowski and Eric Greenwald and Jim Hammerman and Sabrina De Los Santos and Omkar Masur and Boxi Fu and Chirag Merchant},
editor = {Constantine Stephanidis and Margherita Antona and Stavroula Ntoa and Gavriel Salvendy},
url = {https://link.springer.com/10.1007/978-3-031-94171-9_42},
doi = {10.1007/978-3-031-94171-9_42},
isbn = {978-3-031-94170-2 978-3-031-94171-9},
year = {2025},
date = {2025-01-01},
urldate = {2025-06-17},
booktitle = {HCI International 2025 Posters},
volume = {2529},
pages = {436–443},
publisher = {Springer Nature Switzerland},
address = {Cham},
note = {Series Title: Communications in Computer and Information Science},
keywords = {DTIC},
pubstate = {published},
tppubtype = {incollection}
}
Hu, Yue; Liu, Rong; Chen, Meida; Beerel, Peter; Feng, Andrew
SplatMAP: Online Dense Monocular SLAM with 3D Gaussian Splatting Miscellaneous
2025, (arXiv:2501.07015 [cs]).
@misc{hu_splatmap_2025,
title = {SplatMAP: Online Dense Monocular SLAM with 3D Gaussian Splatting},
author = {Yue Hu and Rong Liu and Meida Chen and Peter Beerel and Andrew Feng},
url = {http://arxiv.org/abs/2501.07015},
doi = {10.48550/arXiv.2501.07015},
year = {2025},
date = {2025-01-01},
urldate = {2025-01-16},
publisher = {arXiv},
abstract = {Achieving high-fidelity 3D reconstruction from monocular video remains challenging due to the inherent limitations of traditional methods like Structure-from-Motion (SfM) and monocular SLAM in accurately capturing scene details. While differentiable rendering techniques such as Neural Radiance Fields (NeRF) address some of these challenges, their high computational costs make them unsuitable for real-time applications. Additionally, existing 3D Gaussian Splatting (3DGS) methods often focus on photometric consistency, neglecting geometric accuracy and failing to exploit SLAM's dynamic depth and pose updates for scene refinement. We propose a framework integrating dense SLAM with 3DGS for real-time, high-fidelity dense reconstruction. Our approach introduces SLAM-Informed Adaptive Densification, which dynamically updates and densifies the Gaussian model by leveraging dense point clouds from SLAM. Additionally, we incorporate Geometry-Guided Optimization, which combines edge-aware geometric constraints and photometric consistency to jointly optimize the appearance and geometry of the 3DGS scene representation, enabling detailed and accurate SLAM mapping reconstruction. Experiments on the Replica and TUM-RGBD datasets demonstrate the effectiveness of our approach, achieving state-of-the-art results among monocular systems. Specifically, our method achieves a PSNR of 36.864, SSIM of 0.985, and LPIPS of 0.040 on Replica, representing improvements of 10.7%, 6.4%, and 49.4%, respectively, over the previous SOTA. On TUM-RGBD, our method outperforms the closest baseline by 10.2%, 6.6%, and 34.7% in the same metrics. These results highlight the potential of our framework in bridging the gap between photometric and geometric dense 3D scene representations, paving the way for practical and efficient monocular dense reconstruction.},
note = {arXiv:2501.07015 [cs]},
keywords = {VGL},
pubstate = {published},
tppubtype = {misc}
}
Rizzo, Albert “Skip”; Giosan, Cezar; Deac, George; Zaporozhets, Olya; Syvak, Oksana; Dragayeva, Svetlana; Bodner, Ehud; Mann, Shel; Stone, Jessica
The Virtual Ukraine Project: Trauma Therapy in Warzones with Virtual Reality Book Section
In: Stone, Jessica (Ed.): Mental Health Virtual Reality, pp. 159–180, Wiley, 2025, ISBN: 978-1-394-27845-9 978-1-394-27848-0.
@incollection{stone_virtual_2025,
title = {The Virtual Ukraine Project: Trauma Therapy in Warzones with Virtual Reality},
author = {Albert “Skip” Rizzo and Cezar Giosan and George Deac and Olya Zaporozhets and Oksana Syvak and Svetlana Dragayeva and Ehud Bodner and Shel Mann and Jessica Stone},
editor = {Jessica Stone},
url = {https://onlinelibrary.wiley.com/doi/10.1002/9781394278480.ch12},
doi = {10.1002/9781394278480.ch12},
isbn = {978-1-394-27845-9 978-1-394-27848-0},
year = {2025},
date = {2025-01-01},
urldate = {2025-01-16},
booktitle = {Mental Health Virtual Reality},
pages = {159–180},
publisher = {Wiley},
edition = {1},
keywords = {MedVR},
pubstate = {published},
tppubtype = {incollection}
}
Liu, Rong; Sun, Dylan; Chen, Meida; Wang, Yue; Feng, Andrew
Deformable Beta Splatting Miscellaneous
2025, (arXiv:2501.18630 [cs]).
@misc{liu_deformable_2025,
title = {Deformable Beta Splatting},
author = {Rong Liu and Dylan Sun and Meida Chen and Yue Wang and Andrew Feng},
url = {http://arxiv.org/abs/2501.18630},
doi = {10.48550/arXiv.2501.18630},
year = {2025},
date = {2025-01-01},
urldate = {2025-02-20},
publisher = {arXiv},
abstract = {3D Gaussian Splatting (3DGS) has advanced radiance field reconstruction by enabling real-time rendering. However, its reliance on Gaussian kernels for geometry and low-order Spherical Harmonics (SH) for color encoding limits its ability to capture complex geometries and diverse colors. We introduce Deformable Beta Splatting (DBS), a deformable and compact approach that enhances both geometry and color representation. DBS replaces Gaussian kernels with deformable Beta Kernels, which offer bounded support and adaptive frequency control to capture fine geometric details with higher fidelity while achieving better memory efficiency. In addition, we extend the Beta Kernel to color encoding, which facilitates improved representation of diffuse and specular components, yielding superior results compared to SH-based methods. Furthermore, unlike prior densification techniques that depend on Gaussian properties, we mathematically prove that adjusting regularized opacity alone ensures distribution-preserved Markov chain Monte Carlo (MCMC), independent of the splatting kernel type. Experimental results demonstrate that DBS achieves state-of-the-art visual quality while utilizing only 45% of the parameters and rendering 1.5x faster than 3DGS-based methods. Notably, for the first time, splatting-based methods outperform state-of-the-art Neural Radiance Fields, highlighting the superior performance and efficiency of DBS for real-time radiance field rendering.},
note = {arXiv:2501.18630 [cs]},
keywords = {DTIC, Narrative},
pubstate = {published},
tppubtype = {misc}
}
Chang, Di; Xu, Hongyi; Xie, You; Gao, Yipeng; Kuang, Zhengfei; Cai, Shengqu; Zhang, Chenxu; Song, Guoxian; Wang, Chao; Shi, Yichun; Chen, Zeyuan; Zhou, Shijie; Luo, Linjie; Wetzstein, Gordon; Soleymani, Mohammad
X-Dyna: Expressive Dynamic Human Image Animation Miscellaneous
2025, (arXiv:2501.10021 [cs]).
@misc{chang_x-dyna_2025,
title = {X-Dyna: Expressive Dynamic Human Image Animation},
author = {Di Chang and Hongyi Xu and You Xie and Yipeng Gao and Zhengfei Kuang and Shengqu Cai and Chenxu Zhang and Guoxian Song and Chao Wang and Yichun Shi and Zeyuan Chen and Shijie Zhou and Linjie Luo and Gordon Wetzstein and Mohammad Soleymani},
url = {http://arxiv.org/abs/2501.10021},
doi = {10.48550/arXiv.2501.10021},
year = {2025},
date = {2025-01-01},
urldate = {2025-02-20},
publisher = {arXiv},
abstract = {We introduce X-Dyna, a novel zero-shot, diffusion-based pipeline for animating a single human image using facial expressions and body movements derived from a driving video, that generates realistic, context-aware dynamics for both the subject and the surrounding environment. Building on prior approaches centered on human pose control, X-Dyna addresses key shortcomings causing the loss of dynamic details, enhancing the lifelike qualities of human video animations. At the core of our approach is the Dynamics-Adapter, a lightweight module that effectively integrates reference appearance context into the spatial attentions of the diffusion backbone while preserving the capacity of motion modules in synthesizing fluid and intricate dynamic details. Beyond body pose control, we connect a local control module with our model to capture identity-disentangled facial expressions, facilitating accurate expression transfer for enhanced realism in animated scenes. Together, these components form a unified framework capable of learning physical human motion and natural scene dynamics from a diverse blend of human and scene videos. Comprehensive qualitative and quantitative evaluations demonstrate that X-Dyna outperforms state-of-the-art methods, creating highly lifelike and expressive animations. The code is available at https://github.com/bytedance/X-Dyna.},
note = {arXiv:2501.10021 [cs]},
keywords = {DTIC, VGL},
pubstate = {published},
tppubtype = {misc}
}
Rodrigues, Patrick Borges; Becerik-Gerber, Burcin; Soibelman, Lucio; Lucas, Gale M.; Roll, Shawn C.
Impact of selective environmental sound attenuation on operator performance, stress, attention, and task engagement in teleoperated demolition Journal Article
In: Automation in Construction, vol. 169, pp. 105876, 2025, ISSN: 09265805.
@article{rodrigues_impact_2025,
title = {Impact of selective environmental sound attenuation on operator performance, stress, attention, and task engagement in teleoperated demolition},
author = {Patrick Borges Rodrigues and Burcin Becerik-Gerber and Lucio Soibelman and Gale M. Lucas and Shawn C. Roll},
url = {https://linkinghub.elsevier.com/retrieve/pii/S0926580524006125},
doi = {10.1016/j.autcon.2024.105876},
issn = {09265805},
year = {2025},
date = {2025-01-01},
urldate = {2024-12-20},
journal = {Automation in Construction},
volume = {169},
pages = {105876},
keywords = {DTIC},
pubstate = {published},
tppubtype = {article}
}
Siniukov, Maksim; Xing, Ellie; Attaripour Isfahani, Sanaz; Soleymani, Mohammad
Towards a Generalizable Speech Marker for Parkinson's Disease Diagnosis Miscellaneous
2025, (Version Number: 1).
@misc{siniukov_towards_2025,
title = {Towards a Generalizable Speech Marker for Parkinson's Disease Diagnosis},
author = {Maksim Siniukov and Ellie Xing and Sanaz Attaripour Isfahani and Mohammad Soleymani},
url = {https://arxiv.org/abs/2501.03581},
doi = {10.48550/ARXIV.2501.03581},
year = {2025},
date = {2025-01-01},
urldate = {2025-01-14},
publisher = {arXiv},
abstract = {Parkinson's Disease (PD) is a neurodegenerative disorder characterized by motor symptoms, including altered voice production in the early stages. Early diagnosis is crucial not only to improve PD patients' quality of life but also to enhance the efficacy of potential disease-modifying therapies during early neurodegeneration, a window often missed by current diagnostic tools. In this paper, we propose a more generalizable approach to PD recognition through domain adaptation and self-supervised learning. We demonstrate the generalization capabilities of the proposed approach across diverse datasets in different languages. Our approach leverages HuBERT, a large deep neural network originally trained for speech recognition and further trains it on unlabeled speech data from a population that is similar to the target group, i.e., the elderly, in a self-supervised manner. The model is then fine-tuned and adapted for use across different datasets in multiple languages, including English, Italian, and Spanish. Evaluations on four publicly available PD datasets demonstrate the model's efficacy, achieving an average specificity of 92.1% and an average sensitivity of 91.2%. This method offers objective and consistent evaluations across large populations, addressing the variability inherent in human assessments and providing a non-invasive, cost-effective and accessible diagnostic option.},
note = {Version Number: 1},
keywords = {DTIC},
pubstate = {published},
tppubtype = {misc}
}
2024
Addison, Parker; Nguyen, Minh-Tuan H.; Medan, Tomislav; Shah, Jinali; Manzari, Mohammad T.; McElrone, Brendan; Lalwani, Laksh; More, Aboli; Sharma, Smita; Roth, Holger R.; Yang, Isaac; Chen, Chester; Xu, Daguang; Cheng, Yan; Feng, Andrew; Xu, Ziyue
C-FedRAG: A Confidential Federated Retrieval-Augmented Generation System Miscellaneous
2024, (arXiv:2412.13163 [cs]).
@misc{addison_c-fedrag_2024,
title = {C-FedRAG: A Confidential Federated Retrieval-Augmented Generation System},
author = {Parker Addison and Minh-Tuan H. Nguyen and Tomislav Medan and Jinali Shah and Mohammad T. Manzari and Brendan McElrone and Laksh Lalwani and Aboli More and Smita Sharma and Holger R. Roth and Isaac Yang and Chester Chen and Daguang Xu and Yan Cheng and Andrew Feng and Ziyue Xu},
url = {http://arxiv.org/abs/2412.13163},
doi = {10.48550/arXiv.2412.13163},
year = {2024},
date = {2024-12-01},
urldate = {2025-03-20},
publisher = {arXiv},
abstract = {Organizations seeking to utilize Large Language Models (LLMs) for knowledge querying and analysis often encounter challenges in maintaining an LLM fine-tuned on targeted, up-to-date information that keeps answers relevant and grounded. Retrieval Augmented Generation (RAG) has quickly become a feasible solution for organizations looking to overcome the challenges of maintaining proprietary models and to help reduce LLM hallucinations in their query responses. However, RAG comes with its own issues regarding scaling data pipelines across tiered-access and disparate data sources. In many scenarios, it is necessary to query beyond a single data silo to provide richer and more relevant context for an LLM. Analyzing data sources within and across organizational trust boundaries is often limited by complex data-sharing policies that prohibit centralized data storage and therefore inhibit the fast and effective setup and scaling of RAG solutions. In this paper, we introduce Confidential Computing (CC) techniques as a solution for secure Federated Retrieval Augmented Generation (FedRAG). Our proposed Confidential FedRAG system (C-FedRAG) enables secure connection and scaling of RAG workflows across a decentralized network of data providers by ensuring context confidentiality. We also demonstrate how to implement a C-FedRAG system using the NVIDIA FLARE SDK and assess its performance using the MedRAG toolkit and MIRAGE benchmarking dataset.},
note = {arXiv:2412.13163 [cs]},
keywords = {LLM},
pubstate = {published},
tppubtype = {misc}
}
Murray, Benjamin; Brown, Richard; Ma, Pengcheng; Kerfoot, Eric; Xu, Daguang; Feng, Andrew; Cardoso, Jorge; Ourselin, Sebastien; Modat, Marc
Lazy Resampling: Fast and information preserving preprocessing for deep learning Journal Article
In: Computer Methods and Programs in Biomedicine, vol. 257, pp. 108422, 2024, ISSN: 01692607.
@article{murray_lazy_2024,
title = {Lazy Resampling: Fast and information preserving preprocessing for deep learning},
author = {Benjamin Murray and Richard Brown and Pengcheng Ma and Eric Kerfoot and Daguang Xu and Andrew Feng and Jorge Cardoso and Sebastien Ourselin and Marc Modat},
url = {https://linkinghub.elsevier.com/retrieve/pii/S0169260724004152},
doi = {10.1016/j.cmpb.2024.108422},
issn = {01692607},
year = {2024},
date = {2024-12-01},
urldate = {2025-01-16},
journal = {Computer Methods and Programs in Biomedicine},
volume = {257},
pages = {108422},
keywords = {Narrative},
pubstate = {published},
tppubtype = {article}
}
Tran, Minh; Chang, Di; Siniukov, Maksim; Soleymani, Mohammad
DIM: Dyadic Interaction Modeling for Social Behavior Generation Book Section
In: Leonardis, Aleš; Ricci, Elisa; Roth, Stefan; Russakovsky, Olga; Sattler, Torsten; Varol, Gül (Ed.): Computer Vision – ECCV 2024, vol. 15095, pp. 484–503, Springer Nature Switzerland, Cham, 2024, ISBN: 978-3-031-72912-6 978-3-031-72913-3, (Series Title: Lecture Notes in Computer Science).
@incollection{leonardis_dim_2024,
title = {DIM: Dyadic Interaction Modeling for Social Behavior Generation},
author = {Minh Tran and Di Chang and Maksim Siniukov and Mohammad Soleymani},
editor = {Aleš Leonardis and Elisa Ricci and Stefan Roth and Olga Russakovsky and Torsten Sattler and Gül Varol},
url = {https://link.springer.com/10.1007/978-3-031-72913-3_27},
doi = {10.1007/978-3-031-72913-3_27},
isbn = {978-3-031-72912-6 978-3-031-72913-3},
year = {2024},
date = {2024-12-01},
urldate = {2025-01-16},
booktitle = {Computer Vision – ECCV 2024},
volume = {15095},
pages = {484–503},
publisher = {Springer Nature Switzerland},
address = {Cham},
note = {Series Title: Lecture Notes in Computer Science},
keywords = {DTIC, Social},
pubstate = {published},
tppubtype = {incollection}
}
Xu, Jiuyi; Chen, Meida; Feng, Andrew; Yu, Zifan; Shi, Yangming
Open-Vocabulary High-Resolution 3D (OVHR3D) Data Segmentation and Annotation Framework Journal Article
In: 2024, (Publisher: arXiv, Version Number: 2).
@article{xu_open-vocabulary_2024,
title = {Open-Vocabulary High-Resolution 3D (OVHR3D) Data Segmentation and Annotation Framework},
author = {Jiuyi Xu and Meida Chen and Andrew Feng and Zifan Yu and Yangming Shi},
url = {https://arxiv.org/abs/2412.06268},
doi = {10.48550/ARXIV.2412.06268},
year = {2024},
date = {2024-12-01},
urldate = {2024-12-20},
abstract = {In the domain of the U.S. Army modeling and simulation, the availability of high quality annotated 3D data is pivotal to creating virtual environments for training and simulations. Traditional methodologies for 3D semantic and instance segmentation, such as KpConv, RandLA, Mask3D, etc., are designed to train on extensive labeled datasets to obtain satisfactory performance in practical tasks. This requirement presents a significant challenge, given the inherent scarcity of manually annotated 3D datasets, particularly for the military use cases. Recognizing this gap, our previous research leverages the One World Terrain data repository manually annotated databases, as showcased at IITSEC 2019 and 2021, to enrich the training dataset for deep learning models. However, collecting and annotating large scale 3D data for specific tasks remains costly and inefficient. To this end, the objective of this research is to design and develop a comprehensive and efficient framework for 3D segmentation tasks to assist in 3D data annotation. This framework integrates Grounding DINO and the Segment Anything Model, augmented by an enhancement in 2D image rendering via 3D mesh. Furthermore, the authors have also developed a user-friendly interface that facilitates the 3D annotation process, offering intuitive visualization of rendered images and the 3D point cloud.},
note = {Publisher: arXiv, Version Number: 2},
keywords = {DTIC, Narrative},
pubstate = {published},
tppubtype = {article}
}
Roemmele, Melissa; Gordon, Andrew
From Test-Taking to Test-Making: Examining LLM Authoring of Commonsense Assessment Items Proceedings Article
In: Findings of the Association for Computational Linguistics: EMNLP 2024, pp. 5193–5203, Association for Computational Linguistics, Miami, Florida, USA, 2024.
@inproceedings{roemmele_test-taking_2024,
title = {From Test-Taking to Test-Making: Examining LLM Authoring of Commonsense Assessment Items},
author = {Melissa Roemmele and Andrew Gordon},
url = {https://aclanthology.org/2024.findings-emnlp.299},
doi = {10.18653/v1/2024.findings-emnlp.299},
year = {2024},
date = {2024-11-01},
urldate = {2024-12-05},
booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2024},
pages = {5193–5203},
publisher = {Association for Computational Linguistics},
address = {Miami, Florida, USA},
keywords = {DTIC, Learning Sciences},
pubstate = {published},
tppubtype = {inproceedings}
}
Zhu, Xin; Su, Zhenghui; Gratch, Jonathan; Culbertson, Heather
How Visualizing Touch Can Transform Perceptions of Intensity, Realism, and Emotion? Book Section
In: Kajimoto, Hiroyuki; Lopes, Pedro; Pacchierotti, Claudio; Basdogan, Cagatay; Gori, Monica; Lemaire-Semail, Betty; Marchal, Maud (Ed.): Haptics: Understanding Touch; Technology and Systems; Applications and Interaction, vol. 14768, pp. 194–207, Springer Nature Switzerland, Cham, 2024, ISBN: 978-3-031-70057-6 978-3-031-70058-3, (Series Title: Lecture Notes in Computer Science).
@incollection{kajimoto_how_2024,
title = {How Visualizing Touch Can Transform Perceptions of Intensity, Realism, and Emotion?},
author = {Xin Zhu and Zhenghui Su and Jonathan Gratch and Heather Culbertson},
editor = {Hiroyuki Kajimoto and Pedro Lopes and Claudio Pacchierotti and Cagatay Basdogan and Monica Gori and Betty Lemaire-Semail and Maud Marchal},
url = {https://link.springer.com/10.1007/978-3-031-70058-3_16},
doi = {10.1007/978-3-031-70058-3_16},
isbn = {978-3-031-70057-6 978-3-031-70058-3},
year = {2024},
date = {2024-11-01},
urldate = {2024-12-05},
booktitle = {Haptics: Understanding Touch; Technology and Systems; Applications and Interaction},
volume = {14768},
pages = {194–207},
publisher = {Springer Nature Switzerland},
address = {Cham},
note = {Series Title: Lecture Notes in Computer Science},
keywords = {VR},
pubstate = {published},
tppubtype = {incollection}
}
Siniukov, Maksim; Yin, Yufeng; Fast, Eli; Qi, Yingshan; Monga, Aarav; Kim, Audrey; Soleymani, Mohammad
SEMPI: A Database for Understanding Social Engagement in Video-Mediated Multiparty Interaction Proceedings Article
In: International Conference on Multimodal Interaction, pp. 546–555, ACM, San Jose, Costa Rica, 2024, ISBN: 979-8-4007-0462-8.
@inproceedings{siniukov_sempi_2024,
title = {SEMPI: A Database for Understanding Social Engagement in Video-Mediated Multiparty Interaction},
author = {Maksim Siniukov and Yufeng Yin and Eli Fast and Yingshan Qi and Aarav Monga and Audrey Kim and Mohammad Soleymani},
url = {https://dl.acm.org/doi/10.1145/3678957.3685752},
doi = {10.1145/3678957.3685752},
isbn = {979-8-4007-0462-8},
year = {2024},
date = {2024-11-01},
urldate = {2024-12-05},
booktitle = {International Conference on Multimodal Interaction},
pages = {546–555},
publisher = {ACM},
address = {San Jose, Costa Rica},
keywords = {Social Simulation},
pubstate = {published},
tppubtype = {inproceedings}
}
Andalibi, Nazanin; Stark, Luke; McDuff, Daniel; Picard, Rosalind; Gratch, Jonathan; Howell, Noura
What should we do with Emotion AI? Towards an Agenda for the Next 30 Years Proceedings Article
In: Companion Publication of the 2024 Conference on Computer-Supported Cooperative Work and Social Computing, pp. 98–101, ACM, San Jose, Costa Rica, 2024, ISBN: 979-8-4007-1114-5.
@inproceedings{andalibi_what_2024,
title = {What should we do with Emotion AI? Towards an Agenda for the Next 30 Years},
author = {Nazanin Andalibi and Luke Stark and Daniel McDuff and Rosalind Picard and Jonathan Gratch and Noura Howell},
url = {https://dl.acm.org/doi/10.1145/3678884.3689135},
doi = {10.1145/3678884.3689135},
isbn = {979-8-4007-1114-5},
year = {2024},
date = {2024-11-01},
urldate = {2024-12-05},
booktitle = {Companion Publication of the 2024 Conference on Computer-Supported Cooperative Work and Social Computing},
pages = {98–101},
publisher = {ACM},
address = {San Jose, Costa Rica},
keywords = {Emotion},
pubstate = {published},
tppubtype = {inproceedings}
}
Loucks, Laura; Rizzo, Albert; Rothbaum, Barbara O.
Virtual Reality Exposure for Treating PTSD Due to Military Sexual Trauma Journal Article
In: J Clin Psychol, pp. jclp.23750, 2024, ISSN: 0021-9762, 1097-4679.
@article{loucks_virtual_2024,
title = {Virtual Reality Exposure for Treating PTSD Due to Military Sexual Trauma},
author = {Laura Loucks and Albert Rizzo and Barbara O. Rothbaum},
url = {https://onlinelibrary.wiley.com/doi/10.1002/jclp.23750},
doi = {10.1002/jclp.23750},
issn = {0021-9762, 1097-4679},
year = {2024},
date = {2024-11-01},
urldate = {2024-12-05},
journal = {J Clin Psychol},
pages = {jclp.23750},
abstract = {Virtual reality exposure therapy (VRE) has been used in the treatment of combat‐related PTSD since the late 1990s and was recently adapted to treat PTSD due to military sexual trauma (MST). With content specifically tailored to MST‐related contexts, we present the case study of a military veteran who participated in the open clinical trial examining the feasibility of VRE in the treatment of MST‐related PTSD (Loucks et al. 2019). We illustrate VRE's use in activating the trauma memory to facilitate therapeutic emotional processing across sessions and overall symptom reduction. The case study includes common challenges that may occur during VRE and relevant recommendations. The discussion will include lessons learned from the case study and the open clinical trial, recommendations for the flexible application of VRE, and the ongoing developments in the latest version of the VRE system, informed by feedback acquired from the clinicians and patients who experienced it in the initial clinical trial.},
keywords = {DTIC, MedVR},
pubstate = {published},
tppubtype = {article}
}
Hills, Mellanie; Korjian, Serge; Chi, Gerald; Natale, Andrea; Saxon, Leslie; Ferdinand, Keith; Kwaku, Kevin; Brancato, Scott; Baca-Motes, Katie; Steinhubl, Steve; Wessler, Jeff; Goldberg, Nieca; Asthana, Anisha; Shute, Kate; Applebaum, Jill; Doran, Kathleen; Nikolovski, Janeta; Kaul, Simrati; Wentworth, Dereck; Damaraju, Cv; DeFalco, Frank; Tavakoli, Cammie; Patel, Mithun; Curtis, Anne; Spertus, John; Gibson, Charles
Insights for Direct-to-Patient Clinical Trial Recruitment Strategies From the Heartline Study Journal Article
In: Circulation, vol. 150, no. Suppl_1, 2024, ISSN: 0009-7322, 1524-4539.
@article{hills_insights_2024,
title = {Insights for Direct-to-Patient Clinical Trial Recruitment Strategies From the Heartline Study},
author = {Mellanie Hills and Serge Korjian and Gerald Chi and Andrea Natale and Leslie Saxon and Keith Ferdinand and Kevin Kwaku and Scott Brancato and Katie Baca-Motes and Steve Steinhubl and Jeff Wessler and Nieca Goldberg and Anisha Asthana and Kate Shute and Jill Applebaum and Kathleen Doran and Janeta Nikolovski and Simrati Kaul and Dereck Wentworth and Cv Damaraju and Frank DeFalco and Cammie Tavakoli and Mithun Patel and Anne Curtis and John Spertus and Charles Gibson},
url = {https://www.ahajournals.org/doi/10.1161/circ.150.suppl_1.4143017},
doi = {10.1161/circ.150.suppl_1.4143017},
issn = {0009-7322, 1524-4539},
year = {2024},
date = {2024-11-01},
urldate = {2024-12-05},
journal = {Circulation},
volume = {150},
number = {Suppl_1},
abstract = {Background:
Decentralized clinical trials using direct-to-participant recruitment can potentially engage large, representative participant pools.
Research Question:
Can a decentralized clinical trial use a multichannel approach to recruit patients >65 years old across the United States?
Goals/Aims:
To share insights on multichannel strategies for participant recruitment in the decentralized, app-based Heartline study.
Methods:
Heartline is a randomized trial testing the impact of a mobile app-based heart health program with the electrocardiogram (ECG) and Irregular Rhythm Notification (IRN) features on Apple Watch for early diagnosis, treatment, and outcomes of atrial fibrillation. Eligible participants were US adults aged ≥65 years with an iPhone and Medicare coverage. Multiple pathways for broad outreach were explored, including digital (eg, email, social media) and traditional channels (eg, direct mail, community outreach). Recruitment efforts were assessed and refined to reach a large eligible population.
Results:
A multichannel approach led to ~300,000 Heartline study app installations. In total, 34,244 participants completed enrollment (Feb 2020-Dec 2022), of whom 28,155 completed baseline demographic assessments. Participants were widely distributed geographically, with notable representation of outlying and rural areas (Figure 1). Women accounted for 54% of the participants. Overall, most participants were White (93.0%), with Asian, Black, and Hispanic participants representing 2.8%, 2.7%, and 2.5%, respectively.
Conclusion:
The Heartline study demonstrated the ability to recruit large numbers of participants aged ≥65 years using a direct-to-participant approach. Broad outreach strategies ensured gender and geographic diversity, enrolling a higher percentage of women than typical cardiology trials, and participation from rural areas. However, underrepresentation across racial/ethnic groups persisted and strategies to increase enrollment are needed. For similar trials, a strategic multichannel approach, with strong data and analytics capabilities may be beneficial to effectively target and enroll eligible participants.},
keywords = {CBC},
pubstate = {published},
tppubtype = {article}
}
Chen, Meida; Han, Kangle; Yu, Zifan; Feng, Andrew; Hou, Yu; You, Suya; Soibelman, Lucio
An Aerial Photogrammetry Benchmark Dataset for Point Cloud Segmentation and Style Translation Journal Article
In: Remote Sensing, vol. 16, no. 22, pp. 4240, 2024, ISSN: 2072-4292.
@article{chen_aerial_2024,
title = {An Aerial Photogrammetry Benchmark Dataset for Point Cloud Segmentation and Style Translation},
author = {Meida Chen and Kangle Han and Zifan Yu and Andrew Feng and Yu Hou and Suya You and Lucio Soibelman},
url = {https://www.mdpi.com/2072-4292/16/22/4240},
doi = {10.3390/rs16224240},
issn = {2072-4292},
year = {2024},
date = {2024-11-01},
urldate = {2024-12-05},
journal = {Remote Sensing},
volume = {16},
number = {22},
pages = {4240},
abstract = {The recent surge in diverse 3D datasets spanning various scales and applications marks a significant advancement in the field. However, the comprehensive process of data acquisition, refinement, and annotation at a large scale poses a formidable challenge, particularly for individual researchers and small teams. To this end, we present a novel synthetic 3D point cloud generation framework that can produce detailed outdoor aerial photogrammetric 3D datasets with accurate ground truth annotations without the labor-intensive and time-consuming data collection/annotation processes. Our pipeline procedurally generates synthetic environments, mirroring real-world data collection and 3D reconstruction processes. A key feature of our framework is its ability to replicate consistent quality, noise patterns, and diversity similar to real-world datasets. This is achieved by adopting UAV flight patterns that resemble those used in real-world data collection processes (e.g., the cross-hatch flight pattern) across various synthetic terrains that are procedurally generated, thereby ensuring data consistency akin to real-world scenarios. Moreover, the generated datasets are enriched with precise semantic and instance annotations, eliminating the need for manual labeling. Our approach has led to the development and release of the Semantic Terrain Points Labeling—Synthetic 3D (STPLS3D) benchmark, an extensive outdoor 3D dataset encompassing over 16 km2, featuring up to 19 semantic labels. We also collected, reconstructed, and annotated four real-world datasets for validation purposes. Extensive experiments on these datasets demonstrate our synthetic datasets’ effectiveness, superior quality, and their value as a benchmark dataset for further point cloud research.},
keywords = {DTIC, VGL},
pubstate = {published},
tppubtype = {article}
}
Bonial, Claire; Lukin, Stephanie M.; Abrams, Mitchell; Baker, Anthony; Donatelli, Lucia; Foots, Ashley; Hayes, Cory J.; Henry, Cassidy; Hudson, Taylor; Marge, Matthew; Pollard, Kimberly A.; Artstein, Ron; Traum, David; Voss, Clare R.
Human–robot dialogue annotation for multi-modal common ground Journal Article
In: Lang Resources & Evaluation, 2024, ISSN: 1574-020X, 1574-0218.
@article{bonial_humanrobot_2024,
title = {Human–robot dialogue annotation for multi-modal common ground},
author = {Claire Bonial and Stephanie M. Lukin and Mitchell Abrams and Anthony Baker and Lucia Donatelli and Ashley Foots and Cory J. Hayes and Cassidy Henry and Taylor Hudson and Matthew Marge and Kimberly A. Pollard and Ron Artstein and David Traum and Clare R. Voss},
url = {https://link.springer.com/10.1007/s10579-024-09784-2},
doi = {10.1007/s10579-024-09784-2},
issn = {1574-020X, 1574-0218},
year = {2024},
date = {2024-11-01},
urldate = {2024-12-05},
journal = {Lang Resources & Evaluation},
keywords = {DTIC, Virtual Humans},
pubstate = {published},
tppubtype = {article}
}
Marti, Deniz; Budathoki, Anjila; Ding, Yi; Lucas, Gale; Nelson, David
How Does Acknowledging Users’ Preferences Impact AI’s Ability to Make Conflicting Recommendations? Journal Article
In: International Journal of Human–Computer Interaction, pp. 1–12, 2024, ISSN: 1044-7318, 1532-7590.
@article{marti_how_2024,
title = {How Does Acknowledging Users’ Preferences Impact AI’s Ability to Make Conflicting Recommendations?},
author = {Deniz Marti and Anjila Budathoki and Yi Ding and Gale Lucas and David Nelson},
url = {https://www.tandfonline.com/doi/full/10.1080/10447318.2024.2426035},
doi = {10.1080/10447318.2024.2426035},
issn = {1044-7318, 1532-7590},
year = {2024},
date = {2024-11-01},
urldate = {2024-12-05},
journal = {International Journal of Human–Computer Interaction},
pages = {1–12},
keywords = {DTIC, Virtual Humans},
pubstate = {published},
tppubtype = {article}
}
Vlake, Johan H; Drop, Denzel L Q; Bommel, Jasper Van; Riva, Giuseppe; Wiederhold, Brenda K; Cipresso, Pietro; Rizzo, Albert S; Rothbaum, Barbara O; Botella, Cristina; Hooft, Lotty; Bienvenu, Oscar J; Jung, Christian; Geerts, Bart; Wils, Evert-Jan; Gommers, Diederik; Genderen, Michel E Van; RATE-XR Expert Group
Reporting Guidelines for the Early-Phase Clinical Evaluation of Applications Using Extended Reality: RATE-XR Qualitative Study Guideline Journal Article
In: J Med Internet Res, vol. 26, pp. e56790, 2024, ISSN: 1438-8871.
@article{vlake_reporting_2024,
title = {Reporting Guidelines for the Early-Phase Clinical Evaluation of Applications Using Extended Reality: RATE-XR Qualitative Study Guideline},
author = {Johan H Vlake and Denzel L Q Drop and Jasper Van Bommel and Giuseppe Riva and Brenda K Wiederhold and Pietro Cipresso and Albert S Rizzo and Barbara O Rothbaum and Cristina Botella and Lotty Hooft and Oscar J Bienvenu and Christian Jung and Bart Geerts and Evert-Jan Wils and Diederik Gommers and Michel E Van Genderen and RATE-XR Expert Group},
url = {https://www.jmir.org/2024/1/e56790},
doi = {10.2196/56790},
issn = {1438-8871},
year = {2024},
date = {2024-11-01},
urldate = {2024-12-05},
journal = {J Med Internet Res},
volume = {26},
pages = {e56790},
abstract = {Background
Extended reality (XR), encompassing technologies such as virtual reality, augmented reality, and mixed reality, has rapidly gained prominence in health care. However, existing XR research often lacks rigor, proper controls, and standardization.
Objective
To address this and to enhance the transparency and quality of reporting in early-phase clinical evaluations of XR applications, we present the “Reporting for the early-phase clinical evaluation of applications using extended reality” (RATE-XR) guideline.
Methods
We conducted a 2-round modified Delphi process involving experts from diverse stakeholder categories, and the RATE-XR is therefore the result of a consensus-based, multistakeholder effort.
Results
The guideline comprises 17 XR-specific (composed of 18 subitems) and 14 generic reporting items, each with a complementary Explanation & Elaboration section.
Conclusions
The items encompass critical aspects of XR research, from clinical utility and safety to human factors and ethics. By offering a comprehensive checklist for reporting, the RATE-XR guideline facilitates robust assessment and replication of early-stage clinical XR studies. It underscores the need for transparency, patient-centeredness, and balanced evaluation of the applications of XR in health care. By providing an actionable checklist of minimal reporting items, this guideline will facilitate the responsible development and integration of XR technologies into health care and related fields.},
keywords = {MedVR},
pubstate = {published},
tppubtype = {article}
}
Roemmele, Melissa; Gordon, Andrew S.
From Test-Taking to Test-Making: Examining LLM Authoring of Commonsense Assessment Items Miscellaneous
2024, (Version Number: 1).
@misc{roemmele_test-taking_2024-1,
title = {From Test-Taking to Test-Making: Examining LLM Authoring of Commonsense Assessment Items},
author = {Melissa Roemmele and Andrew S. Gordon},
url = {https://arxiv.org/abs/2410.14897},
doi = {10.48550/ARXIV.2410.14897},
year = {2024},
date = {2024-10-01},
urldate = {2024-12-05},
publisher = {arXiv},
abstract = {LLMs can now perform a variety of complex writing tasks. They also excel in answering questions pertaining to natural language inference and commonsense reasoning. Composing these questions is itself a skilled writing task, so in this paper we consider LLMs as authors of commonsense assessment items. We prompt LLMs to generate items in the style of a prominent benchmark for commonsense reasoning, the Choice of Plausible Alternatives (COPA). We examine the outcome according to analyses facilitated by the LLMs and human annotation. We find that LLMs that succeed in answering the original COPA benchmark are also more successful in authoring their own items.},
note = {Version Number: 1},
keywords = {DTIC, Learning Sciences},
pubstate = {published},
tppubtype = {misc}
}
Lin, Spencer; Rizk, Basem; Jun, Miru; Artze, Andy; Sullivan, Caitlin; Mozgai, Sharon; Fisher, Scott
Estuary: A Framework For Building Multimodal Low-Latency Real-Time Socially Interactive Agents Miscellaneous
2024, (arXiv:2410.20116 [cs]).
@misc{lin_estuary_2024,
title = {Estuary: A Framework For Building Multimodal Low-Latency Real-Time Socially Interactive Agents},
author = {Spencer Lin and Basem Rizk and Miru Jun and Andy Artze and Caitlin Sullivan and Sharon Mozgai and Scott Fisher},
url = {http://arxiv.org/abs/2410.20116},
doi = {10.1145/3652988.3696198},
year = {2024},
date = {2024-10-01},
urldate = {2024-12-06},
abstract = {The rise in capability and ubiquity of generative artificial intelligence (AI) technologies has enabled its application to the field of Socially Interactive Agents (SIAs). Despite rising interest in modern AI-powered components used for real-time SIA research, substantial friction remains due to the absence of a standardized and universal SIA framework. To target this absence, we developed Estuary: a multimodal (text, audio, and soon video) framework which facilitates the development of low-latency, real-time SIAs. Estuary seeks to reduce repeat work between studies and to provide a flexible platform that can be run entirely off-cloud to maximize configurability, controllability, reproducibility of studies, and speed of agent response times. We are able to do this by constructing a robust multimodal framework which incorporates current and future components seamlessly into a modular and interoperable architecture.},
note = {arXiv:2410.20116 [cs]},
keywords = {Virtual Agents},
pubstate = {published},
tppubtype = {misc}
}
Tran, Minh; Kim, Yelin; Su, Che-Chun; Kuo, Cheng-Hao; Sun, Min; Soleymani, Mohammad
Ex2Eg-MAE: A Framework for Adaptation of Exocentric Video Masked Autoencoders for Egocentric Social Role Understanding Book Section
In: Leonardis, Aleš; Ricci, Elisa; Roth, Stefan; Russakovsky, Olga; Sattler, Torsten; Varol, Gül (Ed.): Computer Vision – ECCV 2024, vol. 15138, pp. 1–19, Springer Nature Switzerland, Cham, 2024, ISBN: 978-3-031-72988-1 978-3-031-72989-8, (Series Title: Lecture Notes in Computer Science).
@incollection{leonardis_ex2eg-mae_2024,
title = {Ex2Eg-MAE: A Framework for Adaptation of Exocentric Video Masked Autoencoders for Egocentric Social Role Understanding},
author = {Minh Tran and Yelin Kim and Che-Chun Su and Cheng-Hao Kuo and Min Sun and Mohammad Soleymani},
editor = {Aleš Leonardis and Elisa Ricci and Stefan Roth and Olga Russakovsky and Torsten Sattler and Gül Varol},
url = {https://link.springer.com/10.1007/978-3-031-72989-8_1},
doi = {10.1007/978-3-031-72989-8_1},
isbn = {978-3-031-72988-1 978-3-031-72989-8},
year = {2024},
date = {2024-10-01},
urldate = {2024-12-06},
booktitle = {Computer Vision – ECCV 2024},
volume = {15138},
pages = {1–19},
publisher = {Springer Nature Switzerland},
address = {Cham},
note = {Series Title: Lecture Notes in Computer Science},
keywords = {DTIC},
pubstate = {published},
tppubtype = {incollection}
}
Chen, Gonglin; Wu, Jinsen; Chen, Haiwei; Teng, Wenbin; Gao, Zhiyuan; Feng, Andrew; Qin, Rongjun; Zhao, Yajie
Geometry-aware Feature Matching for Large-Scale Structure from Motion Miscellaneous
2024, (Version Number: 3).
@misc{chen_geometry-aware_2024,
title = {Geometry-aware Feature Matching for Large-Scale Structure from Motion},
author = {Gonglin Chen and Jinsen Wu and Haiwei Chen and Wenbin Teng and Zhiyuan Gao and Andrew Feng and Rongjun Qin and Yajie Zhao},
url = {https://arxiv.org/abs/2409.02310},
doi = {10.48550/ARXIV.2409.02310},
year = {2024},
date = {2024-09-01},
urldate = {2025-01-16},
publisher = {arXiv},
abstract = {Establishing consistent and dense correspondences across multiple images is crucial for Structure from Motion (SfM) systems. Significant view changes, such as air-to-ground with very sparse view overlap, pose an even greater challenge to the correspondence solvers. We present a novel optimization-based approach that significantly enhances existing feature matching methods by introducing geometry cues in addition to color cues. This helps fill gaps when there is less overlap in large-scale scenarios. Our method formulates geometric verification as an optimization problem, guiding feature matching within detector-free methods and using sparse correspondences from detector-based methods as anchor points. By enforcing geometric constraints via the Sampson Distance, our approach ensures that the denser correspondences from detector-free methods are geometrically consistent and more accurate. This hybrid strategy significantly improves correspondence density and accuracy, mitigates multi-view inconsistencies, and leads to notable advancements in camera pose accuracy and point cloud density. It outperforms state-of-the-art feature matching methods on benchmark datasets and enables feature matching in challenging extreme large-scale settings.},
note = {Version Number: 3},
keywords = {DTIC},
pubstate = {published},
tppubtype = {misc}
}
Hale, James; Schweitzer, Lindsey; Gratch, Jonathan
Pitfalls of Embodiment in Human-Agent Experiment Design Proceedings Article
In: Proceedings of the ACM International Conference on Intelligent Virtual Agents, pp. 1–9, ACM, Glasgow, United Kingdom, 2024, ISBN: 979-8-4007-0625-7.
@inproceedings{hale_pitfalls_2024,
title = {Pitfalls of Embodiment in Human-Agent Experiment Design},
author = {James Hale and Lindsey Schweitzer and Jonathan Gratch},
url = {https://dl.acm.org/doi/10.1145/3652988.3673958},
doi = {10.1145/3652988.3673958},
isbn = {979-8-4007-0625-7},
year = {2024},
date = {2024-09-01},
urldate = {2025-01-16},
booktitle = {Proceedings of the ACM International Conference on Intelligent Virtual Agents},
pages = {1–9},
publisher = {ACM},
address = {Glasgow, United Kingdom},
keywords = {DTIC},
pubstate = {published},
tppubtype = {inproceedings}
}
Gao, Zhiyuan; Teng, Wenbin; Chen, Gonglin; Wu, Jinsen; Xu, Ningli; Qin, Rongjun; Feng, Andrew; Zhao, Yajie
Skyeyes: Ground Roaming using Aerial View Images Miscellaneous
2024, (Version Number: 1).
@misc{gao_skyeyes_2024,
title = {Skyeyes: Ground Roaming using Aerial View Images},
author = {Zhiyuan Gao and Wenbin Teng and Gonglin Chen and Jinsen Wu and Ningli Xu and Rongjun Qin and Andrew Feng and Yajie Zhao},
url = {https://arxiv.org/abs/2409.16685},
doi = {10.48550/ARXIV.2409.16685},
year = {2024},
date = {2024-09-01},
urldate = {2025-01-16},
publisher = {arXiv},
abstract = {Integrating aerial imagery-based scene generation into applications like autonomous driving and gaming enhances realism in 3D environments, but challenges remain in creating detailed content for occluded areas and ensuring real-time, consistent rendering. In this paper, we introduce Skyeyes, a novel framework that can generate photorealistic sequences of ground view images using only aerial view inputs, thereby creating a ground roaming experience. More specifically, we combine a 3D representation with a view consistent generation model, which ensures coherence between generated images. This method allows for the creation of geometrically consistent ground view images, even with large view gaps. The images maintain improved spatial-temporal coherence and realism, enhancing scene comprehension and visualization from aerial perspectives. To the best of our knowledge, there are no publicly available datasets that contain pairwise geo-aligned aerial and ground view imagery. Therefore, we build a large, synthetic, and geo-aligned dataset using Unreal Engine. Both qualitative and quantitative analyses on this synthetic dataset display superior results compared to other leading synthesis approaches. See the project page for more results: https://chaoren2357.github.io/website-skyeyes/.},
note = {Version Number: 1},
keywords = {DTIC},
pubstate = {published},
tppubtype = {misc}
}
Hale, James; Schweitzer, Lindsey; Gratch, Jonathan
Integration of LLMs with Virtual Character Embodiment Proceedings Article
In: Proceedings of the ACM International Conference on Intelligent Virtual Agents, pp. 1–3, ACM, Glasgow, United Kingdom, 2024, ISBN: 979-8-4007-0625-7.
@inproceedings{hale_integration_2024,
title = {Integration of LLMs with Virtual Character Embodiment},
author = {James Hale and Lindsey Schweitzer and Jonathan Gratch},
url = {https://dl.acm.org/doi/10.1145/3652988.3696199},
doi = {10.1145/3652988.3696199},
isbn = {979-8-4007-0625-7},
year = {2024},
date = {2024-09-01},
urldate = {2025-01-16},
booktitle = {Proceedings of the ACM International Conference on Intelligent Virtual Agents},
pages = {1–3},
publisher = {ACM},
address = {Glasgow, United Kingdom},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Georgila, Kallirroi
Comparing Pre-Trained Embeddings and Domain-Independent Features for Regression-Based Evaluation of Task-Oriented Dialogue Systems Proceedings Article
In: Proceedings of the 25th Annual Meeting of the Special Interest Group on Discourse and Dialogue, pp. 610–623, Association for Computational Linguistics, Kyoto, Japan, 2024.
@inproceedings{georgila_comparing_2024,
title = {Comparing Pre-Trained Embeddings and Domain-Independent Features for Regression-Based Evaluation of Task-Oriented Dialogue Systems},
author = {Kallirroi Georgila},
url = {https://aclanthology.org/2024.sigdial-1.52},
doi = {10.18653/v1/2024.sigdial-1.52},
year = {2024},
date = {2024-09-01},
urldate = {2024-10-15},
booktitle = {Proceedings of the 25th Annual Meeting of the Special Interest Group on Discourse and Dialogue},
pages = {610–623},
publisher = {Association for Computational Linguistics},
address = {Kyoto, Japan},
keywords = {Dialogue, DTIC, Natural Language},
pubstate = {published},
tppubtype = {inproceedings}
}
Wang, Changzhao; Aguilar, Stephen J.; Bankard, Jennifer S.; Bui, Eric; Nye, Benjamin
Writing with AI: What College Students Learned from Utilizing ChatGPT for a Writing Assignment Journal Article
In: Education Sciences, vol. 14, no. 9, pp. 976, 2024, ISSN: 2227-7102, (Publisher: MDPI AG).
@article{wang_writing_2024,
title = {Writing with AI: What College Students Learned from Utilizing ChatGPT for a Writing Assignment},
author = {Changzhao Wang and Stephen J. Aguilar and Jennifer S. Bankard and Eric Bui and Benjamin Nye},
url = {https://www.mdpi.com/2227-7102/14/9/976},
doi = {10.3390/educsci14090976},
issn = {2227-7102},
year = {2024},
date = {2024-09-01},
urldate = {2024-09-17},
journal = {Education Sciences},
volume = {14},
number = {9},
pages = {976},
abstract = {To support the integration of AI in education, this empirical study investigated what lessons college students learned from using Generative AI for writing. We recruited 47 students in the United States from a university writing course. Students completed an assignment in which they used Generative AI tools (e.g., ChatGPT) to draft an application letter or personal statement. Data were collected using a survey of five open-ended questions about their writing process, what worked, what did not work, how to better write with AI, and general lessons learned. We applied thematic analysis and sentiment analysis methods to analyze students’ responses. Results show that (1) students went through multiple rounds of prompting; (2) students identified strengths of AI, such as connection to topic, template generation, and sentence quality; (3) the weaknesses of AI included general language, robotic tone and lacking emotion, lacking personal voice, and lacking critical thinking; (4) students wished to improve AI-generated writing by adding personal stories, connections to posting, feelings and thoughts, and deleting repetitive language; and (5) their overall attitudes toward AI tool were positive. We believe our findings can help relieve some concerns about cheating with AI. We also suggested strategies to regulate the use of AI.},
note = {Publisher: MDPI AG},
keywords = {Learning Sciences},
pubstate = {published},
tppubtype = {article}
}
Lucas, Gale M.; Becerik-Gerber, Burcin; Roll, Shawn C.
Calibrating workers’ trust in intelligent automated systems Journal Article
In: Patterns, vol. 5, no. 9, pp. 101045, 2024, ISSN: 2666-3899, (Publisher: Elsevier BV).
@article{lucas_calibrating_2024,
title = {Calibrating workers’ trust in intelligent automated systems},
author = {Gale M. Lucas and Burcin Becerik-Gerber and Shawn C. Roll},
url = {https://linkinghub.elsevier.com/retrieve/pii/S2666389924001879},
doi = {10.1016/j.patter.2024.101045},
issn = {2666-3899},
year = {2024},
date = {2024-09-01},
urldate = {2024-09-17},
journal = {Patterns},
volume = {5},
number = {9},
pages = {101045},
note = {Publisher: Elsevier BV},
keywords = {DTIC},
pubstate = {published},
tppubtype = {article}
}
Liu, Xiao; Lei, Xuanyu; Wang, Shengyuan; Huang, Yue; Feng, Zhuoer; Wen, Bosi; Cheng, Jiale; Ke, Pei; Xu, Yifan; Tam, Weng Lam; Zhang, Xiaohan; Sun, Lichao; Gu, Xiaotao; Wang, Hongning; Zhang, Jing; Huang, Minlie; Dong, Yuxiao; Tang, Jie
AlignBench: Benchmarking Chinese Alignment of Large Language Models Miscellaneous
2024, (arXiv:2311.18743 [cs]).
@misc{liu_alignbench_2024,
title = {AlignBench: Benchmarking Chinese Alignment of Large Language Models},
author = {Xiao Liu and Xuanyu Lei and Shengyuan Wang and Yue Huang and Zhuoer Feng and Bosi Wen and Jiale Cheng and Pei Ke and Yifan Xu and Weng Lam Tam and Xiaohan Zhang and Lichao Sun and Xiaotao Gu and Hongning Wang and Jing Zhang and Minlie Huang and Yuxiao Dong and Jie Tang},
url = {http://arxiv.org/abs/2311.18743},
doi = {10.48550/arXiv.2311.18743},
year = {2024},
date = {2024-08-01},
urldate = {2025-01-16},
publisher = {arXiv},
abstract = {Alignment has become a critical step for instruction-tuned Large Language Models (LLMs) to become helpful assistants. However, the effective evaluation of alignment for emerging Chinese LLMs is still largely unexplored. To fill in this gap, we introduce AlignBench, a comprehensive multi-dimensional benchmark for evaluating LLMs' alignment in Chinese. We design a human-in-the-loop data curation pipeline, containing eight main categories, 683 real-scenario rooted queries and corresponding human verified references. To ensure the correctness of references, each knowledge-intensive query is accompanied with evidences collected from reliable web sources (including URLs and quotations) by our annotators. For automatic evaluation, our benchmark employs a rule-calibrated multi-dimensional LLM-as-Judge approach with Chain-of-Thought to generate explanations and final ratings, ensuring high reliability and interpretability. All evaluation code, data, and LLM generations are available at https://github.com/THUDM/AlignBench. Since its release, AlignBench has been adopted by top (Chinese) LLMs for evaluating their alignment capabilities in Chinese, including ChatGLM, Qwen, DeepSeek, Yi, Baichuan, and Abab.},
note = {arXiv:2311.18743 [cs]},
keywords = {Machine Learning},
pubstate = {published},
tppubtype = {misc}
}
Fischer, Katrin; Velentza, Anna-Maria; Lucas, Gale; Williams, Dmitri
Seeing Eye to Eye with Robots: An Experimental Study Predicting Trust in Social Robots for Domestic Use Proceedings Article
In: 2024 33rd IEEE International Conference on Robot and Human Interactive Communication (ROMAN), pp. 2162–2168, IEEE, Pasadena, CA, USA, 2024, ISBN: 979-8-3503-7502-2.
@inproceedings{fischer_seeing_2024,
title = {Seeing Eye to Eye with Robots: An Experimental Study Predicting Trust in Social Robots for Domestic Use},
author = {Katrin Fischer and Anna-Maria Velentza and Gale Lucas and Dmitri Williams},
url = {https://ieeexplore.ieee.org/document/10731371/},
doi = {10.1109/RO-MAN60168.2024.10731371},
isbn = {979-8-3503-7502-2},
year = {2024},
date = {2024-08-01},
urldate = {2024-12-05},
booktitle = {2024 33rd IEEE International Conference on Robot and Human Interactive Communication (ROMAN)},
pages = {2162–2168},
publisher = {IEEE},
address = {Pasadena, CA, USA},
keywords = {DTIC, Virtual Humans},
pubstate = {published},
tppubtype = {inproceedings}
}
Zaizar, Eric D.; Gramlich, Michael A.; Rizzo, Albert “Skip”; Reger, Greg M.; Norr, Aaron M.
Exploration of the impact of baseline clinician learner characteristics on motivational interviewing skill improvement following training with a virtual standardized patient Journal Article
In: Training and Education in Professional Psychology, 2024, ISSN: 1931-3926, 1931-3918.
@article{zaizar_exploration_2024,
title = {Exploration of the impact of baseline clinician learner characteristics on motivational interviewing skill improvement following training with a virtual standardized patient.},
author = {Eric D. Zaizar and Michael A. Gramlich and Albert “Skip” Rizzo and Greg M. Reger and Aaron M. Norr},
url = {https://doi.apa.org/doi/10.1037/tep0000490},
doi = {10.1037/tep0000490},
issn = {1931-3926, 1931-3918},
year = {2024},
date = {2024-08-01},
urldate = {2024-08-13},
journal = {Training and Education in Professional Psychology},
keywords = {Virtual Humans},
pubstate = {published},
tppubtype = {article}
}
Bodner, Ehud; Mikulincer, Mario; McMahon, Elizabeth; Rizzo, Albert
Reviving life that has ceased on October the 7th: an attachment perspective on a virtual reality intervention Journal Article
In: Front. Virtual Real., vol. 5, pp. 1438663, 2024, ISSN: 2673-4192.
@article{bodner_reviving_2024,
title = {Reviving life that has ceased on October the 7th: an attachment perspective on a virtual reality intervention},
author = {Ehud Bodner and Mario Mikulincer and Elizabeth McMahon and Albert Rizzo},
url = {https://www.frontiersin.org/articles/10.3389/frvir.2024.1438663/full},
doi = {10.3389/frvir.2024.1438663},
issn = {2673-4192},
year = {2024},
date = {2024-08-01},
urldate = {2024-08-15},
journal = {Front. Virtual Real.},
volume = {5},
pages = {1438663},
abstract = {Unfortunately, in recent years, wars have forced many civilians to evacuate their homes and move to safe zones. The event of October the seventh that took place in many Kibbutzim near the Gaza strip exposed families who were on a Jewish holiday to the murder of family and community members. They had to leave their burned houses and move to hotels and apartment buildings in other parts of Israel. Many people, also from the Northern parts of the country, are still in new safe zones, and have huge difficulties in returning to their houses (and not only because of objective security reasons). In this “perspective” article we propose a Virtual Reality (VR) application, which is based on past and current research in the fields of attachment theory and traumatic grief. We propose that in addition to the use of exposure therapy, a VR simulation which will activate the attachment system, can reorganize the evacuees’ figure and place attachment representations. We suggest that such a simulation will revive the evacuees’ sense of safe-haven and secure base and enable them to return to their home place, or to adjust to a new place, thereby leading to optimal adjustment. We start with a presentation of the theory of attachment, place attachment, attachment and loss and the two-track model of bereavement. Then, we describe the design of our VR intervention that aims to address this challenge from the attachment theory perspective with the evacuees. Finally, we discuss the challenges that need to be dealt with to implement the VR interventions through resilience centers in Israel.},
keywords = {MedVR},
pubstate = {published},
tppubtype = {article}
}
Han, Bin; Yau, Cleo; Lei, Su; Gratch, Jonathan
Knowledge-based Emotion Recognition using Large Language Models Miscellaneous
2024, (arXiv:2408.04123 [cs]).
Abstract | Links | BibTeX | Tags: DTIC, Emotions
@misc{han_knowledge-based_2024,
title = {Knowledge-based Emotion Recognition using Large Language Models},
author = {Bin Han and Cleo Yau and Su Lei and Jonathan Gratch},
url = {http://arxiv.org/abs/2408.04123},
year = {2024},
date = {2024-08-01},
urldate = {2024-08-15},
publisher = {arXiv},
abstract = {Emotion recognition in social situations is a complex task that requires integrating information from both facial expressions and the situational context. While traditional approaches to automatic emotion recognition have focused on decontextualized signals, recent research emphasizes the importance of context in shaping emotion perceptions. This paper contributes to the emerging field of context-based emotion recognition by leveraging psychological theories of human emotion perception to inform the design of automated methods. We propose an approach that combines emotion recognition methods with Bayesian Cue Integration (BCI) to integrate emotion inferences from decontextualized facial expressions and contextual knowledge inferred via Large-language Models. We test this approach in the context of interpreting facial expressions during a social task, the prisoner's dilemma. Our results provide clear support for BCI across a range of automatic emotion recognition methods. The best automated method achieved results comparable to human observers, suggesting the potential for this approach to advance the field of affective computing.},
note = {arXiv:2408.04123 [cs]},
keywords = {DTIC, Emotions},
pubstate = {published},
tppubtype = {misc}
}
Parga, Madeline R.; Roll, Shawn C.; Lucas, Gale M.; Becerik-Gerber, Burcin; Narayanan, Shrikanth
Differences in Self-Rated Worker Outcomes Across Stress States: An Interim Analysis of Hybrid Worker Data Journal Article
In: Proceedings of the Human Factors and Ergonomics Society Annual Meeting, 2024, ISSN: 1071-1813, 2169-5067, (Publisher: SAGE Publications).
Abstract | Links | BibTeX | Tags:
@article{parga_differences_2024,
title = {Differences in Self-Rated Worker Outcomes Across Stress States: An Interim Analysis of Hybrid Worker Data},
author = {Madeline R. Parga and Shawn C. Roll and Gale M. Lucas and Burcin Becerik-Gerber and Shrikanth Narayanan},
url = {https://journals.sagepub.com/doi/10.1177/10711813241275500},
doi = {10.1177/10711813241275500},
issn = {1071-1813, 2169-5067},
year = {2024},
date = {2024-08-01},
urldate = {2024-09-17},
journal = {Proceedings of the Human Factors and Ergonomics Society Annual Meeting},
abstract = {Stress experiences can have dire consequences for worker performance and well-being, and the social environment of the workplace is a key contributor to worker experience. This study investigated the relationship between hybrid workers’ self-ratings of productivity, mood, and stress with perceptions of positive (eustress) and negative (distress) stress states. We hypothesized that self-ratings would vary across combinations of eustress and distress experiences and that these differences would vary with the social context. Ecological momentary assessments (EMA) were used to obtain ecologically valid data at four data points each workday across a 4-month study period in a cohort of seven office workers. Findings aligned with the Yerkes–Dodson law, such that higher states of arousal were associated with greater self-perceived productivity, and higher stress magnitudes were found when distress was present. Compared to other states, eustress was associated with higher productivity in work-related activities and better mood across all activity types.},
note = {Publisher: SAGE Publications},
keywords = {},
pubstate = {published},
tppubtype = {article}
}