Publications
Murray, Benjamin; Brown, Richard; Ma, Pengcheng; Kerfoot, Eric; Xu, Daguang; Feng, Andrew; Cardoso, Jorge; Ourselin, Sebastien; Modat, Marc
Lazy Resampling: Fast and information preserving preprocessing for deep learning Journal Article
In: Computer Methods and Programs in Biomedicine, vol. 257, pp. 108422, 2024, ISSN: 0169-2607.
@article{murray_lazy_2024,
title = {Lazy Resampling: Fast and information preserving preprocessing for deep learning},
author = {Benjamin Murray and Richard Brown and Pengcheng Ma and Eric Kerfoot and Daguang Xu and Andrew Feng and Jorge Cardoso and Sebastien Ourselin and Marc Modat},
url = {https://linkinghub.elsevier.com/retrieve/pii/S0169260724004152},
doi = {10.1016/j.cmpb.2024.108422},
issn = {0169-2607},
year = {2024},
date = {2024-12-01},
urldate = {2025-01-16},
journal = {Computer Methods and Programs in Biomedicine},
volume = {257},
pages = {108422},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Tran, Minh; Chang, Di; Siniukov, Maksim; Soleymani, Mohammad
DIM: Dyadic Interaction Modeling for Social Behavior Generation Book Section
In: Leonardis, Aleš; Ricci, Elisa; Roth, Stefan; Russakovsky, Olga; Sattler, Torsten; Varol, Gül (Ed.): Computer Vision – ECCV 2024, vol. 15095, pp. 484–503, Springer Nature Switzerland, Cham, 2024, ISBN: 978-3-031-72912-6 978-3-031-72913-3, (Series Title: Lecture Notes in Computer Science).
@incollection{leonardis_dim_2024,
title = {DIM: Dyadic Interaction Modeling for Social Behavior Generation},
author = {Minh Tran and Di Chang and Maksim Siniukov and Mohammad Soleymani},
editor = {Aleš Leonardis and Elisa Ricci and Stefan Roth and Olga Russakovsky and Torsten Sattler and Gül Varol},
url = {https://link.springer.com/10.1007/978-3-031-72913-3_27},
doi = {10.1007/978-3-031-72913-3_27},
isbn = {978-3-031-72912-6 978-3-031-72913-3},
year = {2024},
date = {2024-12-01},
urldate = {2025-01-16},
booktitle = {Computer Vision – ECCV 2024},
volume = {15095},
pages = {484–503},
publisher = {Springer Nature Switzerland},
address = {Cham},
note = {Series Title: Lecture Notes in Computer Science},
keywords = {},
pubstate = {published},
tppubtype = {incollection}
}
Xu, Jiuyi; Chen, Meida; Feng, Andrew; Yu, Zifan; Shi, Yangming
Open-Vocabulary High-Resolution 3D (OVHR3D) Data Segmentation and Annotation Framework Journal Article
In: 2024, (Publisher: arXiv, Version Number: 2).
@article{xu_open-vocabulary_2024,
title = {Open-Vocabulary High-Resolution 3D (OVHR3D) Data Segmentation and Annotation Framework},
author = {Jiuyi Xu and Meida Chen and Andrew Feng and Zifan Yu and Yangming Shi},
url = {https://arxiv.org/abs/2412.06268},
doi = {10.48550/ARXIV.2412.06268},
year = {2024},
date = {2024-12-01},
urldate = {2024-12-20},
abstract = {In the domain of the U.S. Army modeling and simulation, the availability of high quality annotated 3D data is pivotal to creating virtual environments for training and simulations. Traditional methodologies for 3D semantic and instance segmentation, such as KpConv, RandLA, Mask3D, etc., are designed to train on extensive labeled datasets to obtain satisfactory performance in practical tasks. This requirement presents a significant challenge, given the inherent scarcity of manually annotated 3D datasets, particularly for the military use cases. Recognizing this gap, our previous research leverages the One World Terrain data repository manually annotated databases, as showcased at IITSEC 2019 and 2021, to enrich the training dataset for deep learning models. However, collecting and annotating large scale 3D data for specific tasks remains costly and inefficient. To this end, the objective of this research is to design and develop a comprehensive and efficient framework for 3D segmentation tasks to assist in 3D data annotation. This framework integrates Grounding DINO and Segment anything Model, augmented by an enhancement in 2D image rendering via 3D mesh. Furthermore, the authors have also developed a user friendly interface that facilitates the 3D annotation process, offering intuitive visualization of rendered images and the 3D point cloud.},
note = {Publisher: arXiv, Version Number: 2},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Roemmele, Melissa; Gordon, Andrew
From Test-Taking to Test-Making: Examining LLM Authoring of Commonsense Assessment Items Proceedings Article
In: Findings of the Association for Computational Linguistics: EMNLP 2024, pp. 5193–5203, Association for Computational Linguistics, Miami, Florida, USA, 2024.
@inproceedings{roemmele_test-taking_2024,
title = {From Test-Taking to Test-Making: Examining LLM Authoring of Commonsense Assessment Items},
author = {Melissa Roemmele and Andrew Gordon},
url = {https://aclanthology.org/2024.findings-emnlp.299},
doi = {10.18653/v1/2024.findings-emnlp.299},
year = {2024},
date = {2024-11-01},
urldate = {2024-12-05},
booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2024},
pages = {5193–5203},
publisher = {Association for Computational Linguistics},
address = {Miami, Florida, USA},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Zhu, Xin; Su, Zhenghui; Gratch, Jonathan; Culbertson, Heather
How Visualizing Touch Can Transform Perceptions of Intensity, Realism, and Emotion? Book Section
In: Kajimoto, Hiroyuki; Lopes, Pedro; Pacchierotti, Claudio; Basdogan, Cagatay; Gori, Monica; Lemaire-Semail, Betty; Marchal, Maud (Ed.): Haptics: Understanding Touch; Technology and Systems; Applications and Interaction, vol. 14768, pp. 194–207, Springer Nature Switzerland, Cham, 2024, ISBN: 978-3-031-70057-6 978-3-031-70058-3, (Series Title: Lecture Notes in Computer Science).
@incollection{kajimoto_how_2024,
title = {How Visualizing Touch Can Transform Perceptions of Intensity, Realism, and Emotion?},
author = {Xin Zhu and Zhenghui Su and Jonathan Gratch and Heather Culbertson},
editor = {Hiroyuki Kajimoto and Pedro Lopes and Claudio Pacchierotti and Cagatay Basdogan and Monica Gori and Betty Lemaire-Semail and Maud Marchal},
url = {https://link.springer.com/10.1007/978-3-031-70058-3_16},
doi = {10.1007/978-3-031-70058-3_16},
isbn = {978-3-031-70057-6 978-3-031-70058-3},
year = {2024},
date = {2024-11-01},
urldate = {2024-12-05},
booktitle = {Haptics: Understanding Touch; Technology and Systems; Applications and Interaction},
volume = {14768},
pages = {194–207},
publisher = {Springer Nature Switzerland},
address = {Cham},
note = {Series Title: Lecture Notes in Computer Science},
keywords = {},
pubstate = {published},
tppubtype = {incollection}
}
Siniukov, Maksim; Yin, Yufeng; Fast, Eli; Qi, Yingshan; Monga, Aarav; Kim, Audrey; Soleymani, Mohammad
SEMPI: A Database for Understanding Social Engagement in Video-Mediated Multiparty Interaction Proceedings Article
In: International Conference on Multimodal Interaction, pp. 546–555, ACM, San Jose, Costa Rica, 2024, ISBN: 979-8-4007-0462-8.
@inproceedings{siniukov_sempi_2024,
title = {SEMPI: A Database for Understanding Social Engagement in Video-Mediated Multiparty Interaction},
author = {Maksim Siniukov and Yufeng Yin and Eli Fast and Yingshan Qi and Aarav Monga and Audrey Kim and Mohammad Soleymani},
url = {https://dl.acm.org/doi/10.1145/3678957.3685752},
doi = {10.1145/3678957.3685752},
isbn = {979-8-4007-0462-8},
year = {2024},
date = {2024-11-01},
urldate = {2024-12-05},
booktitle = {International Conference on Multimodal Interaction},
pages = {546–555},
publisher = {ACM},
address = {San Jose, Costa Rica},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Andalibi, Nazanin; Stark, Luke; McDuff, Daniel; Picard, Rosalind; Gratch, Jonathan; Howell, Noura
What should we do with Emotion AI? Towards an Agenda for the Next 30 Years Proceedings Article
In: Companion Publication of the 2024 Conference on Computer-Supported Cooperative Work and Social Computing, pp. 98–101, ACM, San Jose, Costa Rica, 2024, ISBN: 979-8-4007-1114-5.
@inproceedings{andalibi_what_2024,
title = {What should we do with Emotion AI? Towards an Agenda for the Next 30 Years},
author = {Nazanin Andalibi and Luke Stark and Daniel McDuff and Rosalind Picard and Jonathan Gratch and Noura Howell},
url = {https://dl.acm.org/doi/10.1145/3678884.3689135},
doi = {10.1145/3678884.3689135},
isbn = {979-8-4007-1114-5},
year = {2024},
date = {2024-11-01},
urldate = {2024-12-05},
booktitle = {Companion Publication of the 2024 Conference on Computer-Supported Cooperative Work and Social Computing},
pages = {98–101},
publisher = {ACM},
address = {San Jose, Costa Rica},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Loucks, Laura; Rizzo, Albert; Rothbaum, Barbara O.
Virtual Reality Exposure for Treating PTSD Due to Military Sexual Trauma Journal Article
In: J Clin Psychol, pp. jclp.23750, 2024, ISSN: 0021-9762, 1097-4679.
@article{loucks_virtual_2024,
title = {Virtual Reality Exposure for Treating PTSD Due to Military Sexual Trauma},
author = {Laura Loucks and Albert Rizzo and Barbara O. Rothbaum},
url = {https://onlinelibrary.wiley.com/doi/10.1002/jclp.23750},
doi = {10.1002/jclp.23750},
issn = {0021-9762, 1097-4679},
year = {2024},
date = {2024-11-01},
urldate = {2024-12-05},
journal = {J Clin Psychol},
pages = {jclp.23750},
abstract = {Virtual reality exposure therapy (VRE) has been used in the treatment of combat‐related PTSD since the late 1990s and was recently adapted to treat PTSD due to military sexual trauma (MST). With content specifically tailored to MST‐related contexts, we present the case study of a military veteran who participated in the open clinical trial examining the feasibility of VRE in the treatment of MST‐related PTSD (Loucks et al. 2019). We illustrate VRE's use in activating the trauma memory to facilitate therapeutic emotional processing across sessions and overall symptom reduction. The case study includes common challenges that may occur during VRE and relevant recommendations. The discussion will include lessons learned from the case study and the open clinical trial, recommendations for the flexible application of VRE, and the ongoing developments in the latest version of the VRE system, informed by feedback acquired from the clinicians and patients who experienced it in the initial clinical trial.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Hills, Mellanie; Korjian, Serge; Chi, Gerald; Natale, Andrea; Saxon, Leslie; Ferdinand, Keith; Kwaku, Kevin; Brancato, Scott; Baca-Motes, Katie; Steinhubl, Steve; Wessler, Jeff; Goldberg, Nieca; Asthana, Anisha; Shute, Kate; Applebaum, Jill; Doran, Kathleen; Nikolovski, Janeta; Kaul, Simrati; Wentworth, Dereck; Damaraju, Cv; DeFalco, Frank; Tavakoli, Cammie; Patel, Mithun; Curtis, Anne; Spertus, John; Gibson, Charles
Insights for Direct-to-Patient Clinical Trial Recruitment Strategies From the Heartline Study Journal Article
In: Circulation, vol. 150, no. Suppl_1, 2024, ISSN: 0009-7322, 1524-4539.
@article{hills_insights_2024,
title = {Insights for Direct-to-Patient Clinical Trial Recruitment Strategies From the Heartline Study},
author = {Mellanie Hills and Serge Korjian and Gerald Chi and Andrea Natale and Leslie Saxon and Keith Ferdinand and Kevin Kwaku and Scott Brancato and Katie Baca-Motes and Steve Steinhubl and Jeff Wessler and Nieca Goldberg and Anisha Asthana and Kate Shute and Jill Applebaum and Kathleen Doran and Janeta Nikolovski and Simrati Kaul and Dereck Wentworth and Cv Damaraju and Frank DeFalco and Cammie Tavakoli and Mithun Patel and Anne Curtis and John Spertus and Charles Gibson},
url = {https://www.ahajournals.org/doi/10.1161/circ.150.suppl_1.4143017},
doi = {10.1161/circ.150.suppl_1.4143017},
issn = {0009-7322, 1524-4539},
year = {2024},
date = {2024-11-01},
urldate = {2024-12-05},
journal = {Circulation},
volume = {150},
number = {Suppl_1},
abstract = {Background:
Decentralized clinical trials using direct-to-participant recruitment can potentially engage large, representative participant pools.
Research Question:
Can a decentralized clinical trial use a multichannel approach to recruit patients >65 years old across the United States?
Goals/Aims:
To share insights on multichannel strategies for participant recruitment in the decentralized, app-based Heartline study.
Methods:
Heartline is a randomized trial testing the impact of a mobile app-based heart health program with the electrocardiogram (ECG) and Irregular Rhythm Notification (IRN) features on Apple Watch for early diagnosis, treatment, and outcomes of atrial fibrillation. Eligible participants were US adults aged ≥65 years with an iPhone and Medicare coverage. Multiple pathways for broad outreach were explored, including digital (eg, email, social media) and traditional channels (eg, direct mail, community outreach). Recruitment efforts were assessed and refined to reach a large eligible population.
Results:
A multichannel approach led to ~300,000 Heartline study app installations. In total, 34,244 participants completed enrollment (Feb 2020-Dec 2022), of whom 28,155 completed baseline demographic assessments. Participants were widely distributed geographically, with notable representation of outlying and rural areas (Figure 1). Women accounted for 54% of the participants. Overall, most participants were White (93.0%), with Asian, Black, and Hispanic participants representing 2.8%, 2.7%, and 2.5%, respectively.
Conclusion:
The Heartline study demonstrated the ability to recruit large numbers of participants aged ≥65 years using a direct-to-participant approach. Broad outreach strategies ensured gender and geographic diversity, enrolling a higher percentage of women than typical cardiology trials, and participation from rural areas. However, underrepresentation across racial/ethnic groups persisted and strategies to increase enrollment are needed. For similar trials, a strategic multichannel approach, with strong data and analytics capabilities may be beneficial to effectively target and enroll eligible participants.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Chen, Meida; Han, Kangle; Yu, Zifan; Feng, Andrew; Hou, Yu; You, Suya; Soibelman, Lucio
An Aerial Photogrammetry Benchmark Dataset for Point Cloud Segmentation and Style Translation Journal Article
In: Remote Sensing, vol. 16, no. 22, pp. 4240, 2024, ISSN: 2072-4292.
@article{chen_aerial_2024,
title = {An Aerial Photogrammetry Benchmark Dataset for Point Cloud Segmentation and Style Translation},
author = {Meida Chen and Kangle Han and Zifan Yu and Andrew Feng and Yu Hou and Suya You and Lucio Soibelman},
url = {https://www.mdpi.com/2072-4292/16/22/4240},
doi = {10.3390/rs16224240},
issn = {2072-4292},
year = {2024},
date = {2024-11-01},
urldate = {2024-12-05},
journal = {Remote Sensing},
volume = {16},
number = {22},
pages = {4240},
abstract = {The recent surge in diverse 3D datasets spanning various scales and applications marks a significant advancement in the field. However, the comprehensive process of data acquisition, refinement, and annotation at a large scale poses a formidable challenge, particularly for individual researchers and small teams. To this end, we present a novel synthetic 3D point cloud generation framework that can produce detailed outdoor aerial photogrammetric 3D datasets with accurate ground truth annotations without the labor-intensive and time-consuming data collection/annotation processes. Our pipeline procedurally generates synthetic environments, mirroring real-world data collection and 3D reconstruction processes. A key feature of our framework is its ability to replicate consistent quality, noise patterns, and diversity similar to real-world datasets. This is achieved by adopting UAV flight patterns that resemble those used in real-world data collection processes (e.g., the cross-hatch flight pattern) across various synthetic terrains that are procedurally generated, thereby ensuring data consistency akin to real-world scenarios. Moreover, the generated datasets are enriched with precise semantic and instance annotations, eliminating the need for manual labeling. Our approach has led to the development and release of the Semantic Terrain Points Labeling—Synthetic 3D (STPLS3D) benchmark, an extensive outdoor 3D dataset encompassing over 16 km2, featuring up to 19 semantic labels. We also collected, reconstructed, and annotated four real-world datasets for validation purposes. Extensive experiments on these datasets demonstrate our synthetic datasets’ effectiveness, superior quality, and their value as a benchmark dataset for further point cloud research.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Bonial, Claire; Lukin, Stephanie M.; Abrams, Mitchell; Baker, Anthony; Donatelli, Lucia; Foots, Ashley; Hayes, Cory J.; Henry, Cassidy; Hudson, Taylor; Marge, Matthew; Pollard, Kimberly A.; Artstein, Ron; Traum, David; Voss, Clare R.
Human–robot dialogue annotation for multi-modal common ground Journal Article
In: Lang Resources & Evaluation, 2024, ISSN: 1574-020X, 1574-0218.
@article{bonial_humanrobot_2024,
title = {Human–robot dialogue annotation for multi-modal common ground},
author = {Claire Bonial and Stephanie M. Lukin and Mitchell Abrams and Anthony Baker and Lucia Donatelli and Ashley Foots and Cory J. Hayes and Cassidy Henry and Taylor Hudson and Matthew Marge and Kimberly A. Pollard and Ron Artstein and David Traum and Clare R. Voss},
url = {https://link.springer.com/10.1007/s10579-024-09784-2},
doi = {10.1007/s10579-024-09784-2},
issn = {1574-020X, 1574-0218},
year = {2024},
date = {2024-11-01},
urldate = {2024-12-05},
journal = {Lang Resources & Evaluation},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Marti, Deniz; Budathoki, Anjila; Ding, Yi; Lucas, Gale; Nelson, David
How Does Acknowledging Users’ Preferences Impact AI’s Ability to Make Conflicting Recommendations? Journal Article
In: International Journal of Human–Computer Interaction, pp. 1–12, 2024, ISSN: 1044-7318, 1532-7590.
@article{marti_how_2024,
title = {How Does Acknowledging Users’ Preferences Impact AI’s Ability to Make Conflicting Recommendations?},
author = {Deniz Marti and Anjila Budathoki and Yi Ding and Gale Lucas and David Nelson},
url = {https://www.tandfonline.com/doi/full/10.1080/10447318.2024.2426035},
doi = {10.1080/10447318.2024.2426035},
issn = {1044-7318, 1532-7590},
year = {2024},
date = {2024-11-01},
urldate = {2024-12-05},
journal = {International Journal of Human–Computer Interaction},
pages = {1–12},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Vlake, Johan H; Drop, Denzel L Q; Bommel, Jasper Van; Riva, Giuseppe; Wiederhold, Brenda K; Cipresso, Pietro; Rizzo, Albert S; Rothbaum, Barbara O; Botella, Cristina; Hooft, Lotty; Bienvenu, Oscar J; Jung, Christian; Geerts, Bart; Wils, Evert-Jan; Gommers, Diederik; Genderen, Michel E Van; Group, RATE-XR Expert
Reporting Guidelines for the Early-Phase Clinical Evaluation of Applications Using Extended Reality: RATE-XR Qualitative Study Guideline Journal Article
In: J Med Internet Res, vol. 26, pp. e56790, 2024, ISSN: 1438-8871.
@article{vlake_reporting_2024,
title = {Reporting Guidelines for the Early-Phase Clinical Evaluation of Applications Using Extended Reality: RATE-XR Qualitative Study Guideline},
author = {Johan H Vlake and Denzel L Q Drop and Jasper Van Bommel and Giuseppe Riva and Brenda K Wiederhold and Pietro Cipresso and Albert S Rizzo and Barbara O Rothbaum and Cristina Botella and Lotty Hooft and Oscar J Bienvenu and Christian Jung and Bart Geerts and Evert-Jan Wils and Diederik Gommers and Michel E Van Genderen and RATE-XR Expert Group},
url = {https://www.jmir.org/2024/1/e56790},
doi = {10.2196/56790},
issn = {1438-8871},
year = {2024},
date = {2024-11-01},
urldate = {2024-12-05},
journal = {J Med Internet Res},
volume = {26},
pages = {e56790},
abstract = {Background
Extended reality (XR), encompassing technologies such as virtual reality, augmented reality, and mixed reality, has rapidly gained prominence in health care. However, existing XR research often lacks rigor, proper controls, and standardization.
Objective
To address this and to enhance the transparency and quality of reporting in early-phase clinical evaluations of XR applications, we present the “Reporting for the early-phase clinical evaluation of applications using extended reality” (RATE-XR) guideline.
Methods
We conducted a 2-round modified Delphi process involving experts from diverse stakeholder categories, and the RATE-XR is therefore the result of a consensus-based, multistakeholder effort.
Results
The guideline comprises 17 XR-specific (composed of 18 subitems) and 14 generic reporting items, each with a complementary Explanation & Elaboration section.
Conclusions
The items encompass critical aspects of XR research, from clinical utility and safety to human factors and ethics. By offering a comprehensive checklist for reporting, the RATE-XR guideline facilitates robust assessment and replication of early-stage clinical XR studies. It underscores the need for transparency, patient-centeredness, and balanced evaluation of the applications of XR in health care. By providing an actionable checklist of minimal reporting items, this guideline will facilitate the responsible development and integration of XR technologies into health care and related fields.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Roemmele, Melissa; Gordon, Andrew S.
From Test-Taking to Test-Making: Examining LLM Authoring of Commonsense Assessment Items Miscellaneous
2024, (Version Number: 1).
@misc{roemmele_test-taking_2024-1,
title = {From Test-Taking to Test-Making: Examining LLM Authoring of Commonsense Assessment Items},
author = {Melissa Roemmele and Andrew S. Gordon},
url = {https://arxiv.org/abs/2410.14897},
doi = {10.48550/ARXIV.2410.14897},
year = {2024},
date = {2024-10-01},
urldate = {2024-12-05},
publisher = {arXiv},
abstract = {LLMs can now perform a variety of complex writing tasks. They also excel in answering questions pertaining to natural language inference and commonsense reasoning. Composing these questions is itself a skilled writing task, so in this paper we consider LLMs as authors of commonsense assessment items. We prompt LLMs to generate items in the style of a prominent benchmark for commonsense reasoning, the Choice of Plausible Alternatives (COPA). We examine the outcome according to analyses facilitated by the LLMs and human annotation. We find that LLMs that succeed in answering the original COPA benchmark are also more successful in authoring their own items.},
note = {Version Number: 1},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
Lin, Spencer; Rizk, Basem; Jun, Miru; Artze, Andy; Sullivan, Caitlin; Mozgai, Sharon; Fisher, Scott
Estuary: A Framework For Building Multimodal Low-Latency Real-Time Socially Interactive Agents Miscellaneous
2024, (arXiv:2410.20116 [cs]).
@misc{lin_estuary_2024,
title = {Estuary: A Framework For Building Multimodal Low-Latency Real-Time Socially Interactive Agents},
author = {Spencer Lin and Basem Rizk and Miru Jun and Andy Artze and Caitlin Sullivan and Sharon Mozgai and Scott Fisher},
url = {http://arxiv.org/abs/2410.20116},
doi = {10.1145/3652988.3696198},
year = {2024},
date = {2024-10-01},
urldate = {2024-12-06},
abstract = {The rise in capability and ubiquity of generative artificial intelligence (AI) technologies has enabled its application to the field of Socially Interactive Agents (SIAs). Despite rising interest in modern AI-powered components used for real-time SIA research, substantial friction remains due to the absence of a standardized and universal SIA framework. To target this absence, we developed Estuary: a multimodal (text, audio, and soon video) framework which facilitates the development of low-latency, real-time SIAs. Estuary seeks to reduce repeat work between studies and to provide a flexible platform that can be run entirely off-cloud to maximize configurability, controllability, reproducibility of studies, and speed of agent response times. We are able to do this by constructing a robust multimodal framework which incorporates current and future components seamlessly into a modular and interoperable architecture.},
note = {arXiv:2410.20116 [cs]},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
Tran, Minh; Kim, Yelin; Su, Che-Chun; Kuo, Cheng-Hao; Sun, Min; Soleymani, Mohammad
Ex2Eg-MAE: A Framework for Adaptation of Exocentric Video Masked Autoencoders for Egocentric Social Role Understanding Book Section
In: Leonardis, Aleš; Ricci, Elisa; Roth, Stefan; Russakovsky, Olga; Sattler, Torsten; Varol, Gül (Ed.): Computer Vision – ECCV 2024, vol. 15138, pp. 1–19, Springer Nature Switzerland, Cham, 2024, ISBN: 978-3-031-72988-1 978-3-031-72989-8, (Series Title: Lecture Notes in Computer Science).
@incollection{leonardis_ex2eg-mae_2024,
title = {Ex2Eg-MAE: A Framework for Adaptation of Exocentric Video Masked Autoencoders for Egocentric Social Role Understanding},
author = {Minh Tran and Yelin Kim and Che-Chun Su and Cheng-Hao Kuo and Min Sun and Mohammad Soleymani},
editor = {Aleš Leonardis and Elisa Ricci and Stefan Roth and Olga Russakovsky and Torsten Sattler and Gül Varol},
url = {https://link.springer.com/10.1007/978-3-031-72989-8_1},
doi = {10.1007/978-3-031-72989-8_1},
isbn = {978-3-031-72988-1 978-3-031-72989-8},
year = {2024},
date = {2024-10-01},
urldate = {2024-12-06},
booktitle = {Computer Vision – ECCV 2024},
volume = {15138},
pages = {1–19},
publisher = {Springer Nature Switzerland},
address = {Cham},
note = {Series Title: Lecture Notes in Computer Science},
keywords = {},
pubstate = {published},
tppubtype = {incollection}
}
Chen, Gonglin; Wu, Jinsen; Chen, Haiwei; Teng, Wenbin; Gao, Zhiyuan; Feng, Andrew; Qin, Rongjun; Zhao, Yajie
Geometry-aware Feature Matching for Large-Scale Structure from Motion Miscellaneous
2024, (Version Number: 3).
@misc{chen_geometry-aware_2024,
title = {Geometry-aware Feature Matching for Large-Scale Structure from Motion},
author = {Gonglin Chen and Jinsen Wu and Haiwei Chen and Wenbin Teng and Zhiyuan Gao and Andrew Feng and Rongjun Qin and Yajie Zhao},
url = {https://arxiv.org/abs/2409.02310},
doi = {10.48550/ARXIV.2409.02310},
year = {2024},
date = {2024-09-01},
urldate = {2025-01-16},
publisher = {arXiv},
abstract = {Establishing consistent and dense correspondences across multiple images is crucial for Structure from Motion (SfM) systems. Significant view changes, such as air-to-ground with very sparse view overlap, pose an even greater challenge to the correspondence solvers. We present a novel optimization-based approach that significantly enhances existing feature matching methods by introducing geometry cues in addition to color cues. This helps fill gaps when there is less overlap in large-scale scenarios. Our method formulates geometric verification as an optimization problem, guiding feature matching within detector-free methods and using sparse correspondences from detector-based methods as anchor points. By enforcing geometric constraints via the Sampson Distance, our approach ensures that the denser correspondences from detector-free methods are geometrically consistent and more accurate. This hybrid strategy significantly improves correspondence density and accuracy, mitigates multi-view inconsistencies, and leads to notable advancements in camera pose accuracy and point cloud density. It outperforms state-of-the-art feature matching methods on benchmark datasets and enables feature matching in challenging extreme large-scale settings.},
note = {Version Number: 3},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
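A note for readers: the Sampson Distance that this abstract uses for geometric verification is, in standard multi-view geometry references (e.g., Hartley and Zisserman), a first-order approximation of the reprojection error; the paper may use a variant, but for a fundamental matrix F and a homogeneous correspondence x ↔ x' the usual form is

\[ d_{S}(\mathbf{x}, \mathbf{x}') = \frac{(\mathbf{x}'^{\top} F \mathbf{x})^{2}}{(F\mathbf{x})_{1}^{2} + (F\mathbf{x})_{2}^{2} + (F^{\top}\mathbf{x}')_{1}^{2} + (F^{\top}\mathbf{x}')_{2}^{2}} \]

Correspondences with small d_S are consistent with the epipolar geometry, which is what lets dense detector-free matches be kept only when they agree with the estimated two-view geometry.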
Hale, James; Schweitzer, Lindsey; Gratch, Jonathan
Pitfalls of Embodiment in Human-Agent Experiment Design Proceedings Article
In: Proceedings of the ACM International Conference on Intelligent Virtual Agents, pp. 1–9, ACM, Glasgow, United Kingdom, 2024, ISBN: 979-8-4007-0625-7.
@inproceedings{hale_pitfalls_2024,
title = {Pitfalls of Embodiment in Human-Agent Experiment Design},
author = {James Hale and Lindsey Schweitzer and Jonathan Gratch},
url = {https://dl.acm.org/doi/10.1145/3652988.3673958},
doi = {10.1145/3652988.3673958},
isbn = {979-8-4007-0625-7},
year = {2024},
date = {2024-09-01},
urldate = {2025-01-16},
booktitle = {Proceedings of the ACM International Conference on Intelligent Virtual Agents},
pages = {1–9},
publisher = {ACM},
address = {Glasgow, United Kingdom},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Gao, Zhiyuan; Teng, Wenbin; Chen, Gonglin; Wu, Jinsen; Xu, Ningli; Qin, Rongjun; Feng, Andrew; Zhao, Yajie
Skyeyes: Ground Roaming using Aerial View Images Miscellaneous
2024, (Version Number: 1).
@misc{gao_skyeyes_2024,
title = {Skyeyes: Ground Roaming using Aerial View Images},
author = {Zhiyuan Gao and Wenbin Teng and Gonglin Chen and Jinsen Wu and Ningli Xu and Rongjun Qin and Andrew Feng and Yajie Zhao},
url = {https://arxiv.org/abs/2409.16685},
doi = {10.48550/ARXIV.2409.16685},
year = {2024},
date = {2024-09-01},
urldate = {2025-01-16},
publisher = {arXiv},
abstract = {Integrating aerial imagery-based scene generation into applications like autonomous driving and gaming enhances realism in 3D environments, but challenges remain in creating detailed content for occluded areas and ensuring real-time, consistent rendering. In this paper, we introduce Skyeyes, a novel framework that can generate photorealistic sequences of ground view images using only aerial view inputs, thereby creating a ground roaming experience. More specifically, we combine a 3D representation with a view consistent generation model, which ensures coherence between generated images. This method allows for the creation of geometrically consistent ground view images, even with large view gaps. The images maintain improved spatial-temporal coherence and realism, enhancing scene comprehension and visualization from aerial perspectives. To the best of our knowledge, there are no publicly available datasets that contain pairwise geo-aligned aerial and ground view imagery. Therefore, we build a large, synthetic, and geo-aligned dataset using Unreal Engine. Both qualitative and quantitative analyses on this synthetic dataset display superior results compared to other leading synthesis approaches. See the project page for more results: https://chaoren2357.github.io/website-skyeyes/.},
note = {Version Number: 1},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
Hale, James; Schweitzer, Lindsey; Gratch, Jonathan
Integration of LLMs with Virtual Character Embodiment Proceedings Article
In: Proceedings of the ACM International Conference on Intelligent Virtual Agents, pp. 1–3, ACM, Glasgow, United Kingdom, 2024, ISBN: 979-8-4007-0625-7.
@inproceedings{hale_integration_2024,
title = {Integration of LLMs with Virtual Character Embodiment},
author = {James Hale and Lindsey Schweitzer and Jonathan Gratch},
url = {https://dl.acm.org/doi/10.1145/3652988.3696199},
doi = {10.1145/3652988.3696199},
isbn = {979-8-4007-0625-7},
year = {2024},
date = {2024-09-01},
urldate = {2025-01-16},
booktitle = {Proceedings of the ACM International Conference on Intelligent Virtual Agents},
pages = {1–3},
publisher = {ACM},
address = {Glasgow, United Kingdom},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Roth, Holger R.; Beutel, Daniel J.; Cheng, Yan; Marques, Javier Fernandez; Pan, Heng; Chen, Chester; Zhang, Zhihong; Wen, Yuhong; Yang, Sean; Yang, Isaac; Hsieh, Yuan-Ting; Xu, Ziyue; Xu, Daguang; Lane, Nicholas D.; Feng, Andrew
Supercharging Federated Learning with Flower and NVIDIA FLARE Miscellaneous
2024, (arXiv:2407.00031 [cs]).
@misc{roth_supercharging_2024,
title = {Supercharging Federated Learning with Flower and NVIDIA FLARE},
author = {Holger R. Roth and Daniel J. Beutel and Yan Cheng and Javier Fernandez Marques and Heng Pan and Chester Chen and Zhihong Zhang and Yuhong Wen and Sean Yang and Isaac Yang and Yuan-Ting Hsieh and Ziyue Xu and Daguang Xu and Nicholas D. Lane and Andrew Feng},
url = {http://arxiv.org/abs/2407.00031},
doi = {10.48550/arXiv.2407.00031},
year = {2024},
date = {2024-07-01},
urldate = {2025-01-16},
publisher = {arXiv},
abstract = {Several open-source systems, such as Flower and NVIDIA FLARE, have been developed in recent years while focusing on different aspects of federated learning (FL). Flower is dedicated to implementing a cohesive approach to FL, analytics, and evaluation. Over time, Flower has cultivated extensive strategies and algorithms tailored for FL application development, fostering a vibrant FL community in research and industry. Conversely, FLARE has prioritized the creation of an enterprise-ready, resilient runtime environment explicitly designed for FL applications in production environments. In this paper, we describe our initial integration of both frameworks and show how they can work together to supercharge the FL ecosystem as a whole. Through the seamless integration of Flower and FLARE, applications crafted within the Flower framework can effortlessly operate within the FLARE runtime environment without necessitating any modifications. This initial integration streamlines the process, eliminating complexities and ensuring smooth interoperability between the two platforms, thus enhancing the overall efficiency and accessibility of FL applications.},
note = {arXiv:2407.00031 [cs]},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
Diaz-Pinto, Andres; Alle, Sachidanand; Nath, Vishwesh; Tang, Yucheng; Ihsani, Alvin; Asad, Muhammad; Pérez-García, Fernando; Mehta, Pritesh; Li, Wenqi; Flores, Mona; Roth, Holger R.; Vercauteren, Tom; Xu, Daguang; Dogra, Prerna; Ourselin, Sebastien; Feng, Andrew; Cardoso, M. Jorge
MONAI Label: A framework for AI-assisted interactive labeling of 3D medical images Journal Article
In: Medical Image Analysis, vol. 95, pp. 103207, 2024, ISSN: 1361-8415.
@article{diaz-pinto_monai_2024,
title = {MONAI Label: A framework for AI-assisted interactive labeling of 3D medical images},
author = {Andres Diaz-Pinto and Sachidanand Alle and Vishwesh Nath and Yucheng Tang and Alvin Ihsani and Muhammad Asad and Fernando Pérez-García and Pritesh Mehta and Wenqi Li and Mona Flores and Holger R. Roth and Tom Vercauteren and Daguang Xu and Prerna Dogra and Sebastien Ourselin and Andrew Feng and M. Jorge Cardoso},
url = {https://linkinghub.elsevier.com/retrieve/pii/S1361841524001324},
doi = {10.1016/j.media.2024.103207},
issn = {1361-8415},
year = {2024},
date = {2024-07-01},
urldate = {2025-01-16},
journal = {Medical Image Analysis},
volume = {95},
pages = {103207},
keywords = {MedVR},
pubstate = {published},
tppubtype = {article}
}
Huang, Shuo; Jones, Fred; Gurney, Nikolos; Pynadath, David; Srivastava, Kunal; Trent, Stoney; Wu, Peggy; Zhu, Quanyan
PsybORG+: Modeling and Simulation for Detecting Cognitive Biases in Advanced Persistent Threats Miscellaneous
2024, (Version Number: 3).
@misc{huang_psyborg_2024,
title = {PsybORG+: Modeling and Simulation for Detecting Cognitive Biases in Advanced Persistent Threats},
author = {Shuo Huang and Fred Jones and Nikolos Gurney and David Pynadath and Kunal Srivastava and Stoney Trent and Peggy Wu and Quanyan Zhu},
url = {https://arxiv.org/abs/2408.01310},
doi = {10.48550/ARXIV.2408.01310},
year = {2024},
date = {2024-07-01},
urldate = {2024-12-05},
publisher = {arXiv},
abstract = {Advanced Persistent Threats (APTs) bring significant challenges to cybersecurity due to their sophisticated and stealthy nature. Traditional cybersecurity measures fail to defend against APTs. Cognitive vulnerabilities can significantly influence attackers' decision-making processes, which presents an opportunity for defenders to exploit. This work introduces PsybORG$ˆ+$, a multi-agent cybersecurity simulation environment designed to model APT behaviors influenced by cognitive vulnerabilities. A classification model is built for cognitive vulnerability inference and a simulator is designed for synthetic data generation. Results show that PsybORG$ˆ+$ can effectively model APT attackers with different loss aversion and confirmation bias levels. The classification model has at least a 0.83 accuracy rate in predicting cognitive vulnerabilities.},
note = {Version Number: 3},
keywords = {DTIC},
pubstate = {published},
tppubtype = {misc}
}
Owayyed, Mohammed Al; Tielman, Myrthe; Hartholt, Arno; Specht, Marcus; Brinkman, Willem-Paul
Agent-based social skills training systems: the ARTES architecture, interaction characteristics, learning theories and future outlooks Journal Article
In: Behaviour & Information Technology, pp. 1–28, 2024, ISSN: 0144-929X, 1362-3001.
@article{al_owayyed_agent-based_2024,
title = {Agent-based social skills training systems: the ARTES architecture, interaction characteristics, learning theories and future outlooks},
author = {Mohammed Al Owayyed and Myrthe Tielman and Arno Hartholt and Marcus Specht and Willem-Paul Brinkman},
url = {https://www.tandfonline.com/doi/full/10.1080/0144929X.2024.2374891},
doi = {10.1080/0144929X.2024.2374891},
issn = {0144-929X, 1362-3001},
year = {2024},
date = {2024-07-01},
urldate = {2024-08-15},
journal = {Behaviour & Information Technology},
pages = {1–28},
keywords = {Virtual Agents, Virtual Humans},
pubstate = {published},
tppubtype = {article}
}
Bell, Imogen H.; Pot-Kolder, Roos; Rizzo, Albert; Rus-Calafell, Mar; Cardi, Valentina; Cella, Matteo; Ward, Thomas; Riches, Simon; Reinoso, Martin; Thompson, Andrew; Alvarez-Jimenez, Mario; Valmaggia, Lucia
Advances in the use of virtual reality to treat mental health conditions Journal Article
In: Nat Rev Psychol, 2024, ISSN: 2731-0574.
@article{bell_advances_2024,
title = {Advances in the use of virtual reality to treat mental health conditions},
author = {Imogen H. Bell and Roos Pot-Kolder and Albert Rizzo and Mar Rus-Calafell and Valentina Cardi and Matteo Cella and Thomas Ward and Simon Riches and Martin Reinoso and Andrew Thompson and Mario Alvarez-Jimenez and Lucia Valmaggia},
url = {https://www.nature.com/articles/s44159-024-00334-9},
doi = {10.1038/s44159-024-00334-9},
issn = {2731-0574},
year = {2024},
date = {2024-07-01},
urldate = {2024-07-11},
journal = {Nat Rev Psychol},
keywords = {MedVR},
pubstate = {published},
tppubtype = {article}
}
Gunasekara, Chulaka; Kim, Seokhwan; D'Haro, Luis Fernando; Rastogi, Abhinav; Chen, Yun-Nung; Eric, Mihail; Hedayatnia, Behnam; Gopalakrishnan, Karthik; Liu, Yang; Huang, Chao-Wei; Hakkani-Tür, Dilek; Li, Jinchao; Zhu, Qi; Luo, Lingxiao; Liden, Lars; Huang, Kaili; Shayandeh, Shahin; Liang, Runze; Peng, Baolin; Zhang, Zheng; Shukla, Swadheen; Huang, Minlie; Gao, Jianfeng; Mehri, Shikib; Feng, Yulan; Gordon, Carla; Alavi, Seyed Hossein; Traum, David; Eskenazi, Maxine; Beirami, Ahmad; Cho, Eunjoon; Crook, Paul A.; De, Ankita; Geramifard, Alborz; Kottur, Satwik; Moon, Seungwhan; Poddar, Shivani; Subba, Rajen
Overview of the Ninth Dialog System Technology Challenge: DSTC9 Journal Article
In: IEEE/ACM Trans. Audio Speech Lang. Process., pp. 1–10, 2024, ISSN: 2329-9290, 2329-9304.
@article{gunasekara_overview_2024,
title = {Overview of the Ninth Dialog System Technology Challenge: DSTC9},
author = {Chulaka Gunasekara and Seokhwan Kim and Luis Fernando D'Haro and Abhinav Rastogi and Yun-Nung Chen and Mihail Eric and Behnam Hedayatnia and Karthik Gopalakrishnan and Yang Liu and Chao-Wei Huang and Dilek Hakkani-Tür and Jinchao Li and Qi Zhu and Lingxiao Luo and Lars Liden and Kaili Huang and Shahin Shayandeh and Runze Liang and Baolin Peng and Zheng Zhang and Swadheen Shukla and Minlie Huang and Jianfeng Gao and Shikib Mehri and Yulan Feng and Carla Gordon and Seyed Hossein Alavi and David Traum and Maxine Eskenazi and Ahmad Beirami and Eunjoon Cho and Paul A. Crook and Ankita De and Alborz Geramifard and Satwik Kottur and Seungwhan Moon and Shivani Poddar and Rajen Subba},
url = {https://ieeexplore.ieee.org/document/10595468/},
doi = {10.1109/TASLP.2024.3426331},
issn = {2329-9290, 2329-9304},
year = {2024},
date = {2024-07-01},
urldate = {2024-08-15},
journal = {IEEE/ACM Trans. Audio Speech Lang. Process.},
pages = {1–10},
keywords = {Natural Language},
pubstate = {published},
tppubtype = {article}
}
Han, Bin; Yau, Cleo; Lei, Su; Gratch, Jonathan
In-Depth Analysis of Emotion Recognition through Knowledge-Based Large Language Models Miscellaneous
2024, (arXiv:2408.00780 [cs]).
@misc{han_-depth_2024,
title = {In-Depth Analysis of Emotion Recognition through Knowledge-Based Large Language Models},
author = {Bin Han and Cleo Yau and Su Lei and Jonathan Gratch},
url = {http://arxiv.org/abs/2408.00780},
year = {2024},
date = {2024-07-01},
urldate = {2024-08-15},
publisher = {arXiv},
abstract = {Emotion recognition in social situations is a complex task that requires integrating information from both facial expressions and the situational context. While traditional approaches to automatic emotion recognition have focused on decontextualized signals, recent research emphasizes the importance of context in shaping emotion perceptions. This paper contributes to the emerging field of context-based emotion recognition by leveraging psychological theories of human emotion perception to inform the design of automated methods. We propose an approach that combines emotion recognition methods with Bayesian Cue Integration (BCI) to integrate emotion inferences from decontextualized facial expressions and contextual knowledge inferred via Large-language Models. We test this approach in the context of interpreting facial expressions during a social task, the prisoner's dilemma. Our results provide clear support for BCI across a range of automatic emotion recognition methods. The best automated method achieved results comparable to human observers, suggesting the potential for this approach to advance the field of affective computing.},
note = {arXiv:2408.00780 [cs]},
keywords = {Virtual Humans},
pubstate = {published},
tppubtype = {misc}
}
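For context, Bayesian Cue Integration as named in this abstract conventionally combines independent evidence sources by multiplying their posteriors and dividing out the shared prior; whether the paper uses exactly this form is an assumption here. For an emotion e, a facial cue f, and a contextual cue c, conditional independence of the cues given e yields

\[ P(e \mid f, c) \propto \frac{P(e \mid f)\, P(e \mid c)}{P(e)} \]

so the face-only and context-only emotion inferences can each come from a separate model and be fused after the fact.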
Xiao, Hanyuan; Chen, Yingshu; Huang, Huajian; Xiong, Haolin; Yang, Jing; Prasad, Pratusha; Zhao, Yajie
Localized Gaussian Splatting Editing with Contextual Awareness Miscellaneous
2024, (arXiv:2408.00083 [cs]).
@misc{xiao_localized_2024,
title = {Localized Gaussian Splatting Editing with Contextual Awareness},
author = {Hanyuan Xiao and Yingshu Chen and Huajian Huang and Haolin Xiong and Jing Yang and Pratusha Prasad and Yajie Zhao},
url = {http://arxiv.org/abs/2408.00083},
year = {2024},
date = {2024-07-01},
urldate = {2024-08-16},
publisher = {arXiv},
abstract = {Recent text-guided generation of individual 3D object has achieved great success using diffusion priors. However, these methods are not suitable for object insertion and replacement tasks as they do not consider the background, leading to illumination mismatches within the environment. To bridge the gap, we introduce an illumination-aware 3D scene editing pipeline for 3D Gaussian Splatting (3DGS) representation. Our key observation is that inpainting by the state-of-the-art conditional 2D diffusion model is consistent with background in lighting. To leverage the prior knowledge from the well-trained diffusion models for 3D object generation, our approach employs a coarse-to-fine objection optimization pipeline with inpainted views. In the first coarse step, we achieve image-to-3D lifting given an ideal inpainted view. The process employs 3D-aware diffusion prior from a view-conditioned diffusion model, which preserves illumination present in the conditioning image. To acquire an ideal inpainted image, we introduce an Anchor View Proposal (AVP) algorithm to find a single view that best represents the scene illumination in target region. In the second Texture Enhancement step, we introduce a novel Depth-guided Inpainting Score Distillation Sampling (DI-SDS), which enhances geometry and texture details with the inpainting diffusion prior, beyond the scope of the 3D-aware diffusion prior knowledge in the first coarse step. DI-SDS not only provides fine-grained texture enhancement, but also urges optimization to respect scene lighting. Our approach efficiently achieves local editing with global illumination consistency without explicitly modeling light transport. We demonstrate robustness of our method by evaluating editing in real scenes containing explicit highlight and shadows, and compare against the state-of-the-art text-to-3D editing methods.},
note = {arXiv:2408.00083 [cs]},
keywords = {DTIC, VGL},
pubstate = {published},
tppubtype = {misc}
}
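As background for the DI-SDS objective described in this abstract: standard Score Distillation Sampling (introduced in DreamFusion) optimizes 3D parameters θ rendering an image x = g(θ) with the gradient

\[ \nabla_{\theta} \mathcal{L}_{\mathrm{SDS}} = \mathbb{E}_{t,\epsilon}\left[ w(t) \left( \hat{\epsilon}_{\phi}(x_{t}; y, t) - \epsilon \right) \frac{\partial x}{\partial \theta} \right] \]

where \hat{\epsilon}_{\phi} is the diffusion model's noise prediction under condition y and x_t is the noised render. The depth-guided inpainting variant presumably swaps in an inpainting diffusion prior with depth conditioning; that reading is inferred from the abstract, not the authors' stated formulation.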
Liu, Ruying; Wu, Wanjing; Becerik-Gerber, Burcin; Lucas, Gale M.
Enhancing Building Safety Design for Active Shooter Incidents: Exploration of Building Exit Parameters using Reinforcement Learning-Based Simulations Miscellaneous
2024, (arXiv:2407.10441 [cs]).
@misc{liu_enhancing_2024,
title = {Enhancing Building Safety Design for Active Shooter Incidents: Exploration of Building Exit Parameters using Reinforcement Learning-Based Simulations},
author = {Ruying Liu and Wanjing Wu and Burcin Becerik-Gerber and Gale M. Lucas},
url = {http://arxiv.org/abs/2407.10441},
year = {2024},
date = {2024-07-01},
urldate = {2024-09-17},
publisher = {arXiv},
abstract = {With the alarming rise in active shooter incidents (ASIs) in the United States, enhancing public safety through building design has become a pressing need. This study proposes a reinforcement learning-based simulation approach addressing gaps in existing research that has neglected the dynamic behaviours of shooters. We developed an autonomous agent to simulate an active shooter within a realistic office environment, aiming to offer insights into the interactions between building design parameters and ASI outcomes. A case study is conducted to quantitatively investigate the impact of building exit numbers (total count of accessible exits) and configuration (arrangement of which exits are available or not) on evacuation and harm rates. Findings demonstrate that greater exit availability significantly improves evacuation outcomes and reduces harm. Exits nearer to the shooter's initial position hold greater importance for accessibility than those farther away. By encompassing dynamic shooter behaviours, this study offers preliminary insights into effective building safety design against evolving threats.},
note = {arXiv:2407.10441 [cs]},
keywords = {DTIC, Virtual Worlds},
pubstate = {published},
tppubtype = {misc}
}
Ke, Pei; Wen, Bosi; Feng, Andrew; Liu, Xiao; Lei, Xuanyu; Cheng, Jiale; Wang, Shengyuan; Zeng, Aohan; Dong, Yuxiao; Wang, Hongning; Tang, Jie; Huang, Minlie
CritiqueLLM: Towards an Informative Critique Generation Model for Evaluation of Large Language Model Generation Proceedings Article
In: Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 13034–13054, Association for Computational Linguistics, Bangkok, Thailand, 2024.
@inproceedings{ke_critiquellm_2024,
title = {CritiqueLLM: Towards an Informative Critique Generation Model for Evaluation of Large Language Model Generation},
author = {Pei Ke and Bosi Wen and Andrew Feng and Xiao Liu and Xuanyu Lei and Jiale Cheng and Shengyuan Wang and Aohan Zeng and Yuxiao Dong and Hongning Wang and Jie Tang and Minlie Huang},
url = {https://aclanthology.org/2024.acl-long.704},
doi = {10.18653/v1/2024.acl-long.704},
year = {2024},
date = {2024-06-01},
urldate = {2025-01-16},
booktitle = {Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
pages = {13034–13054},
publisher = {Association for Computational Linguistics},
address = {Bangkok, Thailand},
keywords = {Natural Language},
pubstate = {published},
tppubtype = {inproceedings}
}
Lu, Shuhong; Jin, Zhangyu; Rajendran, Vickram; Harari, Michal; Feng, Andrew; Melo, Celso M. De
Synthetic-to-real adaptation for complex action recognition in surveillance applications Proceedings Article
In: Manser, Kimberly E.; Melo, Celso De; Rao, Raghuveer M.; Howell, Christopher L. (Ed.): Synthetic Data for Artificial Intelligence and Machine Learning: Tools, Techniques, and Applications II, pp. 14, SPIE, National Harbor, United States, 2024, ISBN: 978-1-5106-7388-5 978-1-5106-7389-2.
@inproceedings{lu_synthetic-real_2024,
title = {Synthetic-to-real adaptation for complex action recognition in surveillance applications},
author = {Shuhong Lu and Zhangyu Jin and Vickram Rajendran and Michal Harari and Andrew Feng and Celso M. De Melo},
editor = {Kimberly E. Manser and Celso De Melo and Raghuveer M. Rao and Christopher L. Howell},
url = {https://www.spiedigitallibrary.org/conference-proceedings-of-spie/13035/3012393/Synthetic-to-real-adaptation-for-complex-action-recognition-in-surveillance/10.1117/12.3012393.full},
doi = {10.1117/12.3012393},
isbn = {978-1-5106-7388-5 978-1-5106-7389-2},
year = {2024},
date = {2024-06-01},
urldate = {2024-07-11},
booktitle = {Synthetic Data for Artificial Intelligence and Machine Learning: Tools, Techniques, and Applications II},
pages = {14},
publisher = {SPIE},
address = {National Harbor, United States},
keywords = {DTIC},
pubstate = {published},
tppubtype = {inproceedings}
}
Nurunnabi, Abdul; Teferle, Felicia; Laefer, Debra F.; Chen, Meida; Ali, Mir Masoom
Development of a Precise Tree Structure from LiDAR Point Clouds Journal Article
In: Int. Arch. Photogramm. Remote Sens. Spatial Inf. Sci., vol. XLVIII-2-2024, pp. 301–308, 2024, ISSN: 2194-9034.
@article{nurunnabi_development_2024,
title = {Development of a Precise Tree Structure from LiDAR Point Clouds},
author = {Abdul Nurunnabi and Felicia Teferle and Debra F. Laefer and Meida Chen and Mir Masoom Ali},
url = {https://isprs-archives.copernicus.org/articles/XLVIII-2-2024/301/2024/},
doi = {10.5194/isprs-archives-XLVIII-2-2024-301-2024},
issn = {2194-9034},
year = {2024},
date = {2024-06-01},
urldate = {2024-07-11},
journal = {Int. Arch. Photogramm. Remote Sens. Spatial Inf. Sci.},
volume = {XLVIII-2-2024},
pages = {301–308},
abstract = {A precise tree structure that represents the distribution of tree stem, branches, and leaves is crucial for accurately capturing the full representation of a tree. Light Detection and Ranging (LiDAR)-based three-dimensional (3D) point clouds (PCs) capture the geometry of scanned objects including forest stands and individual trees. PCs are irregular, unstructured, often noisy, and contaminated by outliers. Researchers have struggled to develop methods to separate leaves and wood without losing the tree geometry. This paper proposes a solution that employs only the spatial coordinates (x, y, z) of the PC. The new algorithm works as a filtering approach, utilizing multi-scale neighborhood-based geometric features (GFs), e.g., linearity, planarity, and verticality, to classify linear (wood) and non-linear (leaf) points. This involves finding potential wood points and coupling them with an octree-based segmentation to develop a tree architecture. The main contributions of this paper are (i) investigating the potential of different GFs to split linear and non-linear points, (ii) introducing a novel method that pointwise classifies leaf and wood points, and (iii) developing a precise 3D tree structure. The performance of the new algorithm has been demonstrated through terrestrial laser scanning PCs. For a Scots pine tree, the new method classifies leaf and wood points with an overall accuracy of 97.9%.},
keywords = {Narrative, VGL},
pubstate = {published},
tppubtype = {article}
}
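The linearity, planarity, and verticality features this abstract relies on are conventionally derived from the eigenvalues λ1 ≥ λ2 ≥ λ3 (with eigenvectors e1, e2, e3) of the covariance matrix of a point's local neighborhood; the paper's exact definitions may differ, but a common convention (e.g., Weinmann et al.) is

\[ L_{\lambda} = \frac{\lambda_{1} - \lambda_{2}}{\lambda_{1}}, \qquad P_{\lambda} = \frac{\lambda_{2} - \lambda_{3}}{\lambda_{1}}, \qquad V = 1 - \left| \langle \mathbf{e}_{3}, \mathbf{e}_{z} \rangle \right| \]

with e_z the vertical axis, so elongated stem and branch neighborhoods score high on linearity while leaf clusters do not.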
Zhang, Mingyuan; Cai, Zhongang; Pan, Liang; Hong, Fangzhou; Guo, Xinying; Yang, Lei; Liu, Ziwei
MotionDiffuse: Text-Driven Human Motion Generation With Diffusion Model Journal Article
In: IEEE Trans. Pattern Anal. Mach. Intell., vol. 46, no. 6, pp. 4115–4128, 2024, ISSN: 0162-8828, 2160-9292, 1939-3539.
@article{zhang_motiondiffuse_2024,
title = {MotionDiffuse: Text-Driven Human Motion Generation With Diffusion Model},
author = {Mingyuan Zhang and Zhongang Cai and Liang Pan and Fangzhou Hong and Xinying Guo and Lei Yang and Ziwei Liu},
url = {https://ieeexplore.ieee.org/document/10416192/},
doi = {10.1109/TPAMI.2024.3355414},
issn = {0162-8828, 2160-9292, 1939-3539},
year = {2024},
date = {2024-06-01},
urldate = {2024-07-18},
journal = {IEEE Trans. Pattern Anal. Mach. Intell.},
volume = {46},
number = {6},
pages = {4115–4128},
keywords = {VGL},
pubstate = {published},
tppubtype = {article}
}
Yin, Yinxuan; Nayyar, Mollik; Holman, Daniel; Lucas, Gale; Holbrook, Colin; Wagner, Alan
Validation and Evacuee Modeling of Virtual Robot-guided Emergency Evacuation Experiments Miscellaneous
2024.
Abstract | Links | BibTeX | Tags: DTIC, Virtual Humans
@misc{yin_validation_2024,
title = {Validation and Evacuee Modeling of Virtual Robot-guided Emergency Evacuation Experiments},
author = {Yinxuan Yin and Mollik Nayyar and Daniel Holman and Gale Lucas and Colin Holbrook and Alan Wagner},
url = {https://osf.io/mr78s},
doi = {10.31234/osf.io/mr78s},
year = {2024},
date = {2024-06-01},
urldate = {2024-09-17},
publisher = {Center for Open Science},
abstract = {Virtual Reality (VR) is an increasingly common tool for investigating human responses to emergency situations. Nonetheless, studies validating and comparing human subject behavior during real world emergencies to their responses in VR are notably rare, and no prior studies have validated whether human emergency responses to guidance from a robot are comparable in VR versus the real world. In the present pre-registered study, we used VR to replicate a previous robot-guided emergency evacuation study conducted in the real world and compared human subject behavior in matched physical and virtual environments. In both environments, human subjects were asked to follow a robot to a location and to then read an article. While reading, a fire alarm sounds. The robot then attempted to guide them to a distant, unfamiliar exit rather than nearby and familiar exits. We observed close correspondences between evacuee exit choice (the robot’s distant exit versus closer exits), evacuation time, and trust in the robot between the VR and physical environments. We further demonstrate that data collected in virtual reality can be used to create accurate motion models (mean error of 0.42 centimeters) predicting evacuee trajectories and locations in real life. Taken together, the results provide evidence for the ecological validity of VR approaches to studying human-robot interaction, particularly robot-guided emergency evacuation.},
keywords = {DTIC, Virtual Humans},
pubstate = {published},
tppubtype = {misc}
}
Saxon, Leslie; Faulk, Robert T; Boberg, Jill; Barrett, Trevor; McLelland, Steve
Continuous Assessment of Active-Duty Army Special Operations and Reconnaissance Marines Using Digital Devices and Custom Software: The Digital Comprehensive Operator Readiness Assessment (DcORA) Study Journal Article
In: J. Spec. Oper. Med., 2024, ISSN: 1553-9768.
Links | BibTeX | Tags: CBC, DTIC
@article{saxon_continuous_2024,
title = {Continuous Assessment of Active-Duty Army Special Operations and Reconnaissance Marines Using Digital Devices and Custom Software: The Digital Comprehensive Operator Readiness Assessment (DcORA) Study},
author = {Leslie Saxon and Robert T Faulk and Jill Boberg and Trevor Barrett and Steve McLelland},
url = {https://www.jsomonline.org/Citations/PXKK-I23D.php},
doi = {10.55460/PXKK-I23D},
issn = {1553-9768},
year = {2024},
date = {2024-06-01},
urldate = {2024-06-25},
journal = {J. Spec. Oper. Med.},
keywords = {CBC, DTIC},
pubstate = {published},
tppubtype = {article}
}
Greenwald, Eric; Krakowski, Ari; Hurt, Timothy; Grindstaff, Kelly; Wang, Ning
It's like I'm the AI: Youth Sensemaking About AI through Metacognitive Embodiment Proceedings Article
In: Proceedings of the 23rd Annual ACM Interaction Design and Children Conference, pp. 789–793, ACM, Delft Netherlands, 2024, ISBN: 979-8-4007-0442-0.
Links | BibTeX | Tags: AI, Machine Learning
@inproceedings{greenwald_its_2024,
title = {It's like I'm the AI: Youth Sensemaking About AI through Metacognitive Embodiment},
author = {Eric Greenwald and Ari Krakowski and Timothy Hurt and Kelly Grindstaff and Ning Wang},
url = {https://dl.acm.org/doi/10.1145/3628516.3659395},
doi = {10.1145/3628516.3659395},
isbn = {979-8-4007-0442-0},
year = {2024},
date = {2024-06-01},
urldate = {2024-06-25},
booktitle = {Proceedings of the 23rd Annual ACM Interaction Design and Children Conference},
pages = {789–793},
publisher = {ACM},
address = {Delft Netherlands},
keywords = {AI, Machine Learning},
pubstate = {published},
tppubtype = {inproceedings}
}
Chen, Meida; Lal, Devashish; Yu, Zifan; Xu, Jiuyi; Feng, Andrew; You, Suya; Nurunnabi, Abdul; Shi, Yangming
Large-Scale 3D Terrain Reconstruction Using 3D Gaussian Splatting for Visualization and Simulation Journal Article
In: Int. Arch. Photogramm. Remote Sens. Spatial Inf. Sci., vol. XLVIII-2-2024, pp. 49–54, 2024, ISSN: 2194-9034.
Abstract | Links | BibTeX | Tags: DTIC, Graphics, VGL
@article{chen_large-scale_2024,
title = {Large-Scale 3D Terrain Reconstruction Using 3D Gaussian Splatting for Visualization and Simulation},
author = {Meida Chen and Devashish Lal and Zifan Yu and Jiuyi Xu and Andrew Feng and Suya You and Abdul Nurunnabi and Yangming Shi},
url = {https://isprs-archives.copernicus.org/articles/XLVIII-2-2024/49/2024/},
doi = {10.5194/isprs-archives-XLVIII-2-2024-49-2024},
issn = {2194-9034},
year = {2024},
date = {2024-06-01},
urldate = {2024-06-20},
journal = {Int. Arch. Photogramm. Remote Sens. Spatial Inf. Sci.},
volume = {XLVIII-2-2024},
pages = {49–54},
abstract = {The fusion of low-cost unmanned aerial systems (UAS) with advanced photogrammetric techniques has revolutionized 3D terrain reconstruction, enabling the automated creation of detailed models. Concurrently, the advent of 3D Gaussian Splatting has introduced a paradigm shift in 3D data representation, offering visually realistic renditions distinct from traditional polygon-based models. Our research builds upon this foundation, aiming to integrate Gaussian Splatting into interactive simulations for immersive virtual environments. We address challenges such as collision detection by adopting a hybrid approach, combining Gaussian Splatting with photogrammetry-derived meshes. Through comprehensive experimentation covering varying terrain sizes and Gaussian densities, we evaluate scalability, performance, and limitations. Our findings contribute to advancing the use of advanced computer graphics techniques for enhanced 3D terrain visualization and simulation.},
keywords = {DTIC, Graphics, VGL},
pubstate = {published},
tppubtype = {article}
}
Nye, Benjamin D.; Core, Mark G.; Chereddy, Sai V. R.; Young, Vivian; Auerbach, Daniel
Bootstrapping Assessments for Team Simulations: Transfer Learning Between First-Person-Shooter Game Maps Book Section
In: Sottilare, Robert A.; Schwarz, Jessica (Ed.): Adaptive Instructional Systems, vol. 14727, pp. 261–271, Springer Nature Switzerland, Cham, 2024, ISBN: 978-3-031-60608-3 978-3-031-60609-0, (Series Title: Lecture Notes in Computer Science).
Links | BibTeX | Tags: DTIC, Learning Sciences, Machine Learning, UARC
@incollection{sottilare_bootstrapping_2024,
title = {Bootstrapping Assessments for Team Simulations: Transfer Learning Between First-Person-Shooter Game Maps},
author = {Benjamin D. Nye and Mark G. Core and Sai V. R. Chereddy and Vivian Young and Daniel Auerbach},
editor = {Robert A. Sottilare and Jessica Schwarz},
url = {https://link.springer.com/10.1007/978-3-031-60609-0_19},
doi = {10.1007/978-3-031-60609-0_19},
isbn = {978-3-031-60608-3 978-3-031-60609-0},
year = {2024},
date = {2024-06-01},
urldate = {2024-06-18},
booktitle = {Adaptive Instructional Systems},
volume = {14727},
pages = {261–271},
publisher = {Springer Nature Switzerland},
address = {Cham},
note = {Series Title: Lecture Notes in Computer Science},
keywords = {DTIC, Learning Sciences, Machine Learning, UARC},
pubstate = {published},
tppubtype = {incollection}
}
Core, Mark G.; Nye, Benjamin D.; Fegley, Brent D.
Trend-Aware Scenario Authoring: Adapting Training Toward Patterns from Real Operations Book Section
In: Sottilare, Robert A.; Schwarz, Jessica (Ed.): Adaptive Instructional Systems, vol. 14727, pp. 15–24, Springer Nature Switzerland, Cham, 2024, ISBN: 978-3-031-60608-3 978-3-031-60609-0, (Series Title: Lecture Notes in Computer Science).
Links | BibTeX | Tags: DTIC, Learning Sciences, UARC
@incollection{sottilare_trend-aware_2024,
title = {Trend-Aware Scenario Authoring: Adapting Training Toward Patterns from Real Operations},
author = {Mark G. Core and Benjamin D. Nye and Brent D. Fegley},
editor = {Robert A. Sottilare and Jessica Schwarz},
url = {https://link.springer.com/10.1007/978-3-031-60609-0_2},
doi = {10.1007/978-3-031-60609-0_2},
isbn = {978-3-031-60608-3 978-3-031-60609-0},
year = {2024},
date = {2024-06-01},
urldate = {2024-06-18},
booktitle = {Adaptive Instructional Systems},
volume = {14727},
pages = {15–24},
publisher = {Springer Nature Switzerland},
address = {Cham},
note = {Series Title: Lecture Notes in Computer Science},
keywords = {DTIC, Learning Sciences, UARC},
pubstate = {published},
tppubtype = {incollection}
}
Bohy, Hugo; Tran, Minh; Haddad, Kevin El; Dutoit, Thierry; Soleymani, Mohammad
Social-MAE: A Transformer-Based Multimodal Autoencoder for Face and Voice Proceedings Article
In: 2024 IEEE 18th International Conference on Automatic Face and Gesture Recognition (FG), pp. 1–5, IEEE, Istanbul, Turkiye, 2024, ISBN: 979-8-3503-9494-8.
@inproceedings{bohy_social-mae_2024,
title = {Social-MAE: A Transformer-Based Multimodal Autoencoder for Face and Voice},
author = {Hugo Bohy and Minh Tran and Kevin El Haddad and Thierry Dutoit and Mohammad Soleymani},
url = {https://ieeexplore.ieee.org/document/10581940/},
doi = {10.1109/FG59268.2024.10581940},
isbn = {979-8-3503-9494-8},
year = {2024},
date = {2024-05-01},
urldate = {2024-07-18},
booktitle = {2024 IEEE 18th International Conference on Automatic Face and Gesture Recognition (FG)},
pages = {1–5},
publisher = {IEEE},
address = {Istanbul, Turkiye},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Liu, Rong; Xu, Rui; Hu, Yue; Chen, Meida; Feng, Andrew
AtomGS: Atomizing Gaussian Splatting for High-Fidelity Radiance Field Miscellaneous
2024, (Version Number: 2).
Abstract | Links | BibTeX | Tags: Graphics, VGL
@misc{liu_atomgs_2024,
title = {AtomGS: Atomizing Gaussian Splatting for High-Fidelity Radiance Field},
author = {Rong Liu and Rui Xu and Yue Hu and Meida Chen and Andrew Feng},
url = {https://arxiv.org/abs/2405.12369},
doi = {10.48550/ARXIV.2405.12369},
year = {2024},
date = {2024-05-01},
urldate = {2024-07-11},
publisher = {arXiv},
abstract = {3D Gaussian Splatting (3DGS) has recently advanced radiance field reconstruction by offering superior capabilities for novel view synthesis and real-time rendering speed. However, its strategy of blending optimization and adaptive density control might lead to sub-optimal results; it can sometimes yield noisy geometry and blurry artifacts due to prioritizing optimizing large Gaussians at the cost of adequately densifying smaller ones. To address this, we introduce AtomGS, consisting of Atomized Proliferation and Geometry-Guided Optimization. The Atomized Proliferation constrains ellipsoid Gaussians of various sizes into more uniform-sized Atom Gaussians. The strategy enhances the representation of areas with fine features by placing greater emphasis on densification in accordance with scene details. In addition, we proposed a Geometry-Guided Optimization approach that incorporates an Edge-Aware Normal Loss. This optimization method effectively smooths flat surfaces while preserving intricate details. Our evaluation shows that AtomGS outperforms existing state-of-the-art methods in rendering quality. Additionally, it achieves competitive accuracy in geometry reconstruction and offers a significant improvement in training speed over other SDF-based methods. More interactive demos can be found in our website (https://rongliu-leo.github.io/AtomGS/).},
note = {Version Number: 2},
keywords = {Graphics, VGL},
pubstate = {published},
tppubtype = {misc}
}
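For intuition only, here is a minimal PyTorch sketch of an edge-aware normal loss of the general kind the AtomGS abstract names: neighbouring normals are smoothed except across image edges, where an RGB-gradient weight relaxes the penalty. The exact AtomGS formulation may differ; every name and weighting here is an assumption.

import torch

def edge_aware_normal_loss(normals, rgb):
    """normals, rgb: (C, H, W) rendered normal map and RGB image."""
    dn_x = normals.diff(dim=2).abs().mean(0)         # normal change along x
    dn_y = normals.diff(dim=1).abs().mean(0)         # normal change along y
    w_x = torch.exp(-rgb.diff(dim=2).abs().mean(0))  # small weight at edges
    w_y = torch.exp(-rgb.diff(dim=1).abs().mean(0))
    return (w_x * dn_x).mean() + (w_y * dn_y).mean()

A term of this shape flattens surfaces where the image is smooth while leaving genuine detail at edges untouched, which matches the behavior the abstract claims.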
Chang, Di; Shi, Yichun; Gao, Quankai; Fu, Jessica; Xu, Hongyi; Song, Guoxian; Yan, Qing; Zhu, Yizhe; Yang, Xiao; Soleymani, Mohammad
MagicPose: Realistic Human Poses and Facial Expressions Retargeting with Identity-aware Diffusion Miscellaneous
2024, (arXiv:2311.12052 [cs]).
Abstract | Links | BibTeX | Tags:
@misc{chang_magicpose_2024,
title = {MagicPose: Realistic Human Poses and Facial Expressions Retargeting with Identity-aware Diffusion},
author = {Di Chang and Yichun Shi and Quankai Gao and Jessica Fu and Hongyi Xu and Guoxian Song and Qing Yan and Yizhe Zhu and Xiao Yang and Mohammad Soleymani},
url = {http://arxiv.org/abs/2311.12052},
year = {2024},
date = {2024-05-01},
urldate = {2024-07-18},
publisher = {arXiv},
abstract = {In this work, we propose MagicPose, a diffusion-based model for 2D human pose and facial expression retargeting. Specifically, given a reference image, we aim to generate a person's new images by controlling the poses and facial expressions while keeping the identity unchanged. To this end, we propose a two-stage training strategy to disentangle human motions and appearance (e.g., facial expressions, skin tone and dressing), consisting of (1) the pre-training of an appearance-control block and (2) learning appearance-disentangled pose control. Our novel design enables robust appearance control over generated human images, including body, facial attributes, and even background. By leveraging the prior knowledge of image diffusion models, MagicPose generalizes well to unseen human identities and complex poses without the need for additional fine-tuning. Moreover, the proposed model is easy to use and can be considered as a plug-in module/extension to Stable Diffusion. The code is available at: https://github.com/Boese0601/MagicDance},
note = {arXiv:2311.12052 [cs]},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
Koresh, Caleb; Ustun, Volkan; Kumar, Rajay; Aris, Tim
Improving Reinforcement Learning Experiments in Unity through Waypoint Utilization Journal Article
In: FLAIRS, vol. 37, 2024, ISSN: 2334-0762.
Abstract | Links | BibTeX | Tags: Machine Learning
@article{koresh_improving_2024,
title = {Improving Reinforcement Learning Experiments in Unity through Waypoint Utilization},
author = {Caleb Koresh and Volkan Ustun and Rajay Kumar and Tim Aris},
url = {https://journals.flvc.org/FLAIRS/article/view/135571},
doi = {10.32473/flairs.37.1.135571},
issn = {2334-0762},
year = {2024},
date = {2024-05-01},
urldate = {2024-08-13},
journal = {FLAIRS},
volume = {37},
abstract = {Multi-agent Reinforcement Learning (MARL) models teams of agents that learn by dynamically interacting with an environment and each other, presenting opportunities to train adaptive models for team-based scenarios. However, MARL algorithms pose substantial challenges due to their immense computational requirements. This paper introduces an automatically generated waypoint-based movement system to abstract and simplify complex environments in Unity while allowing agents to learn strategic cooperation. To demonstrate the effectiveness of our approach, we utilized a simple scenario with heterogeneous roles in each team. We trained this scenario on variations of realistic terrains and compared learning between fine-grained (almost) continuous and waypoint-based movement systems. Our results indicate efficiency in learning and improved performance with waypoint-based navigation. Furthermore, our results show that waypoint-based movement systems can effectively learn differentiated behavior policies for heterogeneous roles in these experiments. These early exploratory results point out the potential of waypoint-based navigation for reducing the computational costs of developing and training MARL models in complex environments. The complete project with all scenarios and results is available on GitHub: https://github.com/HATS-ICT/ml-agents-dodgeball-env-ICT.},
keywords = {Machine Learning},
pubstate = {published},
tppubtype = {article}
}
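As a rough sketch of the waypoint abstraction described in the abstract (names and thresholds are illustrative, not the project's ML-Agents code), a graph is generated by linking nearby waypoints, and each agent's discrete action then selects a neighbouring waypoint rather than a fine-grained motion:

import math

def build_waypoint_graph(waypoints, max_edge_len):
    """waypoints: list of (x, y, z) positions; link pairs within range."""
    graph = {i: [] for i in range(len(waypoints))}
    for i, p in enumerate(waypoints):
        for j in range(i + 1, len(waypoints)):
            if math.dist(p, waypoints[j]) <= max_edge_len:
                graph[i].append(j)
                graph[j].append(i)
    return graph

def move(graph, node, action):
    """A discrete RL action indexes into the current node's neighbours."""
    neighbours = graph[node]
    return neighbours[action % len(neighbours)] if neighbours else node

Collapsing movement to graph hops shrinks both the action space and the effective episode length, which is the source of the computational savings the paper reports.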
Aris, Timothy; Ustun, Volkan; Kumar, Rajay
Training Reinforcement Learning Agents to React to an Ambush for Military Simulations Journal Article
In: FLAIRS, vol. 37, 2024, ISSN: 2334-0762.
Abstract | Links | BibTeX | Tags: Simulation, VR
@article{aris_training_2024,
title = {Training Reinforcement Learning Agents to React to an Ambush for Military Simulations},
author = {Timothy Aris and Volkan Ustun and Rajay Kumar},
url = {https://journals.flvc.org/FLAIRS/article/view/135578},
doi = {10.32473/flairs.37.1.135578},
issn = {2334-0762},
year = {2024},
date = {2024-05-01},
urldate = {2024-08-13},
journal = {FLAIRS},
volume = {37},
abstract = {There is a need for realistic Opposing Forces (OPFOR) behavior in military training simulations. Current training simulations generally only have simple, non-adaptive behaviors, requiring human instructors to play the role of OPFOR in any complicated scenario. This poster addresses this need by focusing on a specific scenario: training reinforcement learning agents to react to an ambush. It proposes a novel way to check for occlusion algorithmically. It shows vector fields showing the agent’s actions through the course of a training run. It shows that a single agent switching between multiple goals is possible, at least in a simplified environment. Such an approach could reduce the need to develop different agents for different scenarios. Finally, it shows a competent agent trained on a simplified React to Ambush scenario, demonstrating the plausibility of a scaled-up version.},
keywords = {Simulation, VR},
pubstate = {published},
tppubtype = {article}
}
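The poster's algorithmic occlusion check is not spelled out in the abstract; a common way to implement one, sketched below under that assumption, is a line-of-sight test against axis-aligned blockers (a pure-Python stand-in for an engine raycast):

def ray_hits_aabb(origin, target, box_min, box_max):
    """Slab test: does the segment origin->target cross the box?"""
    t_near, t_far = 0.0, 1.0
    for axis in range(3):
        d = target[axis] - origin[axis]
        if abs(d) < 1e-9:                      # segment parallel to this slab
            if not box_min[axis] <= origin[axis] <= box_max[axis]:
                return False
            continue
        t1 = (box_min[axis] - origin[axis]) / d
        t2 = (box_max[axis] - origin[axis]) / d
        t_near = max(t_near, min(t1, t2))
        t_far = min(t_far, max(t1, t2))
        if t_near > t_far:
            return False
    return True

def is_occluded(agent_pos, threat_pos, blockers):
    """blockers: iterable of (box_min, box_max) obstacle bounds."""
    return any(ray_hits_aabb(agent_pos, threat_pos, lo, hi)
               for lo, hi in blockers)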
Liu, Lixing; Ustun, Volkan; Kumar, Rajay
Leveraging Organizational Hierarchy to Simplify Reward Design in Cooperative Multi-agent Reinforcement Learning Journal Article
In: FLAIRS, vol. 37, 2024, ISSN: 2334-0762.
Abstract | Links | BibTeX | Tags: Machine Learning
@article{liu_leveraging_2024,
title = {Leveraging Organizational Hierarchy to Simplify Reward Design in Cooperative Multi-agent Reinforcement Learning},
author = {Lixing Liu and Volkan Ustun and Rajay Kumar},
url = {https://journals.flvc.org/FLAIRS/article/view/135588},
doi = {10.32473/flairs.37.1.135588},
issn = {2334-0762},
year = {2024},
date = {2024-05-01},
urldate = {2024-08-13},
journal = {FLAIRS},
volume = {37},
abstract = {The effectiveness of multi-agent reinforcement learning (MARL) hinges largely on the meticulous arrangement of objectives. Yet, conventional MARL methods might not completely harness the inherent structures present in environmental states and agent relationships for goal organization. This study is conducted within the domain of military training simulations, which are typically characterized by complex, heterogeneous, non-stationary, and doctrine-driven environments with a clear organizational hierarchy and a top-down chain of command. This research investigates the approximation and integration of the organizational hierarchy into MARL for cooperative training scenarios, with the goal of streamlining the processes of reward engineering and enhancing team coordination. In the preliminary experiments, we employed two-tiered commander-subordinate feudal hierarchical (CSFH) networks to separate the prioritized team goal and individual goals. The empirical results demonstrate that the proposed framework enhances learning efficiency. It guarantees the learning of a prioritized policy for the commander agent and encourages subordinate agents to explore areas of interest more frequently, guided by appropriate soft constraints imposed by the commander.},
keywords = {Machine Learning},
pubstate = {published},
tppubtype = {article}
}
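A minimal sketch of the two-tier reward split the abstract describes, under our assumption (not the paper's stated form) that the commander is rewarded only on the prioritized team goal while subordinates mix individual rewards with a commander-issued soft constraint:

def commander_reward(team_goal_progress):
    """Commander optimizes the prioritized team objective only."""
    return team_goal_progress

def subordinate_reward(individual_reward, constraint_violation, beta=0.1):
    """beta weights the commander's soft constraint (assumed penalty form)."""
    return individual_reward - beta * constraint_violation

Separating the tiers this way spares the designer from hand-balancing a single monolithic team reward, which is the reward-engineering simplification the study targets.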
Lukin, Stephanie M; Bonial, Claire; Marge, Matthew; Hudson, Taylor; Hayes, Cory J.; Pollard, Kimberly; Baker, Anthony L.; Foots, Ashley; Artstein, Ron; Gervits, Felix; Abrams, Mitchell; Cassidy, Henry; Donatelli, Lucia; Leuski, Anton; Hill, Susan G.; Traum, David; Voss, Clare
SCOUT: A Situated and Multi-Modal Human-Robot Dialogue Corpus Journal Article
In: pp. 14445–14458, 2024.
Abstract | Links | BibTeX | Tags:
@article{lukin-etal-2024-scout-situated,
title = {SCOUT: A Situated and Multi-Modal Human-Robot Dialogue Corpus},
author = {Stephanie M Lukin and Claire Bonial and Matthew Marge and Taylor Hudson and Cory J. Hayes and Kimberly Pollard and Anthony L. Baker and Ashley Foots and Ron Artstein and Felix Gervits and Mitchell Abrams and Henry Cassidy and Lucia Donatelli and Anton Leuski and Susan G. Hill and David Traum and Clare Voss},
url = {https://aclanthology.org/2024.lrec-main.1259},
year = {2024},
date = {2024-05-01},
pages = {14445–14458},
abstract = {We introduce the Situated Corpus Of Understanding Transactions (SCOUT), a multi-modal collection of human-robot dialogue in the task domain of collaborative exploration. The corpus was constructed from multiple Wizard-of-Oz experiments where human participants gave verbal instructions to a remotely-located robot to move and gather information about its surroundings. SCOUT contains 89,056 utterances and 310,095 words from 278 dialogues averaging 320 utterances per dialogue. The dialogues are aligned with the multi-modal data streams available during the experiments: 5,785 images and 30 maps. The corpus has been annotated with Abstract Meaning Representation and Dialogue-AMR to identify the speaker’s intent and meaning within an utterance, and with Transactional Units and Relations to track relationships between utterances to reveal patterns of the Dialogue Structure. We describe how the corpus and its annotations have been used to develop autonomous human-robot systems and enable research in open questions of how humans speak to robots. We release this corpus to accelerate progress in autonomous, situated, human-robot dialogue, especially in the context of navigation tasks where details about the environment need to be discovered.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
West, Taylor Nicole; Prinzing, Michael; Garton, Catherine; Berman, Catherine J.; Zhou, Jieni; Hale, James; Gratch, Jonathan; Fredrickson, Barbara
Improving Social Connection with Weak Ties and Strangers: Effects of a New Micro-Intervention on Interaction Quality and Social Behavior Miscellaneous
2024.
Abstract | Links | BibTeX | Tags: Emotions, Virtual Humans
@misc{west_improving_2024,
title = {Improving Social Connection with Weak Ties and Strangers: Effects of a New Micro-Intervention on Interaction Quality and Social Behavior},
author = {Taylor Nicole West and Michael Prinzing and Catherine Garton and Catherine J. Berman and Jieni Zhou and James Hale and Jonathan Gratch and Barbara Fredrickson},
url = {https://osf.io/ytjr6},
doi = {10.31234/osf.io/ytjr6},
year = {2024},
date = {2024-05-01},
urldate = {2024-06-25},
abstract = {We propose that the emotional quality of people’s interactions with acquaintances (i.e., weak ties) and strangers contributes to well-being. We test whether a new micro-intervention can raise the quality of these interactions. We randomized young adults (N = 335) to this connectedness micro-intervention or a control intervention. Both interventions were delivered via a psychoeducational video followed by a brief conversation with a virtual human, with whom participants developed if-then plans to carry out their assigned behavioral goal. Pre-intervention, high-quality weak-tie and stranger interactions were associated with lower loneliness and greater mental health independent of strong-tie interaction quality. Experimental data showed the connectedness intervention improved the emotional quality of participants' interactions with weak ties and strangers over two days, evident in participants’ episodic self-reports and faster in-lab conversational response time. Discussion centers on implications for developing scalable behavioral interventions to improve well-being.},
keywords = {Emotions, Virtual Humans},
pubstate = {published},
tppubtype = {misc}
}
Zhang, Hao; Chang, Di; Li, Fang; Soleymani, Mohammad; Ahuja, Narendra
MagicPose4D: Crafting Articulated Models with Appearance and Motion Control Miscellaneous
2024, (Version Number: 1).
Abstract | Links | BibTeX | Tags: VGL, Virtual Humans
@misc{zhang_magicpose4d_2024,
title = {MagicPose4D: Crafting Articulated Models with Appearance and Motion Control},
author = {Hao Zhang and Di Chang and Fang Li and Mohammad Soleymani and Narendra Ahuja},
url = {https://arxiv.org/abs/2405.14017},
doi = {10.48550/ARXIV.2405.14017},
year = {2024},
date = {2024-05-01},
urldate = {2024-06-25},
publisher = {arXiv},
abstract = {With the success of 2D and 3D visual generative models, there is growing interest in generating 4D content. Existing methods primarily rely on text prompts to produce 4D content, but they often fall short of accurately defining complex or rare motions. To address this limitation, we propose MagicPose4D, a novel framework for refined control over both appearance and motion in 4D generation. Unlike traditional methods, MagicPose4D accepts monocular videos as motion prompts, enabling precise and customizable motion generation. MagicPose4D comprises two key modules: i) the Dual-Phase 4D Reconstruction Module, which operates in two phases. The first phase focuses on capturing the model's shape using accurate 2D supervision and less accurate but geometrically informative 3D pseudo-supervision, without imposing skeleton constraints. The second phase refines the model using more accurate pseudo-3D supervision obtained in the first phase and introduces kinematic chain-based skeleton constraints to ensure physical plausibility. Additionally, we propose a Global-local Chamfer loss that aligns the overall distribution of predicted mesh vertices with the supervision while maintaining part-level alignment without extra annotations. ii) The Cross-category Motion Transfer Module leverages the predictions from the 4D reconstruction module and uses a kinematic-chain-based skeleton to achieve cross-category motion transfer. It ensures smooth transitions between frames through dynamic rigidity, facilitating robust generalization without additional training. Through extensive experiments, we demonstrate that MagicPose4D significantly improves the accuracy and consistency of 4D content generation, outperforming existing methods in various benchmarks.},
note = {Version Number: 1},
keywords = {VGL, Virtual Humans},
pubstate = {published},
tppubtype = {misc}
}
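For the Global-local Chamfer loss named above, a hedged PyTorch sketch follows: a global Chamfer term over all vertices plus per-part terms for part-level alignment. The part partitioning, weighting, and shapes are assumptions, not the paper's code.

import torch

def chamfer(a, b):
    """Symmetric Chamfer distance between point sets a: (N, 3), b: (M, 3)."""
    d = torch.cdist(a, b)                       # (N, M) pairwise distances
    return d.min(dim=1).values.mean() + d.min(dim=0).values.mean()

def global_local_chamfer(pred, target, part_ids, local_weight=0.5):
    """part_ids assigns each vertex (same indexing in both sets) to a part."""
    loss = chamfer(pred, target)                # global distribution alignment
    for p in part_ids.unique():                 # add part-level alignment
        loss = loss + local_weight * chamfer(pred[part_ids == p],
                                             target[part_ids == p])
    return loss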
Jones, Brennan; Xu, Yan; Li, Qisheng; Scherer, Stefan
Designing a Proactive Context-Aware AI Chatbot for People's Long-Term Goals Proceedings Article
In: Extended Abstracts of the CHI Conference on Human Factors in Computing Systems, pp. 1–7, ACM, Honolulu HI USA, 2024, ISBN: 979-8-4007-0331-7.
Links | BibTeX | Tags: AI, Simulation
@inproceedings{jones_designing_2024,
title = {Designing a Proactive Context-Aware AI Chatbot for People's Long-Term Goals},
author = {Brennan Jones and Yan Xu and Qisheng Li and Stefan Scherer},
url = {https://dl.acm.org/doi/10.1145/3613905.3650912},
doi = {10.1145/3613905.3650912},
isbn = {979-8-4007-0331-7},
year = {2024},
date = {2024-05-01},
urldate = {2024-06-25},
booktitle = {Extended Abstracts of the CHI Conference on Human Factors in Computing Systems},
pages = {1–7},
publisher = {ACM},
address = {Honolulu HI USA},
keywords = {AI, Simulation},
pubstate = {published},
tppubtype = {inproceedings}
}
Chemburkar, Ankur; Gordon, Andrew; Feng, Andrew
Evaluating Vision-Language Models on the TriangleCOPA Benchmark Journal Article
In: FLAIRS-37, vol. 37, 2024.
Abstract | BibTeX | Tags: DTIC, Narrative
@article{chemburkar_evaluating_2024,
title = {Evaluating Vision-Language Models on the TriangleCOPA Benchmark},
author = {Ankur Chemburkar and Andrew Gordon and Andrew Feng},
year = {2024},
date = {2024-05-01},
journal = {FLAIRS-37},
volume = {37},
abstract = {The TriangleCOPA benchmark consists of 100 textual questions with videos depicting the movements of simple shapes in the style of the classic social-psychology film created by Fritz Heider and Marianne Simmel in 1944. In our experiments, we investigate the performance of current vision-language models on this challenging benchmark, assessing the capability of these models for visual anthropomorphism and abstract interpretation.},
keywords = {DTIC, Narrative},
pubstate = {published},
tppubtype = {article}
}
Mozgai, Sharon A; Kaurloto, Cari; Winn, Jade G; Leeds, Andrew; Beland, Sarah; Sookiassian, Arman; Hartholt, Arno
Accelerating Scoping Reviews: A Case Study in the User-Centered Design of an AI-Enabled Interdisciplinary Research Tool Proceedings Article
In: Extended Abstracts of the CHI Conference on Human Factors in Computing Systems, pp. 1–8, ACM, Honolulu HI USA, 2024, ISBN: 979-8-4007-0331-7.
Links | BibTeX | Tags: AI, DTIC, UARC, Virtual Humans
@inproceedings{mozgai_accelerating_2024,
title = {Accelerating Scoping Reviews: A Case Study in the User-Centered Design of an AI-Enabled Interdisciplinary Research Tool},
author = {Sharon A Mozgai and Cari Kaurloto and Jade G Winn and Andrew Leeds and Sarah Beland and Arman Sookiassian and Arno Hartholt},
url = {https://dl.acm.org/doi/10.1145/3613905.3637110},
doi = {10.1145/3613905.3637110},
isbn = {979-8-4007-0331-7},
year = {2024},
date = {2024-05-01},
urldate = {2024-06-18},
booktitle = {Extended Abstracts of the CHI Conference on Human Factors in Computing Systems},
pages = {1–8},
publisher = {ACM},
address = {Honolulu HI USA},
keywords = {AI, DTIC, UARC, Virtual Humans},
pubstate = {published},
tppubtype = {inproceedings}
}
Murawski, Alaine; Ramirez‐Zohfeld, Vanessa; Mell, Johnathan; Tschoe, Marianne; Schierer, Allison; Olvera, Charles; Brett, Jeanne; Gratch, Jonathan; Lindquist, Lee A.
NegotiAge: Development and pilot testing of an artificial intelligence‐based family caregiver negotiation program Journal Article
In: J American Geriatrics Society, vol. 72, no. 4, pp. 1112–1121, 2024, ISSN: 0002-8614, 1532-5415.
Abstract | Links | BibTeX | Tags:
@article{murawski_span_2024,
title = {NegotiAge: Development and pilot testing of an artificial intelligence‐based family caregiver negotiation program},
author = {Alaine Murawski and Vanessa Ramirez‐Zohfeld and Johnathan Mell and Marianne Tschoe and Allison Schierer and Charles Olvera and Jeanne Brett and Jonathan Gratch and Lee A. Lindquist},
url = {https://agsjournals.onlinelibrary.wiley.com/doi/10.1111/jgs.18775},
doi = {10.1111/jgs.18775},
issn = {0002-8614, 1532-5415},
year = {2024},
date = {2024-04-01},
urldate = {2024-12-05},
journal = {J American Geriatrics Society},
volume = {72},
number = {4},
pages = {1112–1121},
abstract = {Background: Family caregivers of people with Alzheimer's disease experience conflicts as they navigate health care but lack training to resolve these disputes. We sought to develop and pilot test an artificial‐intelligence negotiation training program, NegotiAge, for family caregivers. Methods: We convened negotiation experts, a geriatrician, a social worker, and community‐based family caregivers. Content matter experts created short videos to teach negotiation skills. Caregivers generated dialogue surrounding conflicts. Computer scientists utilized the dialogue with the Interactive Arbitration Guide Online (IAGO) platform to develop avatar‐based agents (e.g., sibling, older adult, physician) for caregivers to practice negotiating. Pilot testing was conducted with family caregivers to assess usability (USE) and satisfaction (open‐ended questions with thematic analysis). Results: Development: With NegotiAge, caregivers progress through didactic material, then receive scenarios to negotiate (e.g., physician recommends gastric tube, sibling disagrees with home support, older adult refusing support). Caregivers negotiate in real‐time with avatars who are designed to act like humans, including emotional tactics and irrational behaviors. Caregivers send/receive offers, using tactics until either mutual agreement or time expires. Immediate feedback is generated for the user to improve skills training. Pilot testing: Family caregivers (n = 12) completed the program and survey. USE questionnaire (Likert scale 1–7) subset scores revealed: (1) Useful—Mean 5.69 (SD 0.76); (2) Ease—Mean 5.24 (SD 0.96); (3) Learn—Mean 5.69 (SD 0.74); (4) Satisfy—Mean 5.62 (SD 1.10). Items that received over 80% agreement were: It helps me be more effective; It helps me be more productive; It is useful; It gives me more control over the activities in my life; It makes the things I want to accomplish easier to get done. Participants were highly satisfied and found NegotiAge fun to use (91.7%), with 100% who would recommend it to a friend. Conclusion: NegotiAge is an Artificial‐Intelligent Caregiver Negotiation Program that is usable and feasible for family caregivers to become familiar with negotiating conflicts commonly seen in health care.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Ehsanpour, Mahsa; Reid, Ian; Rezatofighi, Hamid
Social-MAE: Social Masked Autoencoder for Multi-person Motion Representation Learning Miscellaneous
2024, (Version Number: 1).
Abstract | Links | BibTeX | Tags: Social Simulation
@misc{ehsanpour_social-mae_2024,
title = {Social-MAE: Social Masked Autoencoder for Multi-person Motion Representation Learning},
author = {Mahsa Ehsanpour and Ian Reid and Hamid Rezatofighi},
url = {https://arxiv.org/abs/2404.05578},
doi = {10.48550/ARXIV.2404.05578},
year = {2024},
date = {2024-04-01},
urldate = {2024-07-12},
publisher = {arXiv},
abstract = {For a complete comprehension of multi-person scenes, it is essential to go beyond basic tasks like detection and tracking. Higher-level tasks, such as understanding the interactions and social activities among individuals, are also crucial. Progress towards models that can fully understand scenes involving multiple people is hindered by a lack of sufficient annotated data for such high-level tasks. To address this challenge, we introduce Social-MAE, a simple yet effective transformer-based masked autoencoder framework for multi-person human motion data. The framework uses masked modeling to pre-train the encoder to reconstruct masked human joint trajectories, enabling it to learn generalizable and data efficient representations of motion in human crowded scenes. Social-MAE comprises a transformer as the MAE encoder and a lighter-weight transformer as the MAE decoder which operates on multi-person joints' trajectory in the frequency domain. After the reconstruction task, the MAE decoder is replaced with a task-specific decoder and the model is fine-tuned end-to-end for a variety of high-level social tasks. Our proposed model combined with our pre-training approach achieves the state-of-the-art results on various high-level social tasks, including multi-person pose forecasting, social grouping, and social action understanding. These improvements are demonstrated across four popular multi-person datasets encompassing both human 2D and 3D body pose.},
note = {Version Number: 1},
keywords = {Social Simulation},
pubstate = {published},
tppubtype = {misc}
}
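As a rough illustration of the pre-training recipe in the abstract (shapes, the DCT as the frequency transform, and the masking ratio are our assumptions, not the paper's code), trajectories are tokenized in the frequency domain and a random subset is hidden for the encoder to reconstruct:

import numpy as np
from scipy.fft import dct

def mask_motion_tokens(joints, mask_ratio=0.75, seed=0):
    """joints: (people, frames, joints, 3) multi-person motion array."""
    rng = np.random.default_rng(seed)
    freq = dct(joints, axis=1, norm="ortho")     # frequency-domain trajectories
    tokens = freq.reshape(-1, freq.shape[-1])    # one token per (person, freq, joint)
    n_mask = int(mask_ratio * len(tokens))
    masked = rng.choice(len(tokens), n_mask, replace=False)
    visible = np.delete(tokens, masked, axis=0)  # encoder sees only these
    return visible, tokens[masked], masked       # decoder reconstructs the rest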
Soleymani, Mohammad; Kumano, Shiro; Provost, Emily Mower; Bianchi-Berthouze, Nadia; Sano, Akane; Suzuki, Kenji
Guest Editorial Best of ACII 2021 Journal Article
In: IEEE Trans. Affective Comput., vol. 15, no. 2, pp. 376–379, 2024, ISSN: 1949-3045, 2371-9850.
@article{soleymani_guest_2024,
title = {Guest Editorial Best of ACII 2021},
author = {Mohammad Soleymani and Shiro Kumano and Emily Mower Provost and Nadia Bianchi-Berthouze and Akane Sano and Kenji Suzuki},
url = {https://ieeexplore.ieee.org/document/10542496/},
doi = {10.1109/TAFFC.2024.3389249},
issn = {1949-3045, 2371-9850},
year = {2024},
date = {2024-04-01},
urldate = {2024-06-25},
journal = {IEEE Trans. Affective Comput.},
volume = {15},
number = {2},
pages = {376–379},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Zhang, Hui; Kuang, Bingran; Zhao, Yajie
Camera Calibration using a Single View of a Symmetric Object Proceedings Article
In: ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 2705–2709, IEEE, Seoul, Korea, Republic of, 2024, ISBN: 979-8-3503-4485-1.
Links | BibTeX | Tags: Graphics, VGL
@inproceedings{zhang_camera_2024,
title = {Camera Calibration using a Single View of a Symmetric Object},
author = {Hui Zhang and Bingran Kuang and Yajie Zhao},
url = {https://ieeexplore.ieee.org/document/10446005/},
doi = {10.1109/ICASSP48485.2024.10446005},
isbn = {979-8-3503-4485-1},
year = {2024},
date = {2024-04-01},
urldate = {2024-06-25},
booktitle = {ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
pages = {2705–2709},
publisher = {IEEE},
address = {Seoul, Korea, Republic of},
keywords = {Graphics, VGL},
pubstate = {published},
tppubtype = {inproceedings}
}
Rizzo, Albert Skip; Hartholt, Arno; Mozgai, Sharon
Settling the Score: Virtual Reality as a Tool to Enhance Trauma-Focused Therapy for PTSD Book Section
In: Rich, Grant J.; Kumar, V. K.; Farley, Frank H. (Ed.): Handbook of Media Psychology, pp. 187–213, Springer Nature Switzerland, Cham, 2024, ISBN: 978-3-031-56536-6 978-3-031-56537-3.
Links | BibTeX | Tags: DTIC, MedVR, Simulation, VR
@incollection{rich_settling_2024,
title = {Settling the Score: Virtual Reality as a Tool to Enhance Trauma-Focused Therapy for PTSD},
author = {Albert Skip Rizzo and Arno Hartholt and Sharon Mozgai},
editor = {Grant J. Rich and V. K. Kumar and Frank H. Farley},
url = {https://link.springer.com/10.1007/978-3-031-56537-3_14},
doi = {10.1007/978-3-031-56537-3_14},
isbn = {978-3-031-56536-6 978-3-031-56537-3},
year = {2024},
date = {2024-04-01},
urldate = {2024-06-18},
booktitle = {Handbook of Media Psychology},
pages = {187–213},
publisher = {Springer Nature Switzerland},
address = {Cham},
keywords = {DTIC, MedVR, Simulation, VR},
pubstate = {published},
tppubtype = {incollection}
}
Goh, Crystal; Ma, Yu; Rizzo, Albert
Normative performance data on visual attention in neurotypical children: virtual reality assessment of cognitive and psychomotor development Journal Article
In: Front. Virtual Real., vol. 5, pp. 1309176, 2024, ISSN: 2673-4192.
Abstract | Links | BibTeX | Tags: MedVR
@article{goh_normative_2024,
title = {Normative performance data on visual attention in neurotypical children: virtual reality assessment of cognitive and psychomotor development},
author = {Crystal Goh and Yu Ma and Albert Rizzo},
url = {https://www.frontiersin.org/articles/10.3389/frvir.2024.1309176/full},
doi = {10.3389/frvir.2024.1309176},
issn = {2673-4192},
year = {2024},
date = {2024-04-01},
urldate = {2024-04-16},
journal = {Front. Virtual Real.},
volume = {5},
pages = {1309176},
abstract = {Introduction: Virtual Reality (VR) is revolutionizing healthcare research and practice by offering innovative methodologies across various clinical conditions. Advances in VR technology enable the creation of controllable, multisensory 3D environments, making it an appealing tool for capturing and quantifying behavior in realistic scenarios. This paper details the application of VR as a tool for neurocognitive evaluation, specifically in attention process assessment, an area of relevance for informing the diagnosis of childhood health conditions such as Attention Deficit Hyperactivity Disorder (ADHD). Methods: The data presented focuses on attention performance results from a large sample (n = 837) of neurotypical male and female children (ages 6–13) tested on a visual continuous performance task, administered within an immersive VR classroom environment. This data was collected to create a normative baseline database for use to inform comparisons with the performances of children with ADHD to support diagnostic decision-making in this area. Results: Results indicate systematic improvements on most metrics across the age span, and sex differences are noted on key variables thought to reflect differential measures of hyperactivity and inattention in children with ADHD. Results support VR technology as a safe and viable option for testing attention processes in children, under stimulus conditions that closely mimic ecologically relevant challenges found in everyday life. Discussion: In response to these stimulus conditions, VR can support advanced methods for capturing and quantifying users’ behavioral responses. VR offers a more systematic and objective approach for clinical assessment and intervention and provides conceptual support for its use in a wide variety of healthcare contexts.},
keywords = {MedVR},
pubstate = {published},
tppubtype = {article}
}
Soleymani, Mohammad; Rahmani, Mehdi; Bigdeli, Nooshin
Robust Tube-Based Reference Tracking Nonlinear Model Predictive Control for Wind Turbines Journal Article
In: IEEE Trans. Automat. Sci. Eng., pp. 1–13, 2024, ISSN: 1545-5955, 1558-3783.
@article{soleymani_robust_2024,
title = {Robust Tube-Based Reference Tracking Nonlinear Model Predictive Control for Wind Turbines},
author = {Mohammad Soleymani and Mehdi Rahmani and Nooshin Bigdeli},
url = {https://ieeexplore.ieee.org/document/10495787/},
doi = {10.1109/TASE.2024.3385714},
issn = {1545-5955, 1558-3783},
year = {2024},
date = {2024-04-01},
urldate = {2024-04-16},
journal = {IEEE Trans. Automat. Sci. Eng.},
pages = {1–13},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Gurney, Nikolos; Loewenstein, George; Chater, Nick
Conversational technology and reactions to withheld information Journal Article
In: PLoS ONE, vol. 19, no. 4, pp. e0301382, 2024, ISSN: 1932-6203.
Abstract | Links | BibTeX | Tags: DTIC, Social Simulation, UARC
@article{gurney_conversational_2024,
title = {Conversational technology and reactions to withheld information},
author = {Nikolos Gurney and George Loewenstein and Nick Chater},
editor = {Petre Caraiani},
url = {https://dx.plos.org/10.1371/journal.pone.0301382},
doi = {10.1371/journal.pone.0301382},
issn = {1932-6203},
year = {2024},
date = {2024-04-01},
urldate = {2024-04-16},
journal = {PLoS ONE},
volume = {19},
number = {4},
pages = {e0301382},
abstract = {People frequently face decisions that require making inferences about withheld information. The advent of large language models coupled with conversational technology, e.g., Alexa, Siri, Cortana, and the Google Assistant, is changing the mode in which people make these inferences. We demonstrate that conversational modes of information provision, relative to traditional digital media, result in more critical responses to withheld information, including: (1) a reduction in evaluations of a product or service for which information is withheld and (2) an increased likelihood of recalling that information was withheld. These effects are robust across multiple conversational modes: a recorded phone conversation, an unfolding chat conversation, and a conversation script. We provide further evidence that these effects hold for conversations with the Google Assistant, a prominent conversational technology. The experimental results point to participants’ intuitions about why the information was withheld as the driver of the effect.},
keywords = {DTIC, Social Simulation, UARC},
pubstate = {published},
tppubtype = {article}
}
Hartholt, Arno; Leeds, Andrew; Fast, Ed; Sookiassian, Edwin; Kim, Kevin; Beland, Sarah; Kulkarni, Pranav; Mozgai, Sharon
Multidisciplinary Research & Development of Multi-Agents and Virtual Humans Leveraging Integrated Middleware Platforms Proceedings Article
In: 2024.
Abstract | Links | BibTeX | Tags: DTIC, UARC, Virtual Humans
@inproceedings{hartholt_multidisciplinary_2024,
title = {Multidisciplinary Research & Development of Multi-Agents and Virtual Humans Leveraging Integrated Middleware Platforms},
author = {Arno Hartholt and Andrew Leeds and Ed Fast and Edwin Sookiassian and Kevin Kim and Sarah Beland and Pranav Kulkarni and Sharon Mozgai},
url = {https://openaccess.cms-conferences.org/publications/book/978-1-958651-95-7/article/978-1-958651-95-7_33},
doi = {10.54941/ahfe1004497},
year = {2024},
date = {2024-04-01},
urldate = {2024-04-16},
abstract = {The current pace of technological advancements has led to an ever-increasing availability of technologies to investigate and help address the challenges that contemporary society faces today. However, while this trend increases the potential for creating more relevant, effective, and efficient solutions, it also inherently increases the complexity of realizing that potential. Our work aims to manage this complexity through the creation and dissemination of integrated middleware platforms that enable researchers and developers to rapidly prototype novel solutions within the areas of modelling & simulation, virtual humans, and virtual worlds. In this paper, we discuss two related platforms: the Rapid Integration & Development Environment (RIDE) and the Virtual Human Toolkit (VHToolkit). Specifically, we explore two use cases: 1) the development of an authoring tool aimed at domain experts to rapidly create low-echelon military training scenarios, and 2) the development of a virtual human-led mHealth wellness and suicide prevention app for veterans.},
keywords = {DTIC, UARC, Virtual Humans},
pubstate = {published},
tppubtype = {inproceedings}
}
Georgila, Kallirroi; Traum, David
Evaluation of Off-the-shelf Whisper Models for Speech Recognition Across Diverse Dialogue Domains Proceedings Article
In: Proceedings of the 14th International Workshop on Spoken Dialogue Systems Technology, Sapporo, Japan, 2024.
@inproceedings{georgila_evaluation_2024,
title = {Evaluation of Off-the-shelf Whisper Models for Speech Recognition Across Diverse Dialogue Domains},
author = {Kallirroi Georgila and David Traum},
url = {https://kgeorgila.github.io/publications/georgila_iwsds24.pdf},
year = {2024},
date = {2024-03-01},
booktitle = {Proceedings of the 14th International Workshop on Spoken Dialogue Systems Technology},
address = {Sapporo, Japan},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
DeTore, Nicole R.; Balogun-Mwangi, Oyenike; Eberlin, Elizabeth S.; Dokholyan, Katherine N.; Rizzo, Albert; Holt, Daphne J.
An Artificial Intelligence-Based Virtual Human Avatar Application to Assess the Mental Health of Health Care Professionals: A Validation Study Journal Article
In: Journal of Medical Extended Reality, vol. 1, no. 1, pp. 215–226, 2024, ISSN: 2994-1520.
@article{detore_artificial_2024,
title = {An Artificial Intelligence-Based Virtual Human Avatar Application to Assess the Mental Health of Health Care Professionals: A Validation Study},
author = {Nicole R. DeTore and Oyenike Balogun-Mwangi and Elizabeth S. Eberlin and Katherine N. Dokholyan and Albert Rizzo and Daphne J. Holt},
url = {https://www.liebertpub.com/doi/10.1089/jmxr.2024.0016},
doi = {10.1089/jmxr.2024.0016},
issn = {2994-1520},
year = {2024},
date = {2024-03-01},
urldate = {2024-11-01},
journal = {Journal of Medical Extended Reality},
volume = {1},
number = {1},
pages = {215–226},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Harris, Vera; Braggs, Robert; Traum, David
I’m not sure I heard you right, but I think I know what you mean – investigations into the impact of speech recognition errors on response selection for a virtual human. Proceedings Article
In: Sapporo Japan, 2024.
Links | BibTeX | Tags: Machine Learning
@inproceedings{harris_im_2024,
title = {I’m not sure I heard you right, but I think I know what you mean – investigations into the impact of speech recognition errors on response selection for a virtual human.},
author = {Vera Harris and Robert Braggs and David Traum},
url = {https://people.ict.usc.edu/~traum/Papers/23-harris-iwsds2024.pdf},
year = {2024},
date = {2024-03-01},
address = {Sapporo Japan},
keywords = {Machine Learning},
pubstate = {published},
tppubtype = {inproceedings}
}
Brixey, Jacqueline; Traum, David
Why should a dialogue system speak more than one language? Proceedings Article
In: Sapporo Japan, 2024.
Links | BibTeX | Tags: Natural Language
@inproceedings{brixey_why_2024,
title = {Why should a dialogue system speak more than one language?},
author = {Jacqueline Brixey and David Traum},
url = {https://people.ict.usc.edu/~traum/Papers/24-Why%20should%20a%20dialogue%20system%20speak%20more%20than%20one%20language.pdf},
year = {2024},
date = {2024-03-01},
address = {Sapporo Japan},
keywords = {Natural Language},
pubstate = {published},
tppubtype = {inproceedings}
}
Chen, Haiwei; Zhao, Yajie
Don't Look into the Dark: Latent Codes for Pluralistic Image Inpainting Miscellaneous
2024, (arXiv:2403.18186 [cs]).
Abstract | Links | BibTeX | Tags: VGL
@misc{chen_dont_2024,
title = {Don't Look into the Dark: Latent Codes for Pluralistic Image Inpainting},
author = {Haiwei Chen and Yajie Zhao},
url = {http://arxiv.org/abs/2403.18186},
year = {2024},
date = {2024-03-01},
urldate = {2024-08-15},
publisher = {arXiv},
abstract = {We present a method for large-mask pluralistic image inpainting based on the generative framework of discrete latent codes. Our method learns latent priors, discretized as tokens, by only performing computations at the visible locations of the image. This is realized by a restrictive partial encoder that predicts the token label for each visible block, a bidirectional transformer that infers the missing labels by only looking at these tokens, and a dedicated synthesis network that couples the tokens with the partial image priors to generate coherent and pluralistic complete image even under extreme mask settings. Experiments on public benchmarks validate our design choices as the proposed method outperforms strong baselines in both visual quality and diversity metrics.},
note = {arXiv:2403.18186 [cs]},
keywords = {VGL},
pubstate = {published},
tppubtype = {misc}
}
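The decoding loop below is a hedged sketch of how a bidirectional transformer can infer missing token labels from visible ones, in the spirit of the pipeline above; the confidence-first schedule is a common choice for this style of model (our assumption, not necessarily this paper's), and predict_logits is a hypothetical stand-in for the trained transformer.

import torch

def infill_tokens(tokens, visible, predict_logits, steps=8):
    """tokens: (L,) long tensor; visible: (L,) bool mask of known slots;
    predict_logits(tokens, known) -> (L, vocab) logits (stand-in model)."""
    tokens, known = tokens.clone(), visible.clone()
    for _ in range(steps):
        if known.all():
            break
        probs, preds = predict_logits(tokens, known).softmax(-1).max(-1)
        probs[known] = -1.0                      # only fill unknown slots
        n_fill = max(1, int((~known).sum().item()) // steps)
        pick = probs.topk(n_fill).indices        # most confident first
        tokens[pick], known[pick] = preds[pick], True
    return tokens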
Singh, Ishika; Traum, David; Thomason, Jesse
TwoStep: Multi-agent Task Planning using Classical Planners and Large Language Models Miscellaneous
2024, (arXiv:2403.17246 [cs]).
Abstract | Links | BibTeX | Tags:
@misc{singh_twostep_2024,
title = {TwoStep: Multi-agent Task Planning using Classical Planners and Large Language Models},
author = {Ishika Singh and David Traum and Jesse Thomason},
url = {http://arxiv.org/abs/2403.17246},
year = {2024},
date = {2024-03-01},
urldate = {2024-08-15},
publisher = {arXiv},
abstract = {Classical planning formulations like the Planning Domain Definition Language (PDDL) admit action sequences guaranteed to achieve a goal state given an initial state if any are possible. However, reasoning problems defined in PDDL do not capture temporal aspects of action taking, for example that two agents in the domain can execute an action simultaneously if postconditions of each do not interfere with preconditions of the other. A human expert can decompose a goal into largely independent constituent parts and assign each agent to one of these subgoals to take advantage of simultaneous actions for faster execution of plan steps, each using only single agent planning. By contrast, large language models (LLMs) used for directly inferring plan steps do not guarantee execution success, but do leverage commonsense reasoning to assemble action sequences. We combine the strengths of classical planning and LLMs by approximating human intuitions for two-agent planning goal decomposition. We demonstrate that LLM-based goal decomposition leads to faster planning times than solving multi-agent PDDL problems directly while simultaneously achieving fewer plan execution steps than a single agent plan alone and preserving execution success. Additionally, we find that LLM-based approximations of subgoals can achieve similar multi-agent execution steps than those specified by human experts. Website and resources at https://glamor-usc.github.io/twostep},
note = {arXiv:2403.17246 [cs]},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
Gordon, Andrew S.; Feng, Andrew
Combining the Predictions of Out-of-Domain Classifiers Using Etcetera Abduction Proceedings Article
In: 2024 58th Annual Conference on Information Sciences and Systems (CISS), pp. 1–6, IEEE, Princeton, NJ, USA, 2024, ISBN: 979-8-3503-6929-8.
@inproceedings{gordon_combining_2024,
title = {Combining the Predictions of Out-of-Domain Classifiers Using Etcetera Abduction},
author = {Andrew S. Gordon and Andrew Feng},
url = {https://ieeexplore.ieee.org/document/10480194/},
doi = {10.1109/CISS59072.2024.10480194},
isbn = {979-8-3503-6929-8},
year = {2024},
date = {2024-03-01},
urldate = {2024-04-16},
booktitle = {2024 58th Annual Conference on Information Sciences and Systems (CISS)},
pages = {1–6},
publisher = {IEEE},
address = {Princeton, NJ, USA},
keywords = {DTIC, Narrative, The Narrative Group, UARC},
pubstate = {published},
tppubtype = {inproceedings}
}
Frummet, Alexander; Speggiorin, Alessandro; Elsweiler, David; Leuski, Anton; Dalton, Jeff
Cooking with Conversation: Enhancing User Engagement and Learning with a Knowledge-Enhancing Assistant Journal Article
In: ACM Trans. Inf. Syst., pp. 3649500, 2024, ISSN: 1046-8188, 1558-2868.
@article{frummet_cooking_2024,
title = {Cooking with Conversation: Enhancing User Engagement and Learning with a Knowledge-Enhancing Assistant},
author = {Alexander Frummet and Alessandro Speggiorin and David Elsweiler and Anton Leuski and Jeff Dalton},
url = {https://dl.acm.org/doi/10.1145/3649500},
doi = {10.1145/3649500},
issn = {1046-8188, 1558-2868},
year = {2024},
date = {2024-03-01},
urldate = {2024-04-16},
journal = {ACM Trans. Inf. Syst.},
pages = {3649500},
abstract = {We present two empirical studies to investigate users’ expectations and behaviours when using digital assistants, such as Alexa and Google Home, in a kitchen context: First, a survey (N=200) queries participants on their expectations for the kinds of information that such systems should be able to provide. While consensus exists on expecting information about cooking steps and processes, younger participants who enjoy cooking express a higher likelihood of expecting details on food history or the science of cooking. In a follow-up Wizard-of-Oz study (N = 48), users were guided through the steps of a recipe either by an active wizard that alerted participants to information it could provide or a passive wizard who only answered questions that were provided by the user. The active policy led to almost double the number of conversational utterances and 1.5 times more knowledge-related user questions compared to the passive policy. Also, it resulted in 1.7 times more knowledge communicated than the passive policy. We discuss the findings in the context of related work and reveal implications for the design and use of such assistants for cooking and other purposes such as DIY and craft tasks, as well as the lessons we learned for evaluating such systems.},
keywords = {DTIC, Natural Language, UARC},
pubstate = {published},
tppubtype = {article}
}
Lu, Liupei; Yin, Yufeng; Gu, Yuming; Wu, Yizhen; Prasad, Pratusha; Zhao, Yajie; Soleymani, Mohammad
Leveraging Synthetic Data for Generalizable and Fair Facial Action Unit Detection Miscellaneous
2024, (arXiv:2403.10737 [cs]).
@misc{lu_leveraging_2024,
title = {Leveraging Synthetic Data for Generalizable and Fair Facial Action Unit Detection},
author = {Liupei Lu and Yufeng Yin and Yuming Gu and Yizhen Wu and Pratusha Prasad and Yajie Zhao and Mohammad Soleymani},
url = {http://arxiv.org/abs/2403.10737},
year = {2024},
date = {2024-03-01},
urldate = {2024-04-16},
publisher = {arXiv},
abstract = {Facial action unit (AU) detection is a fundamental block for objective facial expression analysis. Supervised learning approaches require a large amount of manual labeling which is costly. The limited labeled data are also not diverse in terms of gender, which can affect model fairness. In this paper, we propose to use synthetically generated data and multi-source domain adaptation (MSDA) to address the problems of the scarcity of labeled data and the diversity of subjects. Specifically, we propose to generate a diverse dataset through synthetic facial expression re-targeting by transferring the expressions from real faces to synthetic avatars. Then, we use MSDA to transfer the AU detection knowledge from a real dataset and the synthetic dataset to a target dataset. Instead of aligning the overall distributions of different domains, we propose Paired Moment Matching (PM2) to align the features of the paired real and synthetic data with the same facial expression. To further improve gender fairness, PM2 matches the features of the real data with a female and a male synthetic image. Our results indicate that synthetic data and the proposed model improve both AU detection performance and fairness across genders, demonstrating its potential to solve AU detection in the wild.},
note = {arXiv:2403.10737 [cs]},
keywords = {DTIC, UARC, Virtual Humans},
pubstate = {published},
tppubtype = {misc}
}
Tran, Minh; Chang, Di; Siniukov, Maksim; Soleymani, Mohammad
Dyadic Interaction Modeling for Social Behavior Generation Miscellaneous
2024, (arXiv:2403.09069 [cs]).
@misc{tran_dyadic_2024,
title = {Dyadic Interaction Modeling for Social Behavior Generation},
author = {Minh Tran and Di Chang and Maksim Siniukov and Mohammad Soleymani},
url = {http://arxiv.org/abs/2403.09069},
year = {2024},
date = {2024-03-01},
urldate = {2024-03-19},
publisher = {arXiv},
abstract = {Human-human communication is like a delicate dance where listeners and speakers concurrently interact to maintain conversational dynamics. Hence, an effective model for generating listener nonverbal behaviors requires understanding the dyadic context and interaction. In this paper, we present an effective framework for creating 3D facial motions in dyadic interactions. Existing work considers a listener as a reactive agent with reflexive behaviors in response to the speaker's voice and facial motions. The heart of our framework is Dyadic Interaction Modeling (DIM), a pre-training approach that jointly models speakers' and listeners' motions through masking and contrastive learning to learn representations that capture the dyadic context. To enable the generation of non-deterministic behaviors, we encode both listener and speaker motions into discrete latent representations, through VQ-VAE. The pre-trained model is further fine-tuned for motion generation. Extensive experiments demonstrate the superiority of our framework in generating listener motions, establishing a new state-of-the-art according to the quantitative measures capturing the diversity and realism of generated motions. Qualitative results demonstrate the superior capabilities of the proposed approach in generating diverse and realistic expressions, eye blinks and head gestures.},
note = {arXiv:2403.09069 [cs]},
keywords = {DTIC, UARC, Virtual Humans},
pubstate = {published},
tppubtype = {misc}
}