Publications
2025
Kwon, Deuksin; Shrestha, Kaleen; Han, Bin; Lee, Elena Hayoung; Lucas, Gale
Evaluating Behavioral Alignment in Conflict Dialogue: A Multi-Dimensional Comparison of LLM Agents and Humans Miscellaneous
2025, (arXiv:2509.16394 [cs]).
Tags: AI, DTIC, LLM
@misc{kwon_evaluating_2025,
title = {Evaluating Behavioral Alignment in Conflict Dialogue: A Multi-Dimensional Comparison of LLM Agents and Humans},
author = {Deuksin Kwon and Kaleen Shrestha and Bin Han and Elena Hayoung Lee and Gale Lucas},
url = {http://arxiv.org/abs/2509.16394},
doi = {10.48550/arXiv.2509.16394},
year = {2025},
date = {2025-09-01},
urldate = {2025-09-25},
publisher = {arXiv},
abstract = {Large Language Models (LLMs) are increasingly deployed in socially complex, interaction-driven tasks, yet their ability to mirror human behavior in emotionally and strategically complex contexts remains underexplored. This study assesses the behavioral alignment of personality-prompted LLMs in adversarial dispute resolution by simulating multi-turn conflict dialogues that incorporate negotiation. Each LLM is guided by a matched Five-Factor personality profile to control for individual variation and enhance realism. We evaluate alignment across three dimensions: linguistic style, emotional expression (e.g., anger dynamics), and strategic behavior. GPT-4.1 achieves the closest alignment with humans in linguistic style and emotional dynamics, while Claude-3.7-Sonnet best reflects strategic behavior. Nonetheless, substantial alignment gaps persist. Our findings establish a benchmark for alignment between LLMs and humans in socially complex interactions, underscoring both the promise and the limitations of personality conditioning in dialogue modeling.},
note = {arXiv:2509.16394 [cs]},
keywords = {AI, DTIC, LLM},
pubstate = {published},
tppubtype = {misc}
}
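The study above rests on conditioning each agent with a Five-Factor personality profile and simulating multi-turn conflict dialogue. As a rough illustration only, here is a minimal sketch of that loop; the profile wording, dispute scenario, model name, and OpenAI Python SDK usage are assumptions for illustration, not the authors' actual materials or protocol.

# Minimal sketch: personality-prompted multi-turn conflict dialogue.
# Assumes the OpenAI Python SDK (v1) and an OPENAI_API_KEY in the
# environment; prompts and scenario are illustrative stand-ins.
from openai import OpenAI

client = OpenAI()

def persona_prompt(profile: dict) -> str:
    # profile: Big Five scores in [0, 1], e.g. {"agreeableness": 0.2, ...}
    traits = ", ".join(f"{k}={v:.2f}" for k, v in profile.items())
    return ("You are a tenant disputing a security-deposit deduction. "
            f"Stay consistent with this Big Five profile: {traits}. "
            "Negotiate in character and never mention the profile.")

def simulate(profile_a: dict, profile_b: dict, turns: int = 6):
    # seed the dispute so the first speaker has something to answer
    history = [("B", "I'm withholding part of your deposit for the carpet damage.")]
    sides = {"A": persona_prompt(profile_a), "B": persona_prompt(profile_b)}
    for t in range(turns):
        speaker = "A" if t % 2 == 0 else "B"
        messages = [{"role": "system", "content": sides[speaker]}] + [
            {"role": "assistant" if s == speaker else "user", "content": u}
            for s, u in history
        ]
        reply = client.chat.completions.create(
            model="gpt-4.1", messages=messages
        ).choices[0].message.content
        history.append((speaker, reply))
    return history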
Hans, Soham; Gurney, Nikolos; Marsella, Stacy; Hirschmann, Sofia
Quantifying Loss Aversion in Cyber Adversaries via LLM Analysis Miscellaneous
2025, (arXiv:2508.13240 [cs]).
Tags: DTIC, LLM
@misc{hans_quantifying_2025,
title = {Quantifying Loss Aversion in Cyber Adversaries via LLM Analysis},
author = {Soham Hans and Nikolos Gurney and Stacy Marsella and Sofia Hirschmann},
url = {http://arxiv.org/abs/2508.13240},
doi = {10.48550/arXiv.2508.13240},
year = {2025},
date = {2025-08-01},
urldate = {2025-09-18},
publisher = {arXiv},
abstract = {Understanding and quantifying human cognitive biases from empirical data has long posed a formidable challenge, particularly in cybersecurity, where defending against unknown adversaries is paramount. Traditional cyber defense strategies have largely focused on fortification, while some approaches attempt to anticipate attacker strategies by mapping them to cognitive vulnerabilities, yet they fall short in dynamically interpreting attacks in progress. In recognition of this gap, IARPA's ReSCIND program seeks to infer, defend against, and even exploit attacker cognitive traits. In this paper, we present a novel methodology that leverages large language models (LLMs) to extract quantifiable insights into the cognitive bias of loss aversion from hacker behavior. Our data are collected from an experiment in which hackers were recruited to attack a controlled demonstration network. We process the hacker-generated notes using LLMs to segment the various actions and correlate them to predefined persistence mechanisms used by hackers. By correlating the implementation of these mechanisms with various operational triggers, our analysis provides new insights into how loss aversion manifests in hacker decision-making. The results demonstrate that LLMs can effectively dissect and interpret nuanced behavioral patterns, thereby offering a transformative approach to enhancing cyber defense strategies through real-time, behavior-based analysis.},
note = {arXiv:2508.13240 [cs]},
keywords = {DTIC, LLM},
pubstate = {published},
tppubtype = {misc}
}
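The core step this abstract describes is mapping free-form hacker notes onto a predefined list of persistence mechanisms. A toy sketch of that labeling step follows, assuming an OpenAI-style chat API; the mechanism list, prompt text, and JSON schema are invented for illustration and are not the paper's pipeline.

# Toy sketch of LLM-based note labeling; schema and mechanisms are assumptions.
import json
from openai import OpenAI

MECHANISMS = ["scheduled task", "new account", "registry run key",
              "ssh authorized_keys", "web shell"]

client = OpenAI()

def label_note(note: str) -> list[dict]:
    prompt = (
        "Segment the following attacker note into discrete actions. For each "
        f"action, pick the closest persistence mechanism from {MECHANISMS}, "
        "or 'none'. Reply only with JSON of the form "
        "{\"actions\": [{\"action\": str, \"mechanism\": str}]}.\n\n" + note
    )
    out = client.chat.completions.create(
        model="gpt-4o", messages=[{"role": "user", "content": prompt}]
    ).choices[0].message.content
    return json.loads(out)["actions"]  # downstream: correlate with triggers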
Han, Bin; Kwon, Deuksin; Lin, Spencer; Shrestha, Kaleen; Gratch, Jonathan
Can LLMs Generate Behaviors for Embodied Virtual Agents Based on Personality Traits? Miscellaneous
2025, (arXiv:2508.21087 [cs]).
Tags: DTIC?, LLM
@misc{han_can_2025,
title = {Can LLMs Generate Behaviors for Embodied Virtual Agents Based on Personality Traits?},
author = {Bin Han and Deuksin Kwon and Spencer Lin and Kaleen Shrestha and Jonathan Gratch},
url = {http://arxiv.org/abs/2508.21087},
doi = {10.48550/arXiv.2508.21087},
year = {2025},
date = {2025-08-01},
urldate = {2025-09-18},
publisher = {arXiv},
abstract = {This study proposes a framework that employs personality prompting with Large Language Models to generate verbal and nonverbal behaviors for virtual agents based on personality traits. Focusing on extraversion, we evaluated the system in two scenarios: negotiation and ice breaking, using both introverted and extroverted agents. In Experiment 1, we conducted agent-to-agent simulations and performed linguistic analysis and personality classification to assess whether the LLM-generated language reflected the intended traits and whether the corresponding nonverbal behaviors varied by personality. In Experiment 2, we carried out a user study to evaluate whether these personality-aligned behaviors were consistent with their intended traits and perceptible to human observers. Our results show that LLMs can generate verbal and nonverbal behaviors that align with personality traits, and that users are able to recognize these traits through the agents' behaviors. This work underscores the potential of LLMs in shaping personality-aligned virtual agents.},
note = {arXiv:2508.21087 [cs]},
keywords = {DTIC?, LLM},
pubstate = {published},
tppubtype = {misc}
}
Traum, David; Brixey, Jacqueline
Does a code-switching dialogue system help users learn conversational fluency in Choctaw? Journal Article
In: Proceedings of the Fifth Workshop on NLP for Indigenous Languages of the Americas (AmericasNLP), pp. 8-17, 2025, ISBN: 979-8-89176-236-7.
Tags: Learning Sciences, LLM
@article{brixey-traum-2025-code,
title = {Does a code-switching dialogue system help users learn conversational fluency in Choctaw?},
author = {David Traum and Jacqueline Brixey},
url = {https://aclanthology.org/2025.americasnlp-1.2/},
doi = {10.18653/v1/2025.americasnlp-1.2},
isbn = {979-8-89176-236-7},
year = {2025},
date = {2025-05-05},
urldate = {2025-05-05},
journal = {Proceedings of the Fifth Workshop on NLP for Indigenous Languages of the Americas (AmericasNLP)},
pages = {8-17},
publisher = {Association for Computational Linguistics},
address = {Albuquerque, New Mexico},
abstract = {We investigate the learning outcomes and user response to a chatbot for practicing conversational Choctaw, an endangered American Indigenous language. Conversational fluency is a goal for many language learners; however, for learners of endangered languages in North America, access to fluent speakers may be limited. Chatbots are potentially ideal dialogue partners, as this kind of dialogue system fulfills a non-authoritative role by focusing on carrying on a conversation as an equal conversational partner. The goal of the chatbot investigated in this work is to serve as a conversational partner in the absence of a fluent Choctaw-speaking human interlocutor. We investigate the impact of code-switching in the interaction, comparing a bilingual chatbot against a monolingual Choctaw version. We evaluate the systems for user engagement and enjoyment, as well as gains in conversational fluency from interacting with the system.},
keywords = {Learning Sciences, LLM},
pubstate = {published},
tppubtype = {article}
}
Chaubey, Ashutosh; Guan, Xulang; Soleymani, Mohammad
Face-LLaVA: Facial Expression and Attribute Understanding through Instruction Tuning Miscellaneous
2025, (Version Number: 1).
Tags: DTIC, LLM
@misc{chaubey_face-llava_2025,
title = {Face-LLaVA: Facial Expression and Attribute Understanding through Instruction Tuning},
author = {Ashutosh Chaubey and Xulang Guan and Mohammad Soleymani},
url = {https://arxiv.org/abs/2504.07198},
doi = {10.48550/ARXIV.2504.07198},
year = {2025},
date = {2025-04-01},
urldate = {2025-04-15},
publisher = {arXiv},
abstract = {The human face plays a central role in social communication, necessitating the use of performant computer vision tools for human-centered applications. We propose Face-LLaVA, a multimodal large language model for face-centered, in-context learning, including facial expression and attribute recognition. Additionally, Face-LLaVA is able to generate natural language descriptions that can be used for reasoning. Leveraging existing visual databases, we first developed FaceInstruct-1M, a face-centered database for instruction tuning MLLMs for face processing. We then developed a novel face-specific visual encoder powered by Face-Region Guided Cross-Attention that integrates face geometry with local visual features. We evaluated the proposed method across nine different datasets and five different face processing tasks, including facial expression recognition, action unit detection, facial attribute detection, age estimation and deepfake detection. Face-LLaVA achieves superior results compared to existing open-source MLLMs and competitive performance compared to commercial solutions. Our model output also receives a higher reasoning rating by GPT under a zero-shot setting across all the tasks. Both our dataset and model will be released at https://face-llava.github.io to support future advancements in social AI and foundational vision-language research.},
note = {Version Number: 1},
keywords = {DTIC, LLM},
pubstate = {published},
tppubtype = {misc}
}
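The "Face-Region Guided Cross-Attention" named above is, read generically, cross-attention in which face-region queries attend over visual patch features. A shape-level PyTorch sketch of that generic reading follows; the dimensions and the residual fusion are assumptions, and the released Face-LLaVA code may differ.

# Generic sketch of region-guided cross-attention; shapes are assumptions.
import torch
import torch.nn as nn

class RegionGuidedCrossAttention(nn.Module):
    """Face-region tokens (queries) attend over visual patch tokens."""
    def __init__(self, dim: int = 768, heads: int = 8):
        super().__init__()
        self.attn = nn.MultiheadAttention(dim, heads, batch_first=True)

    def forward(self, region_tokens, patch_tokens):
        # region_tokens: (B, R, D) embeddings of detected face regions
        # patch_tokens:  (B, N, D) patch features from the visual encoder
        fused, _ = self.attn(region_tokens, patch_tokens, patch_tokens)
        return fused + region_tokens  # residual keeps region identity

# e.g.: RegionGuidedCrossAttention()(torch.randn(2, 5, 768), torch.randn(2, 196, 768))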
Brun, Antonin; Liu, Ruying; Shukla, Aryan; Watson, Frances; Gratch, Jonathan
Exploring Emotion-Sensitive LLM-Based Conversational AI Miscellaneous
2025, (arXiv:2502.08920 [cs]).
Tags: AI, LLM
@misc{brun_exploring_2025,
title = {Exploring Emotion-Sensitive LLM-Based Conversational AI},
author = {Antonin Brun and Ruying Liu and Aryan Shukla and Frances Watson and Jonathan Gratch},
url = {http://arxiv.org/abs/2502.08920},
doi = {10.48550/arXiv.2502.08920},
year = {2025},
date = {2025-02-01},
urldate = {2025-02-20},
publisher = {arXiv},
abstract = {Conversational AI chatbots have become increasingly common within the customer service industry. Despite improvements in their emotional development, they often lack the authenticity of real customer service interactions or the competence of service providers. By comparing emotion-sensitive and emotion-insensitive LLM-based chatbots across 30 participants, we aim to explore how emotional sensitivity in chatbots influences perceived competence and overall customer satisfaction in service interactions. Additionally, we employ sentiment analysis techniques to analyze and interpret the emotional content of user inputs. We highlight that perceptions of chatbot trustworthiness and competence were higher in the case of the emotion-sensitive chatbot, even if issue resolution rates were not affected. We discuss implications of improved user satisfaction from emotion-sensitive chatbots and potential applications in support services.},
note = {arXiv:2502.08920 [cs]},
keywords = {AI, LLM},
pubstate = {published},
tppubtype = {misc}
}
Tak, Ala N.; Banayeeanzade, Amin; Bolourani, Anahita; Kian, Mina; Jia, Robin; Gratch, Jonathan
Mechanistic Interpretability of Emotion Inference in Large Language Models Miscellaneous
2025, (arXiv:2502.05489 [cs]).
Tags: DTIC, LLM
@misc{tak_mechanistic_2025,
title = {Mechanistic Interpretability of Emotion Inference in Large Language Models},
author = {Ala N. Tak and Amin Banayeeanzade and Anahita Bolourani and Mina Kian and Robin Jia and Jonathan Gratch},
url = {http://arxiv.org/abs/2502.05489},
doi = {10.48550/arXiv.2502.05489},
year = {2025},
date = {2025-02-01},
urldate = {2025-02-20},
publisher = {arXiv},
abstract = {Large language models (LLMs) show promising capabilities in predicting human emotions from text. However, the mechanisms through which these models process emotional stimuli remain largely unexplored. Our study addresses this gap by investigating how autoregressive LLMs infer emotions, showing that emotion representations are functionally localized to specific regions in the model. Our evaluation includes diverse model families and sizes and is supported by robustness checks. We then show that the identified representations are psychologically plausible by drawing on cognitive appraisal theory, a well-established psychological framework positing that emotions emerge from evaluations (appraisals) of environmental stimuli. By causally intervening on construed appraisal concepts, we steer the generation and show that the outputs align with theoretical and intuitive expectations. This work highlights a novel way to causally intervene and precisely shape emotional text generation, potentially benefiting safety and alignment in sensitive affective domains.},
note = {arXiv:2502.05489 [cs]},
keywords = {DTIC, LLM},
pubstate = {published},
tppubtype = {misc}
}
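The causal intervention this abstract mentions amounts, in its simplest form, to adding a direction vector to a layer's hidden states during generation. Below is a minimal forward-hook sketch using GPT-2 as a stand-in model; the layer index, scale, and the random `appraisal_dir` placeholder are assumptions (the paper derives directions from construed appraisal concepts rather than sampling them).

# Minimal activation-steering sketch; model, layer, and direction are stand-ins.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tok = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

# Placeholder for a learned appraisal direction (the paper's are not random).
appraisal_dir = torch.randn(model.config.n_embd)

def steer(module, inputs, output):
    hidden = output[0]  # GPT-2 blocks return a tuple (hidden_states, ...)
    hidden = hidden + 4.0 * appraisal_dir / appraisal_dir.norm()
    return (hidden,) + output[1:]

handle = model.transformer.h[6].register_forward_hook(steer)  # mid-network layer
ids = tok("The news about the exam made me feel", return_tensors="pt")
print(tok.decode(model.generate(**ids, max_new_tokens=20)[0]))
handle.remove()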
Xue, Jintang; Zhao, Ganning; Yao, Jie-En; Chen, Hong-En; Hu, Yue; Chen, Meida; You, Suya; Kuo, C. -C. Jay
Descrip3D: Enhancing Large Language Model-based 3D Scene Understanding with Object-Level Text Descriptions Miscellaneous
2025, (Version Number: 1).
Tags: LLM
@misc{xue_descrip3d_2025,
title = {Descrip3D: Enhancing Large Language Model-based 3D Scene Understanding with Object-Level Text Descriptions},
author = {Jintang Xue and Ganning Zhao and Jie-En Yao and Hong-En Chen and Yue Hu and Meida Chen and Suya You and C. -C. Jay Kuo},
url = {https://arxiv.org/abs/2507.14555},
doi = {10.48550/ARXIV.2507.14555},
year = {2025},
date = {2025-01-01},
urldate = {2025-08-19},
publisher = {arXiv},
abstract = {Understanding 3D scenes goes beyond simply recognizing objects; it requires reasoning about the spatial and semantic relationships between them. Current 3D scene-language models often struggle with this relational understanding, particularly when visual embeddings alone do not adequately convey the roles and interactions of objects. In this paper, we introduce Descrip3D, a novel and powerful framework that explicitly encodes the relationships between objects using natural language. Unlike previous methods that rely only on 2D and 3D embeddings, Descrip3D enhances each object with a textual description that captures both its intrinsic attributes and contextual relationships. These relational cues are incorporated into the model through a dual-level integration: embedding fusion and prompt-level injection. This allows for unified reasoning across various tasks such as grounding, captioning, and question answering, all without the need for task-specific heads or additional supervision. When evaluated on five benchmark datasets, including ScanRefer, Multi3DRefer, ScanQA, SQA3D, and Scan2Cap, Descrip3D consistently outperforms strong baseline models, demonstrating the effectiveness of language-guided relational representation for understanding complex indoor scenes.},
note = {Version Number: 1},
keywords = {LLM},
pubstate = {published},
tppubtype = {misc}
}
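Of the two integration routes the abstract names, prompt-level injection is the easier to picture: each object's textual description is serialized into the LLM prompt next to the task query. A toy sketch follows; the field names and template are assumptions, not the paper's actual format.

# Toy sketch of prompt-level injection of object descriptions; template assumed.
def build_prompt(objects: list[dict], question: str) -> str:
    # one line per object: id tag, label, and its relational description
    lines = [f"<obj{i}> {o['label']}: {o['description']}"
             for i, o in enumerate(objects)]
    return ("Scene objects:\n" + "\n".join(lines)
            + f"\n\nQuestion: {question}\nAnswer:")

objects = [
    {"label": "sofa", "description": "gray fabric sofa against the left wall, facing the TV"},
    {"label": "table", "description": "low wooden table in front of the sofa"},
]
print(build_prompt(objects, "What is in front of the sofa?"))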
Tak, Ala N.; Gratch, Jonathan; Scherer, Klaus R.
Aware yet Biased: Investigating Emotional Reasoning and Appraisal Bias in Large Language Models Journal Article
In: IEEE Trans. Affective Comput., pp. 1–11, 2025, ISSN: 1949-3045, 2371-9850.
Tags: Emotions, LLM
@article{tak_aware_2025,
title = {Aware yet Biased: Investigating Emotional Reasoning and Appraisal Bias in Large Language Models},
author = {Ala N. Tak and Jonathan Gratch and Klaus R. Scherer},
url = {https://ieeexplore.ieee.org/document/11045290/},
doi = {10.1109/TAFFC.2025.3581461},
issn = {1949-3045, 2371-9850},
year = {2025},
date = {2025-01-01},
urldate = {2025-08-19},
journal = {IEEE Trans. Affective Comput.},
pages = {1–11},
keywords = {Emotions, LLM},
pubstate = {published},
tppubtype = {article}
}
2024
Addison, Parker; Nguyen, Minh-Tuan H.; Medan, Tomislav; Shah, Jinali; Manzari, Mohammad T.; McElrone, Brendan; Lalwani, Laksh; More, Aboli; Sharma, Smita; Roth, Holger R.; Yang, Isaac; Chen, Chester; Xu, Daguang; Cheng, Yan; Feng, Andrew; Xu, Ziyue
C-FedRAG: A Confidential Federated Retrieval-Augmented Generation System Miscellaneous
2024, (arXiv:2412.13163 [cs]).
Tags: LLM
@misc{addison_c-fedrag_2024,
title = {C-FedRAG: A Confidential Federated Retrieval-Augmented Generation System},
author = {Parker Addison and Minh-Tuan H. Nguyen and Tomislav Medan and Jinali Shah and Mohammad T. Manzari and Brendan McElrone and Laksh Lalwani and Aboli More and Smita Sharma and Holger R. Roth and Isaac Yang and Chester Chen and Daguang Xu and Yan Cheng and Andrew Feng and Ziyue Xu},
url = {http://arxiv.org/abs/2412.13163},
doi = {10.48550/arXiv.2412.13163},
year = {2024},
date = {2024-12-01},
urldate = {2025-03-20},
publisher = {arXiv},
abstract = {Organizations seeking to utilize Large Language Models (LLMs) for knowledge querying and analysis often encounter challenges in maintaining an LLM fine-tuned on targeted, up-to-date information that keeps answers relevant and grounded. Retrieval Augmented Generation (RAG) has quickly become a feasible solution for organizations looking to overcome the challenges of maintaining proprietary models and to help reduce LLM hallucinations in their query responses. However, RAG comes with its own issues regarding scaling data pipelines across tiered-access and disparate data sources. In many scenarios, it is necessary to query beyond a single data silo to provide richer and more relevant context for an LLM. Analyzing data sources within and across organizational trust boundaries is often limited by complex data-sharing policies that prohibit centralized data storage and therefore inhibit the fast and effective setup and scaling of RAG solutions. In this paper, we introduce Confidential Computing (CC) techniques as a solution for secure Federated Retrieval Augmented Generation (FedRAG). Our proposed Confidential FedRAG system (C-FedRAG) enables secure connection and scaling of RAG workflows across a decentralized network of data providers by ensuring context confidentiality. We also demonstrate how to implement a C-FedRAG system using the NVIDIA FLARE SDK and assess its performance using the MedRAG toolkit and MIRAGE benchmarking dataset.},
note = {arXiv:2412.13163 [cs]},
keywords = {LLM},
pubstate = {published},
tppubtype = {misc}
}
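The federated retrieval flow described above has a simple skeleton: a dispatcher queries several data providers, each retrieves locally, and only the retrieved context (never the corpora) is aggregated for the LLM. The toy sketch below shows that skeleton only; the `Provider` interface and lexical scorer are assumptions, and it does not model the confidential-computing enclaves or NVIDIA FLARE orchestration that C-FedRAG actually relies on.

# Toy federated-retrieval skeleton; interfaces are illustrative assumptions.
from dataclasses import dataclass

@dataclass
class Provider:
    name: str
    docs: list[str]

    def retrieve(self, query: str, k: int = 2) -> list[str]:
        # naive lexical scoring stands in for each silo's local retriever
        scored = sorted(
            self.docs,
            key=lambda d: -sum(w in d.lower() for w in query.lower().split()),
        )
        return scored[:k]

def federated_context(providers: list[Provider], query: str) -> str:
    chunks = []
    for p in providers:  # only retrieved snippets leave each silo
        chunks += [f"[{p.name}] {d}" for d in p.retrieve(query)]
    return "\n".join(chunks)  # passed to the LLM as grounding context

providers = [Provider("siteA", ["dosage guidance for drug X ...", "billing codes ..."]),
             Provider("siteB", ["trial results for drug X ...", "staffing notes ..."])]
print(federated_context(providers, "drug X dosage"))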