Publications
Search
Feng, Andrew; Lucas, Gale; Marsella, Stacy; Suma, Evan; Chiu, Chung-Cheng; Casas, Dan; Shapiro, Ari
Acting the Part: The Role of Gesture on Avatar Identity Proceedings Article
In: Proceedings of the Seventh International Conference on Motion in Games (MIG 2014), pp. 49–54, ACM Press, Playa Vista, CA, 2014, ISBN: 978-1-4503-2623-0.
@inproceedings{feng_acting_2014,
title = {Acting the Part: The Role of Gesture on Avatar Identity},
author = {Andrew Feng and Gale Lucas and Stacy Marsella and Evan Suma and Chung-Cheng Chiu and Dan Casas and Ari Shapiro},
url = {http://dl.acm.org/citation.cfm?doid=2668064.2668102},
doi = {10.1145/2668064.2668102},
isbn = {978-1-4503-2623-0},
year = {2014},
date = {2014-11-01},
booktitle = {Proceedings of the Seventh International Conference on Motion in Games (MIG 2014)},
pages = {49--54},
publisher = {ACM Press},
address = {Playa Vista, CA},
abstract = {Recent advances in scanning technology have enabled the widespread capture of 3D character models based on human subjects. However, in order to generate a recognizable 3D avatar, the movement and behavior of the human subject should be captured and replicated as well. We present a method of generating a 3D model from a scan, as well as a method to incorporate a subject's style of gesturing into a 3D character. We present a study which shows that 3D characters that used the gestural style as their original human subjects were more recognizable as the original subject than those that don’t.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Safir, Marilyn P.; Wallach, Helene S.; Rizzo, Albert "Skip" (Ed.)
Future Directions in Post-Traumatic Stress Disorder Book
Springer US, Boston, MA, 2014, ISBN: 978-1-4899-7521-8 978-1-4899-7522-5.
@book{safir_future_2014,
title = {Future Directions in Post-Traumatic Stress Disorder},
editor = {Marilyn P. Safir and Helene S. Wallach and Albert {``Skip''} Rizzo},
url = {http://link.springer.com/10.1007/978-1-4899-7522-5},
isbn = {978-1-4899-7521-8, 978-1-4899-7522-5},
year = {2014},
date = {2014-11-01},
publisher = {Springer US},
address = {Boston, MA},
abstract = {Ours is an era of increasing tension, both global and local. And not surprisingly, PTSD is recognized not only in combat veterans and active military personnel, but also disaster and assault survivors across the demographic spectrum. As current events from mass shootings to the debate over trigger warnings keep the issue in the public eye, the disorder remains a steady concern among researchers and practitioners. Future Directions in Post-Traumatic Stress Disorder presents findings and ideas with the potential to influence both our conceptualization of the condition and the techniques used to address it. A multidisciplinary panel of experts offers new analyses of risk and resilience factors, individual and group approaches to prevention, the evolving process of diagnosis, and effective treatment and delivery. Chapters on treatment allow readers to compare widely-used prolonged exposure and VR methods with innovative applications of cognitive processing therapy and interpersonal therapy. And an especially compelling contribution surveys empirically-based programs relating to what for many is the emblematic trauma of our time, the events of September 11, 2001. Included in the coverage: •Predictors of vulnerability to PTSD: neurobiological and genetic risk factors. •Early intervention: is prevention better than cure? •The functional neuroanatomy of PTSD. •The development of evidence-based treatment for PTSD. •Enhancing exposure therapy using D-Cycloserine (DCS). •PLUS: a case example as seen through five therapeutic perspectives. While millions experience trauma, relatively few develop chronic PTSD. Future Directions in Post-Traumatic Stress Disorder is a practical and proactive reference for the health and clinical psychologists, sociologists, psychiatrists, and primary care physicians dedicated to further decreasing those numbers.},
keywords = {},
pubstate = {published},
tppubtype = {book}
}
Reger, Greg M.; Rizzo, Albert A.; Gahm, Gregory A.
Initial Development and Dissemination of Virtual Reality Exposure Therapy for Combat-Related PTSD Book Section
In: Safir, Marilyn P.; Wallach, Helene S.; Rizzo, Albert "Skip" (Ed.): Future Directions in Post-Traumatic Stress Disorder, pp. 289–302, Springer US, Boston, MA, 2014, ISBN: 978-1-4899-7521-8 978-1-4899-7522-5.
@incollection{reger_initial_2014,
title = {Initial Development and Dissemination of Virtual Reality Exposure Therapy for Combat-Related PTSD},
author = {Greg M. Reger and Albert A. Rizzo and Gregory A. Gahm},
editor = {Marilyn P. Safir and Helene S. Wallach and Albert {``Skip''} Rizzo},
url = {http://link.springer.com/10.1007/978-1-4899-7522-5_15},
isbn = {978-1-4899-7521-8, 978-1-4899-7522-5},
year = {2014},
date = {2014-11-01},
booktitle = {Future Directions in Post-Traumatic Stress Disorder},
pages = {289--302},
publisher = {Springer US},
address = {Boston, MA},
abstract = {Military personnel are at risk for the development of posttraumatic stress disorder. Although effective treatments are available, the need for improved treatment efficacy and less stigmatizing approaches to treatment have resulted in the evolution of virtual reality exposure therapy. This chapter reviews the development and dissemination efforts of a virtual reality system supporting exposure therapy for deployment-related posttraumatic stress disorder. Specifically, the chapter will review the work done to incorporate the feedback of military personnel into the early development of a Virtual Iraq/Afghanistan system and also reviews efforts to disseminate this treatment to military and Veteran behavioral health researchers and providers.},
keywords = {},
pubstate = {published},
tppubtype = {incollection}
}
Fyffe, Graham; Jones, Andrew; Alexander, Oleg; Ichikari, Ryosuke; Debevec, Paul
Driving High-Resolution Facial Scans with Video Performance Capture Journal Article
In: ACM Transactions on Graphics (TOG), vol. 34, no. 1, pp. 1–13, 2014.
@article{fyffe_driving_2014,
title = {Driving High-Resolution Facial Scans with Video Performance Capture},
author = {Graham Fyffe and Andrew Jones and Oleg Alexander and Ryosuke Ichikari and Paul Debevec},
url = {http://ict.usc.edu/pubs/Driving%20High-Resolution%20Facial%20Scans%20with%20Video%20Performance%20Capture.pdf},
year = {2014},
date = {2014-11-01},
journal = {ACM Transactions on Graphics (TOG)},
volume = {34},
number = {1},
pages = {1--13},
abstract = {We present a process for rendering a realistic facial performance with control of viewpoint and illumination. The performance is based on one or more high-quality geometry and reflectance scans of an actor in static poses, driven by one or more video streams of a performance. We compute optical flow correspondences between neighboring video frames, and a sparse set of correspondences between static scans and video frames. The latter are made possible by leveraging the relightability of the static 3D scans to match the viewpoint(s) and appearance of the actor in videos taken in arbitrary environments. As optical flow tends to compute proper correspondence for some areas but not others, we also compute a smoothed, per-pixel confidence map for every computed flow, based on normalized cross-correlation. These flows and their confidences yield a set of weighted triangulation constraints among the static poses and the frames of a performance. Given a single artist-prepared face mesh for one static pose, we optimally combine the weighted triangulation constraints, along with a shape regularization term, into a consistent 3D geometry solution over the entire performance that is drift free by construction. In contrast to previous work, even partial correspondences contribute to drift minimization, for example, where a successful match is found in the eye region but not the mouth. Our shape regularization employs a differential shape term based on a spatially varying blend of the differential shapes of the static poses and neighboring dynamic poses, weighted by the associated flow confidences. These weights also permit dynamic reflectance maps to be produced for the performance by blending the static scan maps. Finally, as the geometry and maps are represented on a consistent artist-friendly mesh, we render the resulting high-quality animated face geometry and animated reflectance maps using standard rendering tools.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Ghosh, Sayan; Chatterjee, Moitreya; Morency, Louis-Philippe
A Multimodal Context-based Approach for Distress Assessment Proceedings Article
In: Proceedings of the 16th International Conference on Multimodal Interaction, pp. 240–246, ACM Press, Istanbul, Turkey, 2014, ISBN: 978-1-4503-2885-2.
@inproceedings{ghosh_multimodal_2014,
title = {A Multimodal Context-based Approach for Distress Assessment},
author = {Sayan Ghosh and Moitreya Chatterjee and Louis-Philippe Morency},
url = {http://dl.acm.org/citation.cfm?doid=2663204.2663274},
doi = {10.1145/2663204.2663274},
isbn = {978-1-4503-2885-2},
year = {2014},
date = {2014-11-01},
booktitle = {Proceedings of the 16th International Conference on Multimodal Interaction},
pages = {240--246},
publisher = {ACM Press},
address = {Istanbul, Turkey},
abstract = {The increasing prevalence of psychological distress disorders, such as depression and post-traumatic stress, necessitates a serious effort to create new tools and technologies to help with their diagnosis and treatment. In recent years, new computational approaches were proposed to objectively analyze patient non-verbal behaviors over the duration of the entire interaction between the patient and the clinician. In this paper, we go beyond non-verbal behaviors and propose a tri-modal approach which integrates verbal behaviors with acoustic and visual behaviors to analyze psychological distress during the course of the dyadic semi-structured interviews. Our approach exploits the advantages of the dyadic nature of these interactions to contextualize the participant responses based on the affective components (intimacy and polarity levels) of the questions. We validate our approach using one of the largest corpus of semi-structured interviews for distress assessment which consists of 154 multimodal dyadic interactions. Our results show significant improvement on distress prediction performance when integrating verbal behaviors with acoustic and visual behaviors. In addition, our analysis shows that contextualizing the responses improves the prediction performance, most significantly with positive and intimate questions.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Park, Sunghyun; Shim, Han Suk; Chatterjee, Moitreya; Sagae, Kenji; Morency, Louis-Philippe
Computational Analysis of Persuasiveness in Social Multimedia: A Novel Dataset and Multimodal Prediction Approach Proceedings Article
In: Proceedings of the 16th International Conference on Multimodal Interaction, pp. 50–57, ACM Press, 2014, ISBN: 978-1-4503-2885-2.
@inproceedings{park_computational_2014,
title = {Computational Analysis of Persuasiveness in Social Multimedia: A Novel Dataset and Multimodal Prediction Approach},
author = {Sunghyun Park and Han Suk Shim and Moitreya Chatterjee and Kenji Sagae and Louis-Philippe Morency},
url = {http://dl.acm.org/citation.cfm?doid=2663204.2663260},
doi = {10.1145/2663204.2663260},
isbn = {978-1-4503-2885-2},
year = {2014},
date = {2014-11-01},
booktitle = {Proceedings of the 16th International Conference on Multimodal Interaction},
pages = {50--57},
publisher = {ACM Press},
address = {Istanbul, Turkey},
abstract = {Our lives are heavily influenced by persuasive communication, and it is essential in almost any types of social interactions from business negotiation to conversation with our friends and family. With the rapid growth of social multimedia websites, it is becoming ever more important and useful to understand persuasiveness in the context of social multimedia content online. In this paper, we introduce our newly created multimedia corpus of 1,000 movie review videos obtained from a social multimedia website called ExpoTV.com, which will be made freely available to the research community. Our research results presented here revolve around the following 3 main research hypotheses. Firstly, we show that computational descriptors derived from verbal and nonverbal behavior can be predictive of persuasiveness. We further show that combining descriptors from multiple communication modalities (audio, text and visual) improve the prediction performance compared to using those from single modality alone. Secondly, we investigate if having prior knowledge of a speaker expressing a positive or negative opinion helps better predict the speaker's persuasiveness. Lastly, we show that it is possible to make comparable prediction of persuasiveness by only looking at thin slices (shorter time windows) of a speaker's behavior.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Scherer, Stefan; Hammal, Zakia; Yang, Ying; Morency, Louis-Philippe; Cohn, Jeffrey F.
Dyadic Behavior Analysis in Depression Severity Assessment Interviews Proceedings Article
In: Proceedings of the 16th International Conference on Multimodal Interaction, pp. 112–119, ACM Press, Istanbul, Turkey, 2014, ISBN: 978-1-4503-2885-2.
@inproceedings{scherer_dyadic_2014,
title = {Dyadic Behavior Analysis in Depression Severity Assessment Interviews},
author = {Stefan Scherer and Zakia Hammal and Ying Yang and Louis-Philippe Morency and Jeffrey F. Cohn},
url = {http://dl.acm.org/citation.cfm?doid=2663204.2663238},
doi = {10.1145/2663204.2663238},
isbn = {978-1-4503-2885-2},
year = {2014},
date = {2014-11-01},
booktitle = {Proceedings of the 16th International Conference on Multimodal Interaction},
pages = {112--119},
publisher = {ACM Press},
address = {Istanbul, Turkey},
abstract = {Previous literature suggests that depression impacts vocal timing of both participants and clinical interviewers but is mixed with respect to acoustic features. To investigate further, 57 middle-aged adults (men and women) with Major Depression Disorder and their clinical interviewers (all women) were studied. Participants were interviewed for depression severity on up to four occasions over a 21 week period using the Hamilton Rating Scale for Depression (HRSD), which is a criterion measure for depression severity in clinical trials. Acoustic features were extracted for both participants and interviewers using COVAREP Toolbox. Missing data occurred due to missed appointments, technical problems, or insufficient vocal samples. Data from 36 participants and their interviewers met criteria and were included for analysis to compare between high and low depression severity. Acoustic features for participants varied between men and women as expected, and failed to vary with depression severity for participants. For interviewers, acoustic characteristics strongly varied with severity of the interviewee's depression. Accommodation - the tendency of interactants to adapt their communicative behavior to each other - between interviewers and interviewees was inversely related to depression severity. These findings suggest that interviewers modify their acoustic features in response to depression severity, and depression severity strongly impacts interpersonal accommodation.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Nouri, Elnaz
Training Agents by Crowds Proceedings Article
In: Proceedings of HCOMP 2014, Pittsburgh, PA, 2014.
@inproceedings{nouri_training_2014,
title = {Training Agents by Crowds},
author = {Elnaz Nouri},
url = {http://ict.usc.edu/pubs/Training%20Agents%20by%20Crowds.pdf},
year = {2014},
date = {2014-11-01},
booktitle = {Proceedings of HCOMP 2014},
address = {Pittsburgh, PA},
abstract = {On-line learning algorithms are particularly suitable for developing interactive computational agents. These algorithm can be used to teach the agents the abilities needed for engaging in social interactions with humans. If humans are used as teachers in the context of on-line learning algorithms a serious challenge arises: their lack of commitment and availability during the required extensive training. In this work we address this challenge by showing how “crowds of human workers” rather than “single users” can be recruited as teachers for training each learning agent. This paper proposes a framework for training agents by the crowds. The focus of this proposal is narrowed by using Reinforcement Learning as the human guidance method for teaching agents how to engage in simple negotiation games (such as the Ultimatum Bargaining Game and the Dictator Game).},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Nouri, Elnaz; Georgila, Kallirroi; Traum, David
Culture-specific models of negotiation for virtual characters: multi-attribute decision-making based on culture-specific values Journal Article
In: AI & Society, 2014, ISSN: 0951-5666, 1435-5655.
@article{nouri_culture-specific_2014,
title = {Culture-specific models of negotiation for virtual characters: multi-attribute decision-making based on culture-specific values},
author = {Elnaz Nouri and Kallirroi Georgila and David Traum},
url = {http://link.springer.com/10.1007/s00146-014-0570-7},
doi = {10.1007/s00146-014-0570-7},
issn = {0951-5666, 1435-5655},
year = {2014},
date = {2014-10-01},
journal = {AI \& Society},
abstract = {We posit that observed differences in negotiation performance across cultures can be explained by participants trying to optimize across multiple values, where the relative importance of values differs across cultures. We look at two ways for specifying weights on values for different cultures: one in which the weights of the model are hand-crafted, based on intuition interpreting Hofstede dimensions for the cultures, and one in which the weights of the model are learned from data using Inverse Reinforcement Learning (IRL). We apply this model to the Ultimatum Game and integrate it into a virtual human dialogue system. We show that weights learned from IRL surpass both a weak baseline with random weights, and a strong baseline considering only one factor of maximizing gain in own wealth in accounting for the behavior of human players from four different cultures. We also show that the weights learned with our model for one culture outperform weights learned for other cultures when playing against opponents of the first culture.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
}
Scherer, Stefan; Stratou, Giota; Lucas, Gale; Mahmoud, Marwa; Boberg, Jill; Gratch, Jonathan; Rizzo, Albert (Skip); Morency, Louis-Philippe
Automatic audiovisual behavior descriptors for psychological disorder analysis Journal Article
In: Image and Vision Computing, vol. 32, no. 10, pp. 648–658, 2014, ISSN: 0262-8856.
@article{scherer_automatic_2014,
title = {Automatic audiovisual behavior descriptors for psychological disorder analysis},
author = {Stefan Scherer and Giota Stratou and Gale Lucas and Marwa Mahmoud and Jill Boberg and Jonathan Gratch and Albert (Skip) Rizzo and Louis-Philippe Morency},
url = {http://linkinghub.elsevier.com/retrieve/pii/S0262885614001000},
doi = {10.1016/j.imavis.2014.06.001},
issn = {0262-8856},
year = {2014},
date = {2014-10-01},
journal = {Image and Vision Computing},
volume = {32},
number = {10},
pages = {648--658},
abstract = {We investigate the capabilities of automatic audiovisual nonverbal behavior descriptors to identify indicators of psychological disorders such as depression, anxiety, and post-traumatic stress disorder. Due to strong correlations between these disorders, as measured with standard self-assessment questionnaires in this study, we focus our investigations in particular on a generic distress measure as identified using factor analysis. Within this work, we seek to confirm and enrich present state of the art, predominantly based on qualitative manual annotations, with automatic quantitative behavior descriptors. We propose a number of nonverbal behavior descriptors that can be automatically estimated from audiovisual signals. Such automatic behavior descriptors could be used to support healthcare providers with quantified and objective observations that could ultimately improve clinical assessment. We evaluate our work on the dataset called the Distress Assessment Interview Corpus (DAIC) which comprises dyadic interactions between a confederate interviewer and a paid participant. Our evaluation on this dataset shows correlation of our automatic behavior descriptors with the derived general distress measure. Our analysis also includes a deeper study of self-adaptor and fidgeting behaviors based on detailed annotations of where these behaviors occur.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Joshi, Himanshu; Rosenbloom, Paul S.; Ustun, Volkan
Isolated word recognition in the Sigma cognitive architecture Journal Article
In: Biologically Inspired Cognitive Architectures, vol. 10, pp. 1–9, 2014, ISSN: 2212683X.
@article{joshi_isolated_2014,
title = {Isolated word recognition in the Sigma cognitive architecture},
author = {Himanshu Joshi and Paul S. Rosenbloom and Volkan Ustun},
url = {http://linkinghub.elsevier.com/retrieve/pii/S2212683X14000644},
doi = {10.1016/j.bica.2014.11.001},
issn = {2212-683X},
year = {2014},
date = {2014-10-01},
journal = {Biologically Inspired Cognitive Architectures},
volume = {10},
pages = {1--9},
abstract = {Symbolic architectures are effective at complex cognitive reasoning, but typically are incapable of important forms of sub-cognitive processing – such as perception – without distinct modules connected to them via low-bandwidth interfaces. Neural architectures, in contrast, may be quite effective at the latter, but typically struggle with the former. Sigma has been designed to leverage the state-of-the-art hybrid (discrete + continuous) mixed (symbolic + probabilistic) capability of graphical models to provide in a uniform non-modular fashion effective forms of, and integration across, both cognitive and sub-cognitive behavior. Here it is shown that Sigma is not only capable of performing a simple variant of speech recognition via the same knowledge structures and reasoning algorithm used for cognitive processing, but also of leveraging its existing knowledge templates and learning algorithm to acquire automatically most of the structures and parameters needed for this recognition activity.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Blumberg, Fran C.; Burke, Lauren C.; Hodent, Celia; Evans, Michael A.; Lane, H. Chad; Schell, Jesse
Serious Games for Health: Features, Challenges, Next Steps Journal Article
In: Games for Health Journal, vol. 3, no. 5, pp. 270–276, 2014, ISSN: 2161-783X, 2161-7856.
@article{blumberg_serious_2014,
title = {Serious Games for Health: Features, Challenges, Next Steps},
author = {Fran C. Blumberg and Lauren C. Burke and Celia Hodent and Michael A. Evans and H. Chad Lane and Jesse Schell},
url = {http://online.liebertpub.com/doi/abs/10.1089/g4h.2014.0079},
doi = {10.1089/g4h.2014.0079},
issn = {2161-783X, 2161-7856},
year = {2014},
date = {2014-10-01},
journal = {Games for Health Journal},
volume = {3},
number = {5},
pages = {270--276},
abstract = {As articles in this journal have demonstrated over the past 3 years, serious game development continues to flourish as a vehicle for formal and informal health education. How best to characterize a “serious” game remains somewhat elusive in the literature. Many researchers and practitioners view serious games as capitalizing on computer technology and state-of-the-art video graphics as an enjoyable means by which to provide and promote instruction and training, or to facilitate attitude change among its players. We invited four distinguished researchers and practitioners to further discuss with us how they view the characteristics of serious games for health, how those characteristics differ from those for academic purposes, the challenges posed for serious game development among players of different ages, and next steps for the development and empirical examination of the effectiveness of serious games for players' psychological and physical well-being.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Chen, Chih-Fan; Spicer, Ryan; Yahata, Rhys; Bolas, Mark; Suma, Evan
Real-time and Robust Grasping Detection Proceedings Article
In: Proceedings of the 2nd ACM symposium on Spatial user interaction, pp. 159–159, ACM, Honolulu, HI, 2014.
@inproceedings{chen_real-time_2014,
title = {Real-time and Robust Grasping Detection},
author = {Chih-Fan Chen and Ryan Spicer and Rhys Yahata and Mark Bolas and Evan Suma},
url = {http://ict.usc.edu/pubs/Real-Time%20and%20Robust%20Grasping%20Detection.pdf},
year = {2014},
date = {2014-10-01},
booktitle = {Proceedings of the 2nd ACM symposium on Spatial user interaction},
pages = {159},
publisher = {ACM},
address = {Honolulu, HI},
abstract = {Depth-based gesture cameras provide a promising and novel way to interface with computers. Nevertheless, this type of interaction remains challenging due to the complexity of finger interactions and the under large viewpoint variations. Existing middleware such as Intel Perceptual Computing SDK (PCSDK) or SoftKinetic IISU can provide abundant hand tracking and gesture information. However, the data is too noisy (Fig. 1, left) for consistent and reliable use in our application. In this work, we present a filtering approach that combines several features from PCSDK to achieve more stable hand openness and supports grasping interactions in virtual environments. Support vector machine (SVM), a machine learning method, is used to achieve better accuracy in a single frame, and Markov Random Field (MRF), a probability theory, is used to stabilize and smooth the sequential output. Our experimental results verify the effectiveness and the robustness of our method.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Pincus, Eli; DeVault, David; Traum, David
Mr. Clue - A Virtual Agent that can Play Word-Guessing Games Proceedings Article
In: Proceedings of the 3rd Workshop on Games and NLP (GAMNLP-14), Raleigh, NC, 2014.
@inproceedings{pincus_mr_2014,
title = {Mr. Clue - A Virtual Agent that can Play Word-Guessing Games},
author = {Eli Pincus and David DeVault and David Traum},
url = {http://ict.usc.edu/pubs/Mr.%20Clue%20-%20A%20Virtual%20Agent%20that%20can%20Play%20Word-Guessing%20Games.pdf},
year = {2014},
date = {2014-10-01},
booktitle = {Proceedings of the 3rd Workshop on Games and NLP (GAMNLP-14)},
address = {Raleigh, NC},
abstract = {This demonstration showcases a virtual agent, Mr. Clue, capable of acting in the role of clue-giver in a word-guessing game. The agent has the ability to automatically generate clues and update its dialogue policy dynamically based on user input.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Segbroeck, Maarten Van; Travadi, Ruchir; Narayanan, Shrikanth S.
UBM Fused Total Variability Modeling for Language Identification Proceedings Article
In: Proceedings of the Fifteenth Annual Conference of the International Speech Communication Association, INTERSPEECH, Singapore, 2014.
@inproceedings{van_segbroeck_ubm_2014,
title = {UBM Fused Total Variability Modeling for Language Identification},
author = {Van Segbroeck, Maarten and Travadi, Ruchir and Narayanan, Shrikanth S.},
url = {http://ict.usc.edu/pubs/UBM%20Fused%20Total%20Variability%20Modeling%20for%20Language%20Identification.pdf},
year = {2014},
date = {2014-09-01},
booktitle = {Proceedings of the Fifteenth Annual Conference of the International Speech Communication Association},
publisher = {INTERSPEECH},
address = {Singapore},
internal-note = {NOTE(review): publisher holds the conference series name (INTERSPEECH), not a publisher; the publisher is presumably ISCA -- confirm before changing.},
abstract = {This paper proposes Universal Background Model (UBM) fusion in the framework of total variability or i-vector modeling with the application to language identification (LID). The total variability subspace which is typically exploited to discriminate between the language classes of different speech recordings, is trained by combining the normalized Baum-Welch statistics of multiple UBMs. When the UBMs model a diverse set of feature representations, the method yields an i-vector representation which is more discriminant between the classes of interest. This approach is particularly useful when applied to shortduration utterances, and is a computationally less complex alternative to performance boosting as compared to system level fusion. We assess the performance of UBM fused total variability modeling on the task of robust language identification on short-duration utterances, as part of Phase-III of the DARPA RATS (Robust Automatic Transcription of Speech) program.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
The Context of Military Environments: Social and Organizational Factors Technical Report
National Academies Press Washington, DC, 2014.
@techreport{noauthor_context_2014,
title = {The Context of Military Environments: Social and Organizational Factors},
url = {http://sites.nationalacademies.org/DBASSE/BBCSS/CurrentProjects/DBASSE_080746},
year = {2014},
date = {2014-09-01},
address = {Washington, DC},
institution = {National Academies Press},
internal-note = {NOTE(review): techreport entries expect an author field, which is missing here; a corporate author (double-braced so it is treated as one name) is presumably appropriate -- confirm against the source. Also note that National Academies Press is a publisher; the issuing institution may differ -- verify.},
abstract = {The U.S. Army faces a variety of challenges to maintain a ready and capable force into the future. Its missions are diverse, following a continuum from peace to war that includes combat and counterinsurgency operations as well as negotiation, reconstruction, and stability operations that require a variety of personnel and skill sets to execute. Missions often demand rapid decision making and coordination with others in novel ways, so that personnel are not simply following a specific set of tactical orders but, rather, carrying out mission command through an understanding of broader strategic goals in order to develop and choose among courses of action. Like any workforce, the Army is diverse in terms of demographic characteristics, such as gender and race, with a commitment of its leadership to ensure equal opportunities across all demographic parties. With these challenges comes the urgent need to better understand how contextual factors influence soldier and small unit behavior and mission performance.},
keywords = {},
pubstate = {published},
tppubtype = {techreport}
}
Nazarian, Angela; Nouri, Elnaz; Traum, David
Initiative Patterns in Dialogue Genres Proceedings Article
In: Proceedings of Semdial 2014, Edinburgh, UK, 2014.
@inproceedings{nazarian_initiative_2014,
title = {Initiative Patterns in Dialogue Genres},
author = {Angela Nazarian and Elnaz Nouri and David Traum},
url = {http://ict.usc.edu/pubs/Initiative%20Patterns%20in%20Dialogue%20Genres.pdf},
year = {2014},
date = {2014-09-01},
booktitle = {Proceedings of Semdial 2014},
address = {Edinburgh, UK},
abstract = {One of the ways of distinguishing different dialogue genres is the differences in patterns of interactions between the participants. Morbini et al (2013) informally define dialogue genres on the basis of features like user vs system initiative, amongst other criteria. In this paper, we apply the multi-label initiative annotation scheme and related features from (Nouri and Traum, 2014) to a set of dialogue corpora from different domains. In our initial study, we examine two question-answering domains, a “slot-filling” service application domain, and several human-human negotiation domains.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Baltrušaitis, Tadas; Robinson, Peter; Morency, Louis-Philippe
Continuous Conditional Neural Fields for Structured Regression Book Section
In: Computer Vision–ECCV 2014, pp. 593–608, Springer, 2014.
@incollection{baltrusaitis_continuous_2014,
title = {Continuous Conditional Neural Fields for Structured Regression},
author = {Tadas Baltrušaitis and Peter Robinson and Louis-Philippe Morency},
url = {http://ict.usc.edu/pubs/Continuous%20Conditional%20Neural%20Fields%20for%20Structured%20Regression.pdf},
year = {2014},
date = {2014-09-01},
booktitle = {Computer Vision–ECCV 2014},
pages = {593–608},
publisher = {Springer},
abstract = {An increasing number of computer vision and pattern recognition problems require structured regression techniques. Problems like human pose estimation, unsegmented action recognition, emotion prediction and facial landmark detection have temporal or spatial output dependencies that regular regression techniques do not capture. In this paper we present continuous conditional neural fields (CCNF), a novel structured regression model that can learn non-linear input-output dependencies, and model temporal and spatial output relationships of varying length sequences. We propose two instances of our CCNF framework: Chain-CCNF for time series modelling, and Grid-CCNF for spatial relationship modelling. We evaluate our model on five public datasets spanning three different regression problems: facial landmark detection in the wild, emotion prediction in music and facial action unit recognition. Our CCNF model demonstrates state-of-the-art performance on all of the datasets used.},
keywords = {},
pubstate = {published},
tppubtype = {incollection}
}
Feng, Andrew; Shapiro, Ari; Lhommet, Margaux; Marsella, Stacy
Embodied Autonomous Agents Book Section
In: Handbook of Virtual Environments: Design, Implementation, and Applications, pp. 335–352, 2014.
@incollection{feng_embodied_2014,
  title     = {Embodied Autonomous Agents},
  author    = {Andrew Feng and Ari Shapiro and Margaux Lhommet and Stacy Marsella},
  booktitle = {Handbook of Virtual Environments: Design, Implementation, and Applications},
  pages     = {335–352},
  year      = {2014},
  date      = {2014-09-01},
  url       = {http://books.google.com/books?hl=en&lr=&id=7zzSBQAAQBAJ&oi=fnd&pg=PP1&dq=+Handbook+of+Virtual+Environments&ots=Vx3ia0S2Uu&sig=LaVbSdoG3FahlbVYbuCxLmKgFIA#v=onepage&q=Handbook%20of%20Virtual%20Environments&f=false},
  abstract  = {Since the last decade, virtual environments have been extensively used for a wide range of application, from training systems to video games. Virtual humans are animated characters that are designed to populate these environments and to interact with the objects of the world as well as with the user. A virtual agent must perceive the world in which it exists, reason about those perceptions, and decide on how to act on them in pursuit of its own agenda.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {incollection}
}
Yang, Zhaojun; Narayanan, Shrikanth
Analysis of Emotional Effect on Speech-Body Gesture Interplay Proceedings Article
In: Proceedings of the Fifteenth Annual Conference of the International Speech Communication Association, Singapore, 2014.
@inproceedings{yang_analysis_2014,
title = {Analysis of Emotional Effect on Speech-Body Gesture Interplay},
author = {Zhaojun Yang and Shrikanth Narayanan},
url = {http://ict.usc.edu/pubs/Analysis%20of%20Emotional%20Effect%20on%20Speech-Body%20Gesture%20Interplay.pdf},
year = {2014},
date = {2014-09-01},
booktitle = {Proceedings of the Fifteenth Annual Conference of the International Speech Communication Association},
address = {Singapore},
abstract = {In interpersonal interactions, speech and body gesture channels are internally coordinated towards conveying communicative intentions. The speech-gesture relationship is influenced by the internal emotion state underlying the communication. In this paper, we focus on uncovering the emotional effect on the interrelation between speech and body gestures. We investigate acoustic features describing speech prosody (pitch and energy) and vocal tract configuration (MFCCs), as well as three types of body gestures, viz., head motion, lower and upper body motions. We employ mutual information to measure the coordination between the two communicative channels, and analyze the quantified speech-gesture link with respect to distinct levels of emotion attributes, i.e., activation and valence. The results reveal that the speech-gesture coupling is generally tighter for low-level activation and high-level valence, compared to high-level activation and low-level valence. We further propose a framework for modeling the dynamics of speech-gesture interaction. Experimental studies suggest that such quantified coupling representations can well discriminate different levels of activation and valence, reinforcing that emotions are encoded in the dynamics of the multimodal link. We also verify that the structures of the coupling representations are emotion-dependent using subspace-based analysis.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Filter
Sorry, no publications matched your criteria.