@comment{
  Conference paper in the AIES 2021 proceedings.
  No series field on purpose: the exported value merely repeated booktitle,
  which makes standard styles print the proceedings title twice.
  address holds the publisher location, not the conference country.
}
@inproceedings{af9eea331fa74cdd8278d7a0da05736e,
  author    = {Peschl, Markus},
  title     = {Training for Implicit Norms in Deep Reinforcement Learning Agents through Adversarial Multi-Objective Reward Optimization},
  booktitle = {AIES 2021 - Proceedings of the 2021 AAAI/ACM Conference on AI, Ethics, and Society},
  publisher = {Association for Computing Machinery (ACM)},
  address   = {New York, NY, USA},
  pages     = {275--276},
  year      = {2021},
  doi       = {10.1145/3461702.3462473},
  language  = {English},
  keywords  = {deep learning, inverse reinforcement learning, multi-objective optimization, value alignment},
  abstract  = {We propose a deep reinforcement learning algorithm that employs an adversarial training strategy for adhering to implicit human norms alongside optimizing for a narrow goal objective. Previous methods which incorporate human values into reinforcement learning algorithms either scale poorly or assume hand-crafted state features. Our algorithm drops these assumptions and is able to automatically infer norms from human demonstrations, which allows for integrating it into existing agents in the form of multi-objective optimization. We benchmark our approach in a search-and-rescue grid world and show that, conditioned on respecting human norms, our agent maintains optimal performance with respect to the predefined goal.},
  note      = {4th AAAI/ACM Conference on Artificial Intelligence, Ethics, and Society, AIES 2021; Conference date: 19-05-2021 Through 21-05-2021},
}