% Workshop paper in the ICCVW 2021 proceedings (see booktitle and note).
% Personal names are stored in "von Last, First" form so that BibTeX parses
% the "van" particle itself; the abstract field is informational only and is
% not printed by the standard bibliography styles.
@inproceedings{1b27030490604f71a07f6e90cdd18087,
  author    = {Alfieri, Andrea and Lin, Yancong and van Gemert, Jan C.},
  title     = {Investigating transformers in the decomposition of polygonal shapes as point collections},
  booktitle = {Proceedings - 2021 IEEE/CVF International Conference on Computer Vision Workshops, ICCVW 2021},
  series    = {Proceedings of the IEEE International Conference on Computer Vision},
  editor    = {O'Conner, L.},
  publisher = {IEEE},
  address   = {United States},
  year      = {2021},
  pages     = {2076--2085},
  doi       = {10.1109/ICCVW54120.2021.00235},
  isbn      = {978-1-6654-0192-0},
  language  = {English},
  note      = {2021 IEEE/CVF International Conference on Computer Vision Workshops (ICCVW); Conference date: 11--17 October 2021},
  abstract  = {Transformers can generate predictions in two approaches: 1. auto-regressively by conditioning each sequence element on the previous ones, or 2. directly produce an output sequence in parallel. While research has mostly explored this difference on sequential tasks in NLP, we study the difference between auto-regressive and parallel prediction on visual set prediction tasks, and in particular on polygonal shapes in images because polygons are representative of numerous types of objects, such as buildings or obstacles for aerial vehicles. This is challenging for deep learning architectures as a polygon can consist of a varying cardinality of points. We provide evidence on the importance of natural orders for Transformers, and show the benefit of decomposing complex polygons into collections of points in an auto-regressive manner.},
}