@article{dbdd6bfdcf1840edb5cdc3eb79606fdd,
title = "Aggregating attentional dilated features for salient object detection",
abstract = "This paper presents a novel deep learning model to aggregate the attentional dilated features for salient object detection by exploring the complementary information between the global and local context in a convolutional neural network. There are two technical contributions to our network design. First, we develop an attentional dense atrous (dilated) spatial pyramid pooling (AD-ASPP) module to selectively use the local saliency cues captured by dilated convolutions with a small rate and the global saliency cues captured by dilated convolutions with a large rate. Second, taking the feature pyramid network as the backbone, we develop an aggregation network to integrate the refined features by formulating two consecutive chains of residual learning based modules: one chain from deep to shallow layers while another chain from shallow to deep layers. We evaluate our network on seven widely-used saliency detection benchmarks by comparing it against 21 state-of-the-art methods. Experimental results show that our network outperforms others on all the seven benchmark datasets. ",
keywords = "attentional dilated features, multiple layer aggregation, Saliency detection",
author = "Lei Zhu and Jiaxing Chen and Xiaowei Hu and Fu, {Chi Wing} and Xuemiao Xu and Jing Qin and Heng, {Pheng Ann}",
note = "Funding Information: Manuscript received May 27, 2019; revised July 28, 2019, August 17, 2019 and August 21, 2019; accepted August 21, 2019. Date of publication September 13, 2019; date of current version October 2, 2020. This work was supported in part by the Direct Grant for Research 2018/2019 funded by the CUHK Research Committee, in part by the Research Grants Council of the Hong Kong Special Administrative Region under Grant CUHK 14201717, in part by NSFC under Grant 61772206, Grant U1611461, Grant 61472145, and Grant 61902275, in part by the Guangdong Research and Development Key Project of China under Grant 2018B010107003, in part by the Guangdong High-Level Personnel Program under Grant 2016TQ03X319, in part by the Guangdong NSF under Grant 2017A030311027, in part by the Guangzhou Key Project in Industrial Technology under Grant 201802010027, and in part by the Hong Kong Innovation and Technology Commission under Grant ITS/319/17. This article was recommended by Associate Editor Y. Yang. (Lei Zhu and Jiaxing Chen are co-first authors.) (Corresponding author: Xuemiao Xu.) L. Zhu is with the School of Computer Science and Engineering, South China University of Technology, Guangzhou 510006, China, and also with the Department of Computer Science and Engineering, The Chinese University of Hong Kong, Hong Kong. Publisher Copyright: {\textcopyright} 1991-2012 IEEE.",
year = "2020",
month = oct,
doi = "10.1109/TCSVT.2019.2941017",
language = "English",
volume = "30",
pages = "3358--3371",
journal = "IEEE Transactions on Circuits and Systems for Video Technology",
issn = "1051-8215",
publisher = "IEEE",
number = "10",
}