% Journal article entry; the citation key matches the numeric id at the end of the url field.
@article{3334,
  author   = {Su, Zhan and Yu, Ruiyun and Zou, Shihao and Guo, Bingyang and Cheng, Li},
  title    = {Spatial-Aware Multi-Level Parsing Network for Human-Object Interaction},
  journal  = {International Journal of Interactive Multimedia and Artificial Intelligence},
  year     = {2025},
  month    = mar,
  volume   = {9},
  number   = {2},
  pages    = {39--48},
  issn     = {1989-1660},
  doi      = {10.9781/ijimai.2023.06.004},
  url      = {https://www.ijimai.org/journal/bibcite/reference/3334},
  keywords = {Computer vision, Deep Learning, Gated Graph Neural Network, HOI, Image Classification},
  abstract = {Human-Object Interaction (HOI) detection focuses on human-centered visual relationship detection, which is a challenging task due to the complexity and diversity of image content. Unlike most recent HOI detection works that only rely on paired instance-level information in the union range, our proposed Spatial-aware Multilevel Parsing Network (SMPNet) uses a multi-level information detection strategy, including instance-level visual features of detected human-object pair, part-level related features of the human body, and scene-level features extracted by the graph neural network. After fusing the three levels of features, the HOI relationship is predicted. We validate our method on two public datasets, V-COCO and HICO-DET. Compared with prior works, our proposed method achieves the state-of-the-art results on both datasets in terms of mAProle, which demonstrates the effectiveness of our proposed multi-level information detection strategy.},
}