% KATIE system paper, SIGIR 2023 proceedings, pages 3320--3324.
% Acronyms in title/booktitle are brace-protected so sentence-casing styles
% keep them uppercase. The proceedings title is stored in booktitle only
% (no series field), so it is not printed twice.
@inproceedings{a997542f11d940c5a43c19e9e19c63e4,
  author    = {Er-Rahmadi, Btissam and Oncevay, Arturo and Ji, Yuanyi and Pan, Jeff Z.},
  title     = {{KATIE}: A System for Key Attributes Identification in Product Knowledge Graph Construction},
  booktitle = {{SIGIR} 2023 - Proceedings of the 46th International {ACM} {SIGIR} Conference on Research and Development in Information Retrieval},
  publisher = {Association for Computing Machinery, Inc},
  pages     = {3320--3324},
  year      = {2023},
  month     = jul,
  day       = {19},
  doi       = {10.1145/3539618.3591846},
  language  = {English},
  keywords  = {entity resolution, fine-tuning, pre-trained language model, product knowledge graph, relation discovery},
  abstract  = {We present part of Huawei's efforts in building a Product Knowledge Graph (PKG). We want to identify which product attributes (i.e. properties) are relevant and important in terms of shopping decisions to product categories (i.e. classes). This is particularly challenging when the attributes and their values are mined from online product catalogues, i.e. HTML pages. These web pages contain semi-structured data, which do not follow a concerted format and use diverse vocabulary to designate the same features. We propose a system for key attribute identification (KATIE) based on fine-tuning pre-trained models (e.g., DistilBERT) to predict the applicability and importance of an attribute to a category. We also propose an attribute synonyms identification module that allows us to discover synonymous attributes by considering not only their labels' similarities but also the similarity of their values sets. We have evaluated our approach to Huawei categories taxonomy and a set of internally mined attributes from web pages. KATIE guarantees promising performance results compared to the most recent baselines.},
  note      = {Publisher Copyright: {\textcopyright} 2023 Copyright held by the owner/author(s).; 46th International ACM SIGIR Conference on Research and Development in Information Retrieval, SIGIR 2023 ; Conference date: 23-07-2023 Through 27-07-2023},
}