@article{MCDDC6569,
  author        = {Kim, Youngseo and Yu, Minseo and Lee, Younghan and Bae, Ho},
  title         = {Label Differential Privacy Study for Privacy Protection in Multimodal Contrastive Learning Model},
  journal       = {The Transactions of the Korea Information Processing Society},
  year          = {2025},
  doi           = {10.3745/TKIPS.2025.14.5.289},
  keywords      = {Differential Privacy, Multimodal Deep Learning, Contrastive Learning, Data Privacy},
  abstract      = {Recent advancements in multimodal deep learning have garnered significant attention from both academia and industry due to their exceptional accuracy and ability to learn rich knowledge representations. In particular, contrastive learning based approaches have played a pivotal role in dramatically enhancing the performance of multimodal deep learning. However, the use of multiple data sources in multimodal deep learning increases the risk of inferring sensitive information through data fusion, posing a higher privacy invasion attack compared to unimodal deep learning. This challenge cannot be fully addressed by privacy preserving techniques traditionally employed in unimodal deep learning, underscoring the growing importance of privacy protection in this domain. To address this issue, previous studies have relied on trusted execution environments or strengthened security by selectively recording data classified as privacy threatening. However, these approaches face limitations such as hardware dependency, performance degradation, and accuracy issues in data classification. These shortcomings hinder scalability and usability while leaving systems vulnerable to emerging threats. In this study, we address the privacy concerns by applying the Double Randomized Response algorithm, which ensures label differential privacy during the data preparation process. As a result, we achieved 80.14\% accuracy in image-table matching and classification tasks, demonstrating a balance between privacy protection and performance. This method is the first to incorporate data security considerations into multimodal deep learning models while substantiating its efficacy, marking a significant contribution to the field.},
  internal-note = {issn was "null" in the export (placeholder) -- removed; DOI suffix suggests volume 14, number 5, starting page 289 -- verify against the published article before adding those fields},
}