diff --git a/src/losses.py b/src/losses.py
new file mode 100644
index 0000000..42b6046
--- /dev/null
+++ b/src/losses.py
@@ -0,0 +1,191 @@
+"""
+Author: Yonglong Tian (yonglong@mit.edu)
+Date: May 07, 2020
+"""
+from __future__ import print_function
+
+import torch
+import torch.nn as nn
+import numpy as np
+
+
+class SupConLoss(nn.Module):
+    """Supervised Contrastive Learning: https://arxiv.org/pdf/2004.11362.pdf.
+    Also supports the unsupervised contrastive loss in SimCLR."""
+    def __init__(self, temperature=0.07, contrast_mode='all',
+                 base_temperature=0.07):
+        super(SupConLoss, self).__init__()
+        self.temperature = temperature
+        self.contrast_mode = contrast_mode
+        self.base_temperature = base_temperature
+
+    def forward(self, features, labels=None, mask=None):
+        """Compute loss for model. If both `labels` and `mask` are None,
+        it degenerates to the SimCLR unsupervised loss:
+        https://arxiv.org/pdf/2002.05709.pdf
+
+        Args:
+            features: hidden vector of shape [bsz, n_views, ...].
+            labels: ground truth of shape [bsz].
+            mask: contrastive mask of shape [bsz, bsz], mask_{i,j}=1 if sample j
+                has the same class as sample i. Can be asymmetric.
+        Returns:
+            A loss scalar.
+        """
+        device = (torch.device('cuda')
+                  if features.is_cuda
+                  else torch.device('cpu'))
+
+        if len(features.shape) < 3:
+            raise ValueError('`features` needs to be [bsz, n_views, ...], '
+                             'at least 3 dimensions are required')
+        if len(features.shape) > 3:
+            features = features.view(features.shape[0], features.shape[1], -1)
+
+        batch_size = features.shape[0]
+        if labels is not None and mask is not None:
+            raise ValueError('Cannot define both `labels` and `mask`')
+        elif labels is None and mask is None:
+            mask = torch.eye(batch_size, dtype=torch.float32).to(device)
+        elif labels is not None:
+            labels = labels.contiguous().view(-1, 1)
+            if labels.shape[0] != batch_size:
+                raise ValueError('Num of labels does not match num of features')
+            mask = torch.eq(labels, labels.T).float().to(device)
+        else:
+            mask = mask.float().to(device)
+
+        contrast_count = features.shape[1]
+        contrast_feature = torch.cat(torch.unbind(features, dim=1), dim=0)
+        if self.contrast_mode == 'one':
+            anchor_feature = features[:, 0]
+            anchor_count = 1
+        elif self.contrast_mode == 'all':
+            anchor_feature = contrast_feature
+            anchor_count = contrast_count
+        else:
+            raise ValueError('Unknown mode: {}'.format(self.contrast_mode))
+
+        # compute logits
+        anchor_dot_contrast = torch.div(
+            torch.matmul(anchor_feature, contrast_feature.T),
+            self.temperature)
+        # for numerical stability
+        logits_max, _ = torch.max(anchor_dot_contrast, dim=1, keepdim=True)
+        logits = anchor_dot_contrast - logits_max.detach()
+
+        # tile mask
+        mask = mask.repeat(anchor_count, contrast_count)
+        # mask-out self-contrast cases
+        logits_mask = torch.scatter(
+            torch.ones_like(mask),
+            1,
+            torch.arange(batch_size * anchor_count).view(-1, 1).to(device),
+            0
+        )
+        mask = mask * logits_mask
+
+        # compute log_prob
+        exp_logits = torch.exp(logits) * logits_mask
+        log_prob = logits - torch.log(exp_logits.sum(1, keepdim=True))
+
+        # compute mean of log-likelihood over positives
+        mean_log_prob_pos = (mask * log_prob).sum(1) / mask.sum(1)
+
+        # loss
+        loss = - (self.temperature / self.base_temperature) * mean_log_prob_pos
+        loss = loss.view(anchor_count, batch_size).mean()
+
+        return loss
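+
+
+# Usage sketch (illustrative, not part of the original code): SupConLoss
+# expects L2-normalized features of shape [bsz, n_views, dim], e.g. two
+# augmented views of each image passed through the same encoder and
+# projection head:
+#
+#   criterion = SupConLoss(temperature=0.07)
+#   feats = nn.functional.normalize(torch.randn(8, 2, 128), dim=-1)
+#   labels = torch.randint(0, 4, (8,))
+#   loss_supcon = criterion(feats, labels)  # supervised (SupCon) mode
+#   loss_simclr = criterion(feats)          # unsupervised (SimCLR) mode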
+
+
+class SupConLoss1View(nn.Module):
+    """Supervised Contrastive Learning: https://arxiv.org/pdf/2004.11362.pdf.
+    Single-view variant for features of shape [bsz, ndim]."""
+    def __init__(self, temperature=0.07, base_temperature=0.07):
+        super(SupConLoss1View, self).__init__()
+        self.temperature = temperature
+        self.base_temperature = base_temperature
+
+    def forward(self, features, labels):
+        """Compute a pairwise binary cross-entropy loss: every unordered
+        pair of samples is scored by the dot product of its features and
+        trained to predict whether the two samples share a class.
+
+        Args:
+            features: hidden vector of shape [bsz, ndim].
+            labels: ground truth of shape [bsz].
+        Returns:
+            A loss scalar.
+        """
+        device = (torch.device('cuda')
+                  if features.is_cuda
+                  else torch.device('cpu'))
+
+        if len(features.shape) != 2:
+            raise ValueError('`features` needs to be [bsz, ndim]')
+
+        batch_size = features.shape[0]
+        labels = labels.contiguous().view(-1, 1)
+        if labels.shape[0] != batch_size:
+            raise ValueError('Num of labels does not match num of features')
+        mask = torch.eq(labels, labels.T).float().to(device)
+
+        cross = torch.matmul(features, features.T)
+
+        # keep only the strict upper triangle: each unordered pair once,
+        # no self-pairs
+        upper_diag = torch.triu_indices(batch_size, batch_size, offset=1)
+        cross_upper = cross[upper_diag[0], upper_diag[1]]
+        mask_upper = mask[upper_diag[0], upper_diag[1]]
+        pos = mask_upper.sum()
+        # weight = torch.from_numpy(np.asarray([1-pos, pos], dtype=float)).to(device)
+        return torch.nn.functional.binary_cross_entropy_with_logits(
+            cross_upper, mask_upper)
+
+    def supcon_forward(self, features, labels):
+        """Alternative objective: the single-view SupCon loss. Not called by
+        `forward`; kept as a selectable variant.
+
+        Args:
+            features: hidden vector of shape [bsz, ndim].
+            labels: ground truth of shape [bsz].
+        Returns:
+            A loss scalar.
+        """
+        device = (torch.device('cuda')
+                  if features.is_cuda
+                  else torch.device('cpu'))
+
+        batch_size = features.shape[0]
+        labels = labels.contiguous().view(-1, 1)
+        mask = torch.eq(labels, labels.T).float().to(device)
+
+        # compute logits
+        anchor_dot_contrast = torch.div(torch.matmul(features, features.T),
+                                        self.temperature)
+        # subtract the row max for numerical stability (the softmax is
+        # invariant to this shift)
+        logits_max, _ = torch.max(anchor_dot_contrast, dim=1, keepdim=True)
+        logits = anchor_dot_contrast - logits_max.detach()
+
+        # mask-out self-contrast cases
+        logits_mask = torch.ones_like(mask)
+        logits_mask.fill_diagonal_(0)
+        mask.fill_diagonal_(0)
+
+        # compute log_prob
+        exp_logits = torch.exp(logits) * logits_mask
+        log_prob = logits - torch.log(exp_logits.sum(1, keepdim=True))
+
+        # compute mean of log-likelihood over positives; clamp the divisor
+        # so anchors with no positives contribute zero instead of NaN
+        div = mask.sum(1)
+        div = torch.clamp(div, min=1)
+        mean_log_prob_pos = (mask * log_prob).sum(1) / div
+
+        # loss
+        loss = - (self.temperature / self.base_temperature) * mean_log_prob_pos
+        loss = loss.view(-1, batch_size).mean()
+
+        return loss
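+
+
+if __name__ == '__main__':
+    # Minimal smoke test (illustrative shapes and values, not from the
+    # original repo); both losses expect L2-normalized features.
+    feats = nn.functional.normalize(torch.randn(8, 2, 128), dim=-1)
+    labels = torch.randint(0, 4, (8,))
+    print('SupConLoss:', SupConLoss()(feats, labels).item())
+    print('SupConLoss1View:', SupConLoss1View()(feats[:, 0], labels).item())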