Source code for deepke.attribution_extraction.standard.models.Capsule

import os
import sys
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../")))
import torch
from . import BasicModule
from module import Embedding, CNN
from module import Capsule as CapsuleLayer

from utils import seq_len_to_mask, to_one_hot


class Capsule(BasicModule):
    def __init__(self, cfg):
        super(Capsule, self).__init__()
        if cfg.dim_strategy == 'cat':
            cfg.in_channels = cfg.word_dim + 2 * cfg.pos_dim
        else:
            cfg.in_channels = cfg.word_dim

        # capsule config
        cfg.input_dim_capsule = cfg.out_channels
        cfg.num_capsule = cfg.num_attributes
        self.num_attributes = cfg.num_attributes

        self.embedding = Embedding(cfg)
        self.cnn = CNN(cfg)
        self.capsule = CapsuleLayer(cfg)
    def forward(self, x):
        word, lens, entity_pos, attribute_value_pos = x['word'], x['lens'], x['entity_pos'], x['attribute_value_pos']
        mask = seq_len_to_mask(lens)

        inputs = self.embedding(word, entity_pos, attribute_value_pos)
        # The CNN changes the sequence length, so the mask can no longer be
        # aligned with its output; skipping the mask is acceptable here, since
        # the primary capsules carry only coarse-grained information anyway.
        primary, _ = self.cnn(inputs)
        output = self.capsule(primary)
        output = output.norm(p=2, dim=-1)  # return the L2 norm (length) of each capsule
        return output  # [B, N]
    def loss(self, predict, target, reduction='mean'):
        m_plus, m_minus, loss_lambda = 0.9, 0.1, 0.5

        target = to_one_hot(target, self.num_attributes)
        max_l = (torch.relu(m_plus - predict)) ** 2
        max_r = (torch.relu(predict - m_minus)) ** 2
        loss = target * max_l + loss_lambda * (1 - target) * max_r
        loss = torch.sum(loss, dim=-1)

        if reduction == 'sum':
            return loss.sum()
        else:
            # default: average over the batch
            return loss.mean()
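
The constructor derives the CNN's input width from the embedding strategy: with dim_strategy 'cat', the word embedding is concatenated with two position embeddings (one relative to the entity, one relative to the attribute value); otherwise everything shares the word dimension. A minimal sketch of that computation follows; only the field names are taken from the source, the dimension values are hypothetical.

from types import SimpleNamespace

# Hypothetical config values; only the field names come from the source.
cfg = SimpleNamespace(dim_strategy='cat', word_dim=300, pos_dim=10)

if cfg.dim_strategy == 'cat':
    # word embedding concatenated with two position embeddings
    in_channels = cfg.word_dim + 2 * cfg.pos_dim   # 300 + 2 * 10 = 320
else:
    in_channels = cfg.word_dim                     # positions folded into word_dim
print(in_channels)  # 320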
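
forward() returns the L2 norm of each attribute capsule, so each entry of the [B, N] output scores how strongly one attribute is present, and decoding reduces to an argmax over the capsule lengths. A sketch of that decoding step, using a random tensor in place of a real model output (batch size and attribute count are assumed):

import torch

batch_size, num_attributes = 4, 7                 # assumed sizes
lengths = torch.rand(batch_size, num_attributes)  # stand-in for forward()'s [B, N] output
pred = lengths.argmax(dim=-1)                     # predicted attribute id per sample
print(pred.shape)                                 # torch.Size([4])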
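
loss() is the margin loss from Sabour et al., "Dynamic Routing Between Capsules": L_k = T_k * max(0, m+ - ||v_k||)^2 + lambda * (1 - T_k) * max(0, ||v_k|| - m-)^2, with m+ = 0.9, m- = 0.1, lambda = 0.5. The capsule for the gold attribute is pushed toward a length of at least 0.9, all others below 0.1. A self-contained sketch of the same computation in plain torch, with torch.nn.functional.one_hot standing in for the project's to_one_hot helper and random stand-in tensors:

import torch
import torch.nn.functional as F

m_plus, m_minus, loss_lambda = 0.9, 0.1, 0.5
predict = torch.rand(4, 7)                        # capsule lengths, [B, N]
target = torch.randint(0, 7, (4,))                # gold attribute ids, [B]

one_hot = F.one_hot(target, num_classes=7).float()  # stand-in for to_one_hot
max_l = torch.relu(m_plus - predict) ** 2         # penalizes a short correct capsule
max_r = torch.relu(predict - m_minus) ** 2        # penalizes long wrong capsules
loss = one_hot * max_l + loss_lambda * (1 - one_hot) * max_r
print(loss.sum(dim=-1).mean())                    # scalar, matches reduction='mean'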