kaggle-tgs

kaggle

Posted by hadxu on October 22, 2018

kaggle tgs 117解决方案以及code & 总结大神方案

过去一个月,我在打kaggle中的tgs比赛,最终的成绩非常不理想,只有117/3291,银牌。其实可以到90名,没有选择最优的结果。不过无所谓了,都是银牌。

通过这个比赛我学到了很多,其中最重要的是快速将想法转化为代码,并作为结果呈现出来。

tgs简介

Several areas of Earth with large accumulations of oil and gas also have huge deposits of salt below the surface.

But unfortunately, knowing where large salt deposits are precisely is very difficult. Professional seismic imaging still requires expert human interpretation of salt bodies. This leads to very subjective, highly variable renderings. More alarmingly, it leads to potentially dangerous situations for oil and gas company drillers.

To create the most accurate seismic images and 3D renderings, TGS (the world’s leading geoscience data company) is hoping Kaggle’s machine learning community will be able to build an algorithm that automatically and accurately identifies if a subsurface target is salt or not.

简单来说就是图片分割,判断里面是否有盐层的存在。

我们的方案

首先想到的就是使用UNET作为解决方案,通过encoder + decoder进行发展,但是我一开始使用的为keras,对于已经训练好的输入不知道如何进行编码。这里有很好的解决方案,通过keras进行编码How to use ResNet34/50 encoder pretrained for Unet in Keras,我开始也采用了这个方案,但是iou并没有上去,但是看到heng公开的代码是Pytorch的,于是我转pytorch,根据heng的方法进行一步一步做下去。这个时候认识了czy,我们一起通过pytorch进行上分。

数据处理

class TsgDataset(Dataset):
    """Dataset for the TGS salt-identification competition.

    Loads a grayscale image (and, for train/valid, its mask) scaled to
    [0, 1], derives a binary image-level salt/no-salt label, looks up the
    per-sample depth, and applies the supplied augmentation callable.
    """

    def __init__(self, root, image_ids, augment, mode='train'):
        self.root = root
        self.image_ids = image_ids
        self.mode = mode
        self.augment = augment
        # Depth lookup table keyed by image id (column 'z' holds the depth).
        self.depths = pd.read_csv('../input/depths.csv', index_col='id')

    def __len__(self):
        return len(self.image_ids)

    def __getitem__(self, idx):
        image_id = self.image_ids[idx]

        img_path = os.path.join(self.root, 'images', image_id + '.png')
        image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        image = image.astype(np.float32) / 255

        if self.mode in ('train', 'valid'):
            mask_path = os.path.join(self.root, 'masks', image_id + '.png')
            mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
            mask = mask.astype(np.float32) / 255
        elif self.mode in ('test',):
            mask = np.array([])  # placeholder: no ground truth at test time
        else:
            mask = None

        # Image-level label: 1 iff the mask contains at least one salt pixel.
        label = 1 if np.sum(mask == 1) != 0 else 0
        depth = self.depths.loc[image_id]['z']

        idx, image, mask = self.augment(idx, image, mask)

        # Optional depth-channel trick (currently disabled):
        # image = self._add_depth_channels(image)
        # image = np.transpose(image, (2, 0, 1))

        return idx, image, mask, label

    def _add_depth_channels(self, image):
        """Expand an HxW image to HxWx3.

        Channel 0 is the raw image, channel 1 encodes the normalized row
        position (0 at the top, 1 at the bottom), and channel 2 is their
        elementwise product.
        """
        h, w = image.shape
        image = np.stack([image, image, image], 2)
        for row, const in enumerate(np.linspace(0., 1, h)):
            image[row, :, 1] = const
        image[:, :, 2] = image[:, :, 0] * image[:, :, 1]
        return image

模型resnet34 encoder

class UNetResNet34_128(nn.Module):
    """U-Net with a ResNet-34 encoder for 128x128 single-channel input.

    The grayscale input is replicated to three channels (with per-channel
    ImageNet normalization) so the pretrained ResNet weights can be reused.
    Skip connections feed each decoder stage, and a small conv head produces
    a single-channel per-pixel logit map.
    """

    def load_pretrain(self):
        """Load ImageNet-pretrained weights into the ResNet-34 encoder."""
        weights = torch.load('./models/pretrained_weights/resnet34-333f7ec4.pth')
        self.resnet.load_state_dict(weights)

    def __init__(self):
        super(UNetResNet34_128, self).__init__()
        self.resnet = resnet34()

        # Encoder: ResNet-34 stages. Numbers are output channel counts.
        self.conv1 = nn.Sequential(
            self.resnet.conv1,
            self.resnet.bn1,
            self.resnet.relu,
        )  # 64
        self.encoder2 = self.resnet.layer1  # 64
        self.encoder3 = self.resnet.layer2  # 128
        self.encoder4 = self.resnet.layer3  # 256
        self.encoder5 = self.resnet.layer4  # 512

        self.center = Dblock(512)

        # Decoder: each stage consumes the previous decoder output
        # concatenated with the matching encoder feature map.
        self.decoder5 = DecoderV2(512, 256)
        self.decoder4 = DecoderV2(256 + 256, 256)
        self.decoder3 = DecoderV2(128 + 256, 128)
        self.decoder2 = DecoderV2(64 + 128, 128)

        # Per-pixel classification head producing a single logit channel.
        self.logit = nn.Sequential(
            nn.Conv2d(64 + 128, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 1, kernel_size=1, padding=0),
        )

        # He init for convs, identity-style init for batch norms.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(
                    m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        # Replicate the grayscale input to 3 channels with ImageNet stats.
        # NOTE(review): the mean/std lists are applied in reversed channel
        # order relative to the usual RGB convention — preserved as written.
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
        x = torch.stack(
            [(x - m) / s for m, s in zip(mean[::-1], std[::-1])],
            1,
        )  # (?, 3, 128, 128)

        enc1 = self.conv1(x)  # (?, 64, 128, 128)
        pooled = F.max_pool2d(enc1, kernel_size=3, stride=2, padding=1)  # (?, 64, 64, 64)

        enc2 = self.encoder2(pooled)  # (?, 64, 64, 64)
        enc3 = self.encoder3(enc2)    # (?, 128, 32, 32)
        enc4 = self.encoder4(enc3)    # (?, 256, 16, 16)
        enc5 = self.encoder5(enc4)    # (?, 512, 16, 16)
        center = self.center(enc5)    # (?, 512, 8, 8)

        dec5 = self.decoder5(center)                      # (?, 256, 16, 16)
        dec4 = self.decoder4(torch.cat([dec5, enc4], 1))  # (?, 256, 32, 32)
        dec3 = self.decoder3(torch.cat([dec4, enc3], 1))  # (?, 128, 64, 64)
        dec2 = self.decoder2(torch.cat([dec3, enc2], 1))  # (?, 128, 128, 128)
        dec1 = torch.cat([dec2, enc1], 1)                 # (?, 192, 128, 128)

        return self.logit(dec1)  # (?, 1, 128, 128)

    def set_mode(self, mode):
        """Switch between training and evaluation mode by name."""
        self.mode = mode
        if mode in ['eval', 'valid', 'test']:
            self.eval()
        elif mode in ['train']:
            self.train()
        else:
            raise NotImplementedError

训练

# K-fold training: one model per CV split; the checkpoint with the best
# validation IoU is kept for each fold.
for cv_num, (train_idx, val_idx) in enumerate(cv.split(data_ids, data_class)):

    print('cv:', cv_num)

    log_path = '../logs/{}_{}.txt'.format(name, cv_num)
    # Truncate (or create) the per-fold log file before training starts.
    with open(log_path, 'w+'):
        pass

    train_ids, val_ids = data_ids[train_idx], data_ids[val_idx]

    train_dataset = TsgDataset(root='../input/train', image_ids=train_ids, augment=train_augment, mode='train')
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)

    valid_dataset = TsgDataset(root='../input/train', image_ids=val_ids, augment=valid_augment, mode='valid')

    net = UNetResNet34_128()
    net.cuda()
    net.load_pretrain()

    lr = 1e-4
    optimizer = torch.optim.Adam(net.parameters(), lr=lr)

    num_epochs = 70

    best_iou_metric = 0
    tic = time()

    for epoch in range(num_epochs):

        train_loss = 0
        # Hoisted out of the batch loop: the mode only needs to be reset
        # once per epoch (do_eval below switches the net to eval mode).
        net.set_mode('train')

        for indices, images, y_masks, _ in train_loader:
            optimizer.zero_grad()

            images = images.cuda()
            y_masks = y_masks.cuda()

            logits = net(images)
            # squeeze(1) removes only the channel dimension. A bare
            # squeeze() would also drop the batch dimension when the last
            # batch happens to contain a single sample, breaking the loss.
            logits = logits.squeeze(1)

            probs = logits.sigmoid()
            loss = dice_bce_loss()(probs, y_masks)

            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        train_loss = train_loss / len(train_loader)
        eval_iou = do_eval(net, valid_dataset)

        print('[%03d] duration: %.2f train_loss: %.4f valid_iou: %.4f' % (
            epoch + 1, time() - tic, train_loss, eval_iou))

        if eval_iou > best_iou_metric:
            best_iou_metric = eval_iou
            print('saving the best model')
            torch.save(net.state_dict(), '../weights/{}_{}.th'.format(name, cv_num))

        with open(log_path, 'a+') as f:
            f.write('[%03d] valid_iou: %.4f\n' % (epoch + 1, eval_iou))

        # Step decay: divide the LR by 10 after epochs 30 and 60. Mutating
        # param_groups keeps the existing Adam moment estimates, whereas
        # recreating the optimizer (as before) silently reset them.
        if epoch + 1 in [30, 60]:
            lr = lr * 0.1
            print('change learning rate to {}'.format(lr))
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

    net.cpu()

值得注意的是,在比赛早期,出现了很多很多的loss,其中最有影响力的是lovasz_loss

下面汇总大神们的方案

4

9

34

264 fastai

22 fastai

126

11

32

54

136

leak

拼图leak

未来一个月要学习这些代码了。