实例
数据集
笔记本
笔记本

真实场景篡改图像检测挑战赛 —— EDA
暂无摘要
笔记本内容
一个简单的 EDA
对比赛数据的几种分布（图片尺寸、随机样本、Mask 面积占比）进行查看。
# Fetch two datasets by ID with the featurize CLI. The leading `!` makes the
# notebook run each line as a shell command rather than as Python.
!featurize dataset download 757181ec-0841-4468-b701-54ea53ad2d3c
!featurize dataset download e4dc9a7b-ded4-453d-b887-1f3fb04a5f94
100%|█████████████████████████████████████| 17.7k/17.7k [00:00<00:00, 7.34MiB/s] 🍬 下载完成,正在解压... 🏁 数据集已经成功添加 100%|████████████████████████████████████████| 329M/329M [00:01<00:00, 227MiB/s] 🍬 下载完成,正在解压... 🏁 数据集已经成功添加
import os
import cv2
import random
from tqdm import tqdm
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Directories that hold the training images, training masks and test images.
TRAIN = '/home/featurize/data/train/img'
MASK = '/home/featurize/data/train/mask'
TEST = '/home/featurize/data/test/img'

# Entry names (not full paths) of each directory, in os.listdir order.
train, test, mask = [os.listdir(folder) for folder in (TRAIN, TEST, MASK)]
训练集的图片尺寸分布 #
# Collect the pixel width and height of every '.jpg' training image.
w, h = [], []
# The paths below are built from TRAIN, so the names must come from the
# training listing; iterating `test` here only worked when both folders
# happened to share file names.
for name in tqdm(train):
    if not name.endswith('jpg'):
        continue
    path = os.path.join(TRAIN, name)
    # Decode once and read both dimensions from the same array.
    img = cv2.imread(path)
    if img is None:
        # cv2.imread signals an unreadable file by returning None, not raising.
        tqdm.write(f'skipped unreadable image: {path}')
        continue
    h.append(img.shape[0])
    w.append(img.shape[1])
100%|██████████| 4000/4000 [00:35<00:00, 112.72it/s]
# One row of three histograms: widths, heights, and width / height ratio,
# all drawn from the `w` and `h` lists filled by the preceding scan.
f, ax = plt.subplots(1, 3, figsize=(16, 4))

width_plot = sns.histplot(w, ax=ax[0], palette=sns.light_palette("seagreen", as_cmap=True))
width_plot.set_title('Width')

height_plot = sns.histplot(h, ax=ax[1], palette=sns.color_palette("RdPu", 10))
height_plot.set_title('Height')

# Element-wise ratio; w and h have the same length because they are appended in pairs.
aspect = np.array(w) / np.array(h)
ratio_plot = sns.histplot(aspect, ax=ax[2], palette=sns.color_palette("RdPu", 10))
ratio_plot.set_title('W&H Ratio');
测试集的图片尺寸分布 #
# Collect the pixel width and height of every '.jpg' test image.
w, h = [], []
for name in tqdm(test):
    if not name.endswith('jpg'):
        continue
    path = os.path.join(TEST, name)
    # Decode once and read both dimensions from the same array instead of
    # loading the file separately for height and width.
    img = cv2.imread(path)
    if img is None:
        # cv2.imread signals an unreadable file by returning None, not raising.
        tqdm.write(f'skipped unreadable image: {path}')
        continue
    h.append(img.shape[0])
    w.append(img.shape[1])
100%|██████████| 4000/4000 [00:35<00:00, 112.87it/s]
# One row of three histograms: widths, heights, and width / height ratio,
# all drawn from the `w` and `h` lists filled by the preceding scan.
f, ax = plt.subplots(1, 3, figsize=(16, 4))
aspect = np.array(w) / np.array(h)
for panel, values in zip(ax, (w, h, aspect)):
    sns.histplot(values, ax=panel)
训练集随机样本 #
# Show a 4x5 grid of randomly chosen training images (repeats are possible).
f, axs = plt.subplots(4, 5, figsize=(16, 8))
for panel in axs.flat:
    # Take the text before the first dot of a random listing entry and load
    # the '.jpg' file with that stem.
    stem = random.choice(train).split('.')[0]
    img = cv2.imread(os.path.join(TRAIN, stem + '.jpg'), cv2.IMREAD_UNCHANGED)
    # OpenCV returns colour channels in BGR(A) order while imshow interprets
    # them as RGB(A); without this conversion red and blue appear swapped.
    # Single-channel (2-D) arrays need no reordering.
    if img is not None and img.ndim == 3:
        code = cv2.COLOR_BGRA2RGBA if img.shape[2] == 4 else cv2.COLOR_BGR2RGB
        img = cv2.cvtColor(img, code)
    panel.imshow(img)
测试集随机样本 #
# Show a 4x5 grid of randomly chosen test images (repeats are possible).
f, axs = plt.subplots(4, 5, figsize=(16, 8))
for panel in axs.flat:
    # Take the text before the first dot of a random listing entry and load
    # the '.jpg' file with that stem.
    stem = random.choice(test).split('.')[0]
    img = cv2.imread(os.path.join(TEST, stem + '.jpg'), cv2.IMREAD_UNCHANGED)
    # OpenCV returns colour channels in BGR(A) order while imshow interprets
    # them as RGB(A); without this conversion red and blue appear swapped.
    # Single-channel (2-D) arrays need no reordering.
    if img is not None and img.ndim == 3:
        code = cv2.COLOR_BGRA2RGBA if img.shape[2] == 4 else cv2.COLOR_BGR2RGB
        img = cv2.cvtColor(img, code)
    panel.imshow(img)
Mask 随机样本 #
# Show a 4x5 grid of randomly chosen masks, loaded as single-channel images.
f, axs = plt.subplots(4, 5, figsize=(16, 8))
# ravel() walks the grid row by row, matching a nested row/column loop.
for panel in axs.ravel():
    pick = random.randint(0, len(mask) - 1)
    mask_path = os.path.join(MASK, mask[pick])
    panel.imshow(cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE))
计算 Mask 占据图片整体比例 #
# For every mask file, record the mask's share of the whole image area.
ratios = []
for mask_name in tqdm(mask):
    mask_img = cv2.imread(os.path.join(MASK, mask_name), cv2.IMREAD_GRAYSCALE)
    rows, cols = mask_img.shape[:2]
    # Dividing by 255 makes a pixel of value 255 count as exactly 1.
    covered = (mask_img / 255.).sum()
    ratios.append(covered / (rows * cols))
100%|██████████| 4000/4000 [00:16<00:00, 237.66it/s]
查看 Mask 占据图片整体比例 #
# Histogram of the per-mask area ratios on a single large square figure.
f, ax = plt.subplots(figsize=(16, 16))
sns.histplot(data=ratios, ax=ax);
评论(0条)