Skip to content

Commit

Permalink
Add hopenet model
Browse files Browse the repository at this point in the history
  • Loading branch information
theSoenke committed Jul 14, 2019
1 parent 7059d31 commit ab870e5
Show file tree
Hide file tree
Showing 6 changed files with 245 additions and 1 deletion.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__pycache__
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1,5 @@
# headpose
# Head Pose Estimation

Implementation of [Fine-Grained Head Pose Estimation Without Keypoints](https://arxiv.org/abs/1710.00925)

python3 head_pose.py --checkpoint hopenet_robust_alpha1.pkl --image face.png
70 changes: 70 additions & 0 deletions head_pose.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import argparse

import cv2
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from hopenet import Hopenet
from PIL import Image
from torchvision import transforms
from visualization import draw_pose


class HeadPose():
    """Head-pose estimator wrapping a pretrained Hopenet model.

    Converts Hopenet's 66-bin classification outputs into continuous
    yaw/pitch/roll angles (degrees) via a softmax expectation over bins.
    """

    def __init__(self, checkpoint, transform=None):
        """
        Args:
            checkpoint: path to a Hopenet state-dict file (.pkl).
            transform: optional transform applied to each input image;
                defaults to the standard ImageNet resize/crop/normalize
                pipeline at 224x224.
        """
        self.transform = transform
        if self.transform is None:
            self.transform = transforms.Compose([
                transforms.Resize(224),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ])

        # 66 bins; predict() maps the expected bin index to degrees
        # via index * 3 - 99.
        num_bins = 66
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.idx_tensor = torch.arange(num_bins, dtype=torch.float32).to(self.device)
        self.model = Hopenet()
        # map_location lets a GPU-trained checkpoint load on a CPU-only host
        # (the original torch.load crashed without CUDA).
        state_dict = torch.load(checkpoint, map_location=self.device)
        # NOTE(review): strict=False silently ignores missing/unexpected
        # keys -- confirm the checkpoint really matches this architecture.
        self.model.load_state_dict(state_dict, strict=False)
        self.model.to(self.device)
        self.model.eval()

    @torch.no_grad()
    def predict(self, image):
        """Predict head-pose angles for one image or a batch.

        Args:
            image: a PIL image, a path string, or a list of images.

        Returns:
            (yaw, pitch, roll) in degrees: floats for a single image,
            lists of floats (one per image) when a list was given.
        """
        if isinstance(image, list):
            # Stack the transformed images into one batch tensor.  The
            # original code left a plain Python list here, which crashed
            # on the .to(device) call below.
            batch = torch.stack([self.transform(img) for img in image])
        elif isinstance(image, str):
            batch = self.transform(Image.open(image)).unsqueeze(dim=0)
        else:
            batch = self.transform(image).unsqueeze(dim=0)

        batch = batch.to(self.device)
        yaw, pitch, roll = self.model(batch)

        # Softmax over bins, then expected bin index mapped to degrees:
        # bin * 3 - 99 covers the range [-99, 96].
        angles = []
        for logits in (yaw, pitch, roll):
            probs = F.softmax(logits, dim=1)
            angles.append(torch.sum(probs * self.idx_tensor, dim=1) * 3 - 99)
        yaw, pitch, roll = angles

        if yaw.numel() == 1:
            return yaw.item(), pitch.item(), roll.item()
        return yaw.tolist(), pitch.tolist(), roll.tolist()


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Estimate head pose (yaw/pitch/roll) for a face image.")
    parser.add_argument('--checkpoint', type=str, required=True,
                        help='path to a Hopenet checkpoint (.pkl)')
    parser.add_argument('--image', type=str, required=True,
                        help='path to the input face image')
    args = parser.parse_args()

    head_pose = HeadPose(checkpoint=args.checkpoint)
    yaw, pitch, roll = head_pose.predict(args.image)

    # Report all three angles (the original printed only yaw).
    print("Yaw: %f" % yaw)
    print("Pitch: %f" % pitch)
    print("Roll: %f" % roll)

    img = cv2.imread(args.image)
    img = draw_pose(img, yaw, pitch, roll, tdx=200, tdy=200, size=100)
    # OpenCV loads images as BGR but matplotlib expects RGB; without this
    # conversion the displayed colors (and axis colors) are swapped.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.imshow(img)
    plt.show()
131 changes: 131 additions & 0 deletions hopenet.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
import math

import torch.nn as nn
import torchvision


class Hopenet(nn.Module):
    """Hopenet head-pose network: a ResNet backbone with three separate
    classification heads that bin yaw, pitch and roll into ``num_bins``
    discrete angle bins.
    """

    def __init__(self, resnet=50, num_bins=66):
        """
        Args:
            resnet: backbone depth; one of 18, 34, 50, 101, 152
                (generalized from the original ResNet-50-only code).
            num_bins: number of angle bins per head.

        Raises:
            ValueError: if ``resnet`` is not a supported depth.
        """
        self.inplanes = 64
        super().__init__()

        # Standard ResNet stage configurations.
        depth_layers = {
            18: [2, 2, 2, 2],
            34: [3, 4, 6, 3],
            50: [3, 4, 6, 3],
            101: [3, 4, 23, 3],
            152: [3, 8, 36, 3],
        }
        if resnet not in depth_layers:
            # The original `raise("ResNet not supported")` raised a bare
            # string, which itself fails with TypeError.
            raise ValueError("ResNet-%s not supported, choose one of %s"
                             % (resnet, sorted(depth_layers)))
        layers = depth_layers[resnet]
        # BasicBlock for the shallow variants, Bottleneck otherwise.
        if resnet in (18, 34):
            block = torchvision.models.resnet.BasicBlock
        else:
            block = torchvision.models.resnet.Bottleneck

        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        # Adaptive pooling yields a 1x1 map for any input size; for the
        # standard 224x224 input (7x7 feature map) it is identical to the
        # original AvgPool2d(7) and it has no parameters, so checkpoints
        # still load.
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc_yaw = nn.Linear(512 * block.expansion, num_bins)
        self.fc_pitch = nn.Linear(512 * block.expansion, num_bins)
        self.fc_roll = nn.Linear(512 * block.expansion, num_bins)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one ResNet stage of ``blocks`` residual blocks, adding a
        1x1 downsample projection when the resolution or channel count
        changes."""
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Return raw (un-softmaxed) bin logits for yaw, pitch and roll,
        each of shape (batch, num_bins)."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        pre_yaw = self.fc_yaw(x)
        pre_pitch = self.fc_pitch(x)
        pre_roll = self.fc_roll(x)

        return pre_yaw, pre_pitch, pre_roll


class ResNet(nn.Module):
    """Plain ResNet used for direct regression of the 3 Euler angles:
    same backbone as Hopenet but with a single ``num_classes``-wide
    linear output head instead of three bin classifiers."""

    def __init__(self, block, layers, num_classes=1000):
        self.inplanes = 64
        super(ResNet, self).__init__()
        # Stem: 7x7 stride-2 conv + BN/ReLU + stride-2 max pool.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # Four residual stages; stages 2-4 halve the resolution.
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AvgPool2d(7)
        self.fc_angles = nn.Linear(512 * block.expansion, num_classes)

        # He-style initialization: conv weights ~ N(0, sqrt(2 / fan_out)),
        # batch-norm scale 1 and shift 0.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                fan_out = module.kernel_size[0] * module.kernel_size[1] * module.out_channels
                module.weight.data.normal_(0, math.sqrt(2. / fan_out))
            elif isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage of ``blocks`` residual blocks; the first block
        gets a 1x1 projection shortcut when shape or stride changes."""
        out_channels = planes * block.expansion
        downsample = None
        if stride != 1 or self.inplanes != out_channels:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, out_channels,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )

        stage = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = out_channels
        stage.extend(block(self.inplanes, planes) for _ in range(blocks - 1))
        return nn.Sequential(*stage)

    def forward(self, x):
        """Return the regressed angle vector of shape (batch, num_classes)."""
        out = self.maxpool(self.relu(self.bn1(self.conv1(x))))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            out = stage(out)
        out = self.avgpool(out)
        out = out.view(out.size(0), -1)
        return self.fc_angles(out)
4 changes: 4 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
torch==1.1.0
torchvision==0.3.0
opencv-python
pillow
34 changes: 34 additions & 0 deletions visualization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from math import cos, sin

import cv2
import numpy as np


def draw_pose(img, yaw, pitch, roll, tdx=None, tdy=None, size=100):
    """Draw the three head-pose axes on ``img`` (in place) and return it.

    Args:
        img: image array; cv2.line draws directly into it.
        yaw, pitch, roll: head-pose angles in degrees.
        tdx, tdy: pixel origin of the axes; defaults to the image center.
        size: length of each drawn axis in pixels.

    Returns:
        The same image array with the axes drawn (colors are in OpenCV's
        BGR order: X red, Y green, Z blue).
    """
    # Degrees -> radians; yaw is negated (mirrored) before projection.
    pitch = pitch * np.pi / 180
    yaw = -(yaw * np.pi / 180)
    roll = roll * np.pi / 180

    # `is None` instead of `== None`: identity check, and 0 is a valid
    # explicit origin.
    if tdx is None or tdy is None:
        height, width = img.shape[:2]
        tdx = width / 2
        tdy = height / 2

    # X-Axis pointing to right. drawn in red
    x1 = size * (cos(yaw) * cos(roll)) + tdx
    y1 = size * (cos(pitch) * sin(roll) + cos(roll) * sin(pitch) * sin(yaw)) + tdy

    # Y-Axis | drawn in green
    #        v
    x2 = size * (-cos(yaw) * sin(roll)) + tdx
    y2 = size * (cos(pitch) * cos(roll) - sin(pitch) * sin(yaw) * sin(roll)) + tdy

    # Z-Axis (out of the screen) drawn in blue
    x3 = size * (sin(yaw)) + tdx
    y3 = size * (-cos(yaw) * sin(pitch)) + tdy

    cv2.line(img, (int(tdx), int(tdy)), (int(x1), int(y1)), (0, 0, 255), 3)
    cv2.line(img, (int(tdx), int(tdy)), (int(x2), int(y2)), (0, 255, 0), 3)
    cv2.line(img, (int(tdx), int(tdy)), (int(x3), int(y3)), (255, 0, 0), 2)

    return img

0 comments on commit ab870e5

Please sign in to comment.