如何使用OpenCV裁剪图像中的白斑并制作护照尺寸的照片
我是 OpenCV 的新手,我的图像需要裁剪为标准的护照尺寸照片。我有成千上万张图像需要像这样自动裁剪并摆正。如果图像太模糊而无法裁剪,我需要将其复制到"已拒绝"文件夹中。我尝试过使用 Haar 级联分类器,但这种方法只能得到人脸区域,而我需要的是包含人脸及其背景的整张证件照。谁能告诉我如何用 OpenCV 或其他工具编写代码来实现?
# Detect frontal faces on a grayscale copy of the input image.
# `img`, `x1`, `y1`, `y2`, `path` and `rejectedCount` are not defined in this
# snippet; they must already exist when it runs.
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
cascade_file = cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
faceCascade = cv2.CascadeClassifier(cascade_file)
faces = faceCascade.detectMultiScale(
    gray,
    scaleFactor=1.3,
    minNeighbors=3,
    minSize=(30, 30),
)

# NOTE(review): the original indentation was lost; this layout assumes the
# `else` pairs with the face-count check (anything other than exactly one
# detected face is counted as rejected) -- confirm against the original.
if len(faces) != 1:
    rejectedCount += 1
    cv2.imwrite(path, img)
else:
    # Exactly one detection, so take it directly.
    x, y, w, h = faces[0]
    if x - w < 100 and y - h < 100:
        # Widen the detected face box by percentages before cropping.
        ystart = int(y - y * int(y1) / 100)
        xstart = int(x - x * int(x1) / 100)
        yend = int(h + h * int(y1) / 100)
        # NOTE(review): uses `y2` where the sibling lines use x1/y1 -- confirm.
        xend = int(w + w * int(y2) / 100)
        roi_color = img[ystart:y + yend, xstart:x + xend]
        cv2.imwrite(path, roi_color)
前
后
回答
我将处理您的问题如下:
- 首先我们需要抓住我们感兴趣的点
- 知道普通护照头像的大小(以像素为单位)
如何抓住兴趣点。
我们还有更多的方法:
- 您可以使用 Windows 画图(Paint)应用程序
- 但为了更加程序化,我们可以使用 cv2。我将向您展示如何使用 cv2 做到这一点。
另请注意,这不会生成高分辨率图像,您需要自己调整代码。
# imports
import numpy as np
import cv2
# Side length, in pixels, of the square passport-photo output.
height = 600
width = height
# Source corners picked on the image; the mouse callback updates this list.
pt1 = []
# Destination corners as (x, y): top-left, top-right, bottom-left, bottom-right.
pt2 = np.array(
    [[0, 0], [height, 0], [0, width], [height, width]],
    dtype=np.float32,
)
def mouseEvent(event, x, y, flags, param):
    """Mouse callback that records left-click positions in the global ``pt1``.

    A left click appends ``[x, y]`` to ``pt1``. If ``pt1`` already holds
    exactly four points, the click clears the list instead and is not
    recorded. Every other event is ignored; ``flags`` and ``param`` are unused.
    """
    global pt1
    if event != cv2.EVENT_LBUTTONDOWN:
        return
    if len(pt1) == 4:
        pt1 = []
        return
    pt1.append([x, y])
# Load the picture once: decoding the file again on every pass of the event
# loop is wasted work, and the mouse callback only needs registering once.
image = cv2.imread("img.jpg", cv2.IMREAD_UNCHANGED)
if image is None:
    # cv2.imread reports failure by returning None rather than raising.
    raise FileNotFoundError("img.jpg could not be read")
cv2.imshow("Original Image", image)
cv2.setMouseCallback("Original Image", mouseEvent)
# Pump the GUI event loop until four corners were clicked or 'q' is pressed.
while len(pt1) < 4:
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
然后我们使用两个 cv2 函数:getPerspectiveTransform 和 warpPerspective。getPerspectiveTransform() 接受两组点,即我们的 pt1 和 pt2;接着调用 warpPerspective(),并向它传入三个位置参数:图像、变换矩阵和输出图像的尺寸:
# imports
import numpy as np
import cv2
# Side length, in pixels, of the square passport-photo output.
height = 600
width = height
# Source corners picked on the image; the mouse callback updates this list.
pt1 = []
# Destination corners as (x, y): top-left, top-right, bottom-left, bottom-right.
pt2 = np.array(
    [[0, 0], [height, 0], [0, width], [height, width]],
    dtype=np.float32,
)
def mouseEvent(event, x, y, flags, param):
    """Mouse callback that records left-click positions in the global ``pt1``.

    A left click appends ``[x, y]`` to ``pt1``. If ``pt1`` already holds
    exactly four points, the click clears the list instead and is not
    recorded. Every other event is ignored; ``flags`` and ``param`` are unused.
    """
    global pt1
    if event != cv2.EVENT_LBUTTONDOWN:
        return
    if len(pt1) == 4:
        pt1 = []
        return
    pt1.append([x, y])
# Load the picture once: decoding the file again on every pass of the event
# loop is wasted work, and the mouse callback only needs registering once.
image = cv2.imread("img.jpg", cv2.IMREAD_UNCHANGED)
if image is None:
    # cv2.imread reports failure by returning None rather than raising.
    raise FileNotFoundError("img.jpg could not be read")
cv2.imshow("Original Image", image)
cv2.setMouseCallback("Original Image", mouseEvent)
# Pump the GUI event loop until four corners were clicked or 'q' is pressed.
while len(pt1) < 4:
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
我知道这个解释不算好,但您应该已经明白大意了。完整的程序代码如下所示:
# Load the photo as a single-channel (grayscale) image (flag 0).
image = cv2.imread("img.jpg", 0)
if image is None:
    # cv2.imread reports failure by returning None rather than raising.
    raise FileNotFoundError("img.jpg could not be read")
# Homography mapping the four clicked corners (pt1) onto the target corners (pt2).
matrix = cv2.getPerspectiveTransform(np.float32(pt1), pt2)
# warpPerspective takes the output size as (width, height). The original passed
# image.shape, which is (rows, cols) -- swapped, and unrelated to the target
# rectangle. Size the output to the extent of pt2 so the result is exactly the
# cropped, straightened photo.
dsize = (int(pt2[:, 0].max()), int(pt2[:, 1].max()))
image = cv2.warpPerspective(image, matrix, dsize)
cv2.imshow("Wrap Perspective", image)
cv2.waitKey(0)
- 当您运行以下代码时,将显示一个图像。
- 要使用此程序,您必须按 A 到 D 的顺序依次单击四个点。例如,如果这是您的图片:
------------------
| (a) (b)|
| |
| |
| |
| |
| |
| (c) (d)|
-------------------
其中 a、b、c 和 d 是您希望在图像中裁剪的区域的四个角点。
演示
点击点1再2然后3,最后4得到上述结果
- “使用人工智能”是一个非答案(相当于“使用魔法”),级联分类器完全不适合选择这些角点,因为这些角点会发生一些旋转。众所周知,当事物旋转时,级联分类器会失败。
回答
这是在 Python/OpenCV 中以图像周围的黑线为依据来提取照片的一种方法。
输入:
- Read the input
- Pad the image with white so that the lines can be extended until intersection
- Threshold on black to extract the lines
- Apply morphology close to try to connect the lines somewhat
- Get the contours and filter on area drawing the contours on a black background
- Apply morphology close again to fill the line centers
- Skeletonize to thin the lines
- Get the Hough lines and draw them as white on a black background
- Floodfill the center of the rectangle of lines to fill with mid-gray. Then convert that image to binary so that the gray becomes white and all else is black.
- Get the coordinates of all non-black pixels and then from the coordinates get the rotated rectangle.
- Use the angle and center of the rotated rectangle to unrotate both the padded image and this mask image via an affine warp
- (Alternately, get the four corners of the rotated rectangle from the mask and then project that to the padded input domain using the affine matrix)
- Get the coordinates of all non-black pixels in the unrotated mask and compute its rotated rectangle.
- Get the bounding box of the (un-)rotated rectangle
- Use those bounds to crop the padded image
- Save the results
import cv2
import numpy as np
import math
from skimage.morphology import skeletonize
# read image
img = cv2.imread('passport.jpg')
if img is None:
    # cv2.imread reports failure by returning None rather than raising; fail
    # here with a clear message instead of on the attribute access below.
    raise FileNotFoundError("passport.jpg could not be read")
ht, wd = img.shape[:2]

# pad image with white by 20% on all sides so that the border lines can later
# be extended until they intersect
padpct = 20
xpad = int(wd*padpct/100)
ypad = int(ht*padpct/100)
imgpad = cv2.copyMakeBorder(img, ypad, ypad, xpad, xpad, borderType=cv2.BORDER_CONSTANT, value=(255,255,255))
ht2, wd2 = imgpad.shape[:2]
# Keep only near-black pixels: every channel must lie within 0..20.
low = (0, 0, 0)
high = (20, 20, 20)
thresh = cv2.inRange(imgpad, low, high)

# Morphological close with a 5x5 kernel to connect the white lines.
kernel = np.ones((5, 5), np.uint8)
morph = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)

# External contours. findContours returns a 2- or 3-tuple, so pick the
# contour list by tuple length.
# NOTE(review): contours are taken from `thresh`, not from the closed image
# `morph` computed just above -- confirm this is intended.
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]

# Draw 1-pixel outlines of every contour whose area exceeds 20 onto a black mask.
mask = np.zeros((ht2, wd2), dtype=np.uint8)
for cntr in contours:
    if cv2.contourArea(cntr) <= 20:
        continue
    cv2.drawContours(mask, [cntr], 0, 255, 1)

# Close again with the same 5x5 kernel, then scale to the 0..1 range
# before thinning.
closed = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
bmask = closed / 255

# Thin the lines (skeletonize) and convert the result back to 0/255 uint8.
skeleton = skeletonize(bmask)
skeleton = (255 * skeleton).clip(0, 255).astype(np.uint8)
# get hough lines and draw them as white on a black background
line_img = np.zeros_like(imgpad, dtype=np.uint8)
lines = cv2.HoughLines(skeleton, 1, math.pi/180.0, 90, np.array([]), 0, 0)
if lines is None:
    # HoughLines yields None when nothing reaches the vote threshold; the
    # original then failed on `lines.shape` with an unrelated AttributeError.
    raise RuntimeError("no Hough lines were detected in the skeleton image")

# Half-length of each drawn segment. (x0, y0) is the point of the line nearest
# the origin, so any in-image point of the line is at most one image diagonal
# away from it; rows + cols exceeds the diagonal. The former fixed 1000 px
# left gaps on large images, which lets the later flood fill leak out.
reach = sum(line_img.shape[:2])
for rho, theta in lines[:, 0]:
    cos_t = math.cos(theta)
    sin_t = math.sin(theta)
    x0, y0 = cos_t*rho, sin_t*rho
    pt1 = ( int(x0+reach*(-sin_t)), int(y0+reach*(cos_t)) )
    pt2 = ( int(x0-reach*(-sin_t)), int(y0-reach*(cos_t)) )
    cv2.line(line_img, pt1, pt2, (255, 255, 255), 1)
# Flood fill with mid-gray (128), seeded at the centre of the padded image.
xcent = wd2 // 2
ycent = ht2 // 2
# The flood-fill mask is 2 pixels larger than the image in each dimension.
ffmask = np.zeros((ht2 + 2, wd2 + 2), np.uint8)
mask2 = line_img.copy()
# floodFill returns a tuple; element 1 is the filled image.
mask2 = cv2.floodFill(mask2, ffmask, (xcent, ycent), (128, 128, 128))[1]

# Binarise: filled (128) values become 255, everything else 0, then keep a
# single channel.
filled = mask2 == 128
mask2[~filled] = 0
mask2[filled] = 255
mask2 = mask2[..., 0]
# Coordinates of all non-zero mask pixels.
# NOTE: the mask is transposed first because numpy coords are y,x while
# OpenCV uses x,y.
coords = np.column_stack(np.where(mask2.transpose() > 0))

# Rotated rectangle enclosing those coordinates.
rotrect = cv2.minAreaRect(coords)
center, (width, height), angle = rotrect

# Angle normalisation taken from
# https://www.pyimagesearch.com/2017/02/20/text-skew-correction-opencv-python/
# which describes `cv2.minAreaRect` as returning angles in [-90, 0): below
# -45 add 90 degrees and negate, otherwise just negate to make it positive.
# NOTE(review): this relies on that angle range -- confirm it holds for the
# OpenCV version in use.
angle = -(90 + angle) if angle < -45 else -angle

# Correction rotation and its affine matrix about the rectangle centre.
rotation = -angle - 90
M = cv2.getRotationMatrix2D(center, rotation, scale=1.0)

# Unrotate the padded image and the mask with the same affine warp.
warp_opts = dict(flags=cv2.INTER_CUBIC, borderValue=(0, 0, 0))
rot_img = cv2.warpAffine(imgpad, M, (wd2, ht2), **warp_opts)
rot_mask2 = cv2.warpAffine(mask2, M, (wd2, ht2), **warp_opts)

# Non-zero pixels of the unrotated mask (transposed again for x,y order).
coords2 = np.column_stack(np.where(rot_mask2.transpose() > 0))

# Bounding box of the unrotated mask, used to crop the unrotated image.
x, y, w, h = cv2.boundingRect(coords2)
print(x, y, w, h)
result = rot_img[y:y + h, x:x + w]
# One row per image: (output file name, window title, image).
outputs = [
    ('passport_pad.jpg', "imgpad", imgpad),
    ('passport_thresh.jpg', "thresh", thresh),
    ('passport_morph.jpg', "morph", morph),
    ('passport_mask.jpg', "mask", mask),
    ('passport_skeleton.jpg', "skeleton", skeleton),
    ('passport_line_img.jpg', "line_img", line_img),
    ('passport_mask2.jpg', "mask2", mask2),
    ('passport_rot_img.jpg', "rot_img", rot_img),
    ('passport_rot_mask2.jpg', "rot_mask2", rot_mask2),
    ('passport_result.jpg', "result", result),
]

# Save every image first ...
for filename, _, picture in outputs:
    cv2.imwrite(filename, picture)

# ... then show each in its own window and wait for a key press before
# closing all windows.
for _, title, picture in outputs:
    cv2.imshow(title, picture)
cv2.waitKey(0)
cv2.destroyAllWindows()
填充图像:
阈值图像:
形态学清洗图像:
面具 1 图像:
骨架图像:
(Hough) 线图:
洪水填充线图像 - Mask2:
未旋转的填充图像:
未旋转的 Mask2 图像:
裁剪图像:
- 要让这种方法适用于所有图像非常困难。在您的部分图像中,黑线不够突出或不够暗;其中一张图像里还有多余的黑线。图像本身也存在畸变,黑线并不是笔直的,因此霍夫变换在每条边上检测到的不是一条线,而是多条线。