如何在对扭曲或光线不敏感的情况下从照片中定位和提取迷宫

html5 • 2022年9月15日 pm2:01 • 问答

我已经在 Stack Overflow 上问过几个关于从照片中定位和提取迷宫的问题，但得到的答案没有一个能适用于不同的照片，甚至连 4 张测试照片都无法全部适用。每次我调整代码使其适用于某一张照片时，它又会因为角落/局部的扭曲或光线等原因在其余照片上失败。我觉得我需要找到一种对图像扭曲、不同光照强度以及迷宫墙壁（迷宫内的线条）的不同颜色都不敏感的方法。

三周以来我一直在尝试让它正常工作，但都没有成功。在我放弃这个想法之前，我想问一下：是否可以不借助 AI、仅使用图像处理来从照片中定位和提取迷宫？如果可以，你能告诉我该怎么做吗？

下面是代码和照片：

import cv2    
import numpy as np

from skimage.exposure import rescale_intensity
from skimage.feature import corner_harris, corner_subpix, corner_peaks
from skimage.io import imread, imshow
from skimage.morphology import reconstruction, binary_erosion, skeletonize, dilation, square
from skimage.morphology.convex_hull import convex_hull_image
from skimage.util import invert
from skmpe import parameters, mpe, OdeSolverMethod

# Load the photo as grayscale and binarise it (Otsu picks the threshold,
# so the literal 100 is ignored by OpenCV when THRESH_OTSU is set).
maze = cv2.imread("simple.jpg", cv2.IMREAD_GRAYSCALE)
ret, maze = cv2.threshold(maze, 100, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

# Image centre; both seed windows below are centred on it.
h, w = maze.shape
hh, hw = h // 2, w // 2

# First pass: seed a reconstruction (by dilation, the default) with an
# 80x80 window copied from the centre of the binarised image.
size = 40
inner_window = np.s_[hh - size:hh + size, hw - size:hw + size]
seed = np.zeros_like(maze)
seed[inner_window] = maze[inner_window]
rec1 = reconstruction(seed, maze)

# Erode the first result with a 2x2 kernel before the second pass.
ker = np.ones((2, 2))
rec1_thicker = cv2.erode(rec1, ker, iterations=1)

# Second pass: the seed is 255 everywhere except a 480x480 centre window
# copied from the eroded result, then reconstructed by erosion.
size2 = 240
lhh, hhh = hh - size2, hh + size2
lhw, hhw = hw - size2, hw + size2
seed2 = np.ones_like(rec1) * 255
seed2[lhh:hhh, lhw:hhw] = rec1_thicker[lhh:hhh, lhw:hhw]
rec2 = reconstruction(seed2, rec1_thicker, method='erosion')

# Invert the [0, 1]-scaled result, take its convex hull, shrink the hull
# with a 5x5 erosion and look for Harris corners on it.
rec2_inv = invert(rec2 / 255.)
hull = convex_hull_image(rec2_inv)
hull_eroded = binary_erosion(hull, selem=np.ones((5, 5)))
coords = corner_peaks(
    corner_harris(hull_eroded),
    min_distance=5,
    threshold_rel=0.02,
)

import matplotlib.pyplot as plt
# Show the binarised input, both reconstructions and the convex hull
# side by side in one row.
fig, axe = plt.subplots(1, 4, figsize=(16, 8))
for panel, picture in zip(axe, (maze, rec1, rec2, hull)):
    panel.imshow(picture, 'gray')

这是输出图像：

如您所见，第三个图是提取的迷宫，这段代码运行良好，但仅针对这 2 张照片，在这种情况下它们是simple.jpg和 'maze.jpg'...

如果你用 `hard.jpg` 试一下，结果看起来像这样：

它在 middle.jpg 上也失败了：

我已将所有 4 张测试照片上传到OneDrive，供任何有兴趣尝试的人使用。

更新 1

我绘制了所有掩码（mask），以查看每个掩码的作用。

# Structuring elements for the two morphological passes.
close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (31, 31))
open_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (201, 201))

# Binary mask (0 / 255) of pixels whose saturation is below 16.
mask = 255 * (sat < 16).astype(np.uint8)
# Closing with the 31x31 rectangle.
mask1 = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, close_kernel)
# Add a 10 px constant border on every side.
mask2 = cv2.copyMakeBorder(mask1, 10, 10, 10, 10, cv2.BORDER_CONSTANT, 0)
# Opening with the 201x201 rectangle.
mask3 = cv2.morphologyEx(mask2, cv2.MORPH_OPEN, open_kernel)

# One row of six panels: (image, colormap, title). The first image has its
# last axis reversed for display and uses matplotlib's default colormap.
panels = [
    (maze[..., ::-1], None, 'White balanced image'),
    (sat, 'gray', 'Saturation channel'),
    (mask, 'gray', 'sat < 16'),
    (mask1, 'gray', 'closed'),
    (mask2, 'gray', 'border'),
    (mask3, 'gray', 'rect'),
]

plt.figure(figsize=(18, 8))
for position, (picture, colormap, caption) in enumerate(panels, start=1):
    plt.subplot(1, 6, position)
    plt.imshow(picture, colormap)
    plt.title(caption)
plt.tight_layout()
plt.show()

所以在我看来，在整个图像周围制作边框的 mask2 是没有必要的。为什么我们需要mask2？

我还发现 mask2 和 mask3 的分辨率在每个维度上都大了 2 个像素：

maze.shape, sat.shape, mask.shape, mask1.shape, mask2.shape, mask3.shape
((4000, 1840, 3),
 (4000, 1840),
 (4000, 1840),
 (4000, 1840),
 (4002, 1842),
 (4002, 1842))

为什么？

回答

你真的很想得到这些 6.9 美元的菜，是吧？

对于给定的四个图像，我可以使用以下工作流程获得非常好的结果：

对输入图像做白平衡，使白纸尽量接近纯白。我采用了这种方法：取图像中心的一个小块，并从该小块中选出 R + G + B 值最高的像素——这里假设迷宫总是位于图像中心，并且这个小块中包含一些来自白纸的像素。
使用 HSV 色彩空间的饱和度通道为白纸生成掩码，并（粗略地）从图像中裁剪出该部分。
在裁剪出的区域上，执行现有的 reconstruction 方法。

结果如下：

maze.jpg

simple.jpg

middle.jpg

hard.jpg

这是完整的代码：

import cv2
import matplotlib.pyplot as plt
import numpy as np
from skimage.morphology import binary_erosion, reconstruction
from skimage.morphology.convex_hull import convex_hull_image


# https://stackoverflow.com/a/54481969/11089932
def simple_white_balancing(image):
    """White-balance a 3-channel image in place from its brightest centre pixel.

    A window of at most 40x40 pixels around the image centre is searched for
    the pixel with the largest channel sum. That pixel is used as the white
    reference: each channel is multiplied by ``lum / reference`` where
    ``lum`` is the mean of the reference pixel's three channels, so the
    reference pixel itself becomes neutral.

    Args:
        image: Array of shape (rows, cols, 3). It is modified in place.

    Returns:
        The same array object that was passed in.

    Raises:
        ValueError: If the image has zero rows or zero columns.
    """
    h, w = image.shape[:2]
    if h == 0 or w == 0:
        raise ValueError("image must contain at least one pixel")

    # Clamp the window start at 0: a negative start index would wrap around
    # and select an off-centre patch on images smaller than 40 px.
    top = max(h // 2 - 20, 0)
    left = max(w // 2 - 20, 0)
    patch = image[top:h // 2 + 20, left:w // 2 + 20]

    # Brightest pixel of the patch; np.argmax returns the first maximum in
    # row-major order.
    brightness = patch.astype(np.int64).sum(axis=2)
    y, x = np.unravel_index(np.argmax(brightness), brightness.shape)
    white_b, white_g, white_r = patch[y, x, ...].astype(float)
    lum = (white_r + white_g + white_b) / 3

    is_integer_image = np.issubdtype(image.dtype, np.integer)
    for channel, reference in enumerate((white_b, white_g, white_r)):
        if reference == 0:
            # Scaling by lum / 0 is undefined; leave this channel untouched.
            continue
        scaled = image[..., channel] * lum / reference
        if is_integer_image:
            # Pixels brighter than the reference can exceed the dtype range;
            # saturate them instead of letting the integer cast wrap around.
            limits = np.iinfo(image.dtype)
            scaled = np.clip(scaled, limits.min, limits.max)
        image[..., channel] = scaled
    return image


for file in ['maze.jpg', 'simple.jpg', 'middle.jpg', 'hard.jpg']:

    # Read image. cv2.imread signals failure by returning None instead of
    # raising, so fail early with a clear message.
    img = cv2.imread(file)
    if img is None:
        raise FileNotFoundError(f'Could not read image: {file}')

    # Initialize hull image (full image size, filled in after cropping)
    h, w = img.shape[:2]
    hull = np.zeros((h, w), np.uint8)

    # Simple white balancing, cf. https://stackoverflow.com/a/54481969/11089932
    img = cv2.GaussianBlur(img, (11, 11), None)
    maze = simple_white_balancing(img.copy())

    # Mask low saturation area
    sat = cv2.cvtColor(maze, cv2.COLOR_BGR2HSV)[..., 1]
    mask = (sat < 16).astype(np.uint8) * 255
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE,
                            cv2.getStructuringElement(cv2.MORPH_RECT,
                                                      (31, 31)))
    # Surround the mask with a 1 px black frame before the opening
    # (NOTE(review): presumably so regions touching the image edge are
    # bounded by background). The fill is passed as ``value=`` because the
    # 7th positional parameter of copyMakeBorder is ``dst``.
    mask = cv2.copyMakeBorder(mask, 1, 1, 1, 1, cv2.BORDER_CONSTANT, value=0)
    mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN,
                            cv2.getStructuringElement(cv2.MORPH_RECT,
                                                      (201, 201)))

    # Find largest contour in mask (w.r.t. the OpenCV version)
    cnts = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]
    if len(cnts) == 0:
        # Nothing survived the opening; max() below would raise on an
        # empty sequence.
        print(f'No low-saturation region found in {file}; skipping')
        continue
    cnt = max(cnts, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(cnt)

    # The mask carries a 1 px frame, so mask coordinates are shifted by +1
    # relative to the image; subtract the pad to map back.
    top, left = max(y - 1, 0), max(x - 1, 0)

    # Crop to low saturation area
    cut = cv2.cvtColor(maze[top:top+h, left:left+w], cv2.COLOR_BGR2GRAY)

    # Use existing reconstruction approach on low saturation area
    h_c, w_c = cut.shape
    hh = h_c // 2
    hw = w_c // 2

    # First pass: reconstruct from an 80x80 window around the crop centre.
    # Window starts are clamped at 0 so a small crop cannot produce a
    # negative index that wraps around.
    size = 40
    window = np.s_[max(hh-size, 0):hh+size, max(hw-size, 0):hw+size]
    seed = np.zeros_like(cut)
    seed[window] = cut[window]
    rec = reconstruction(seed, cut)
    rec = cv2.erode(rec, np.ones((2, 2)), iterations=1)

    # Second pass: reconstruction by erosion from a 480x480 centre window,
    # with the seed set to 255 everywhere else.
    size = 240
    window = np.s_[max(hh-size, 0):hh+size, max(hw-size, 0):hw+size]
    seed = np.ones_like(rec) * 255
    seed[window] = rec[window]
    rec = reconstruction(seed, rec, method='erosion').astype(np.uint8)
    # Pixels at or below the 25 % quantile become foreground (255).
    rec = cv2.threshold(rec, np.quantile(rec, 0.25), 255, cv2.THRESH_BINARY_INV)[1]

    # Paste the convex hull of the crop back at the crop's image position
    hull[top:top+h, left:left+w] = convex_hull_image(rec) * 255

    plt.figure(figsize=(18, 8))
    plt.subplot(1, 5, 1), plt.imshow(img[..., ::-1]), plt.title('Original image')
    plt.subplot(1, 5, 2), plt.imshow(maze[..., ::-1]), plt.title('White balanced image')
    plt.subplot(1, 5, 3), plt.imshow(sat, 'gray'), plt.title('Saturation channel')
    plt.subplot(1, 5, 4), plt.imshow(hull, 'gray'), plt.title('Obtained convex hull')
    plt.subplot(1, 5, 5), plt.imshow(cv2.bitwise_and(img, img, mask=hull)[..., ::-1])
    plt.tight_layout(), plt.savefig(file + 'output.png'), plt.show()

当然，不能保证这种方法对您接下来要处理的五张左右的图像同样有效。一般来说，请尽量让图像采集（旋转、照明）标准化，以获得更一致的图像。否则，您最终将需要某种机器学习方法……

----------------------------------------
System information
----------------------------------------
Platform:      Windows-10-10.0.16299-SP0
Python:        3.9.1
PyCharm:       2021.1.1
Matplotlib:    3.4.1
NumPy:         1.20.2
OpenCV:        4.5.1
scikit-image:  0.18.1
----------------------------------------

以上是如何在对扭曲或光线不敏感的情况下从照片中定位和提取迷宫的全部内容。

THE END

二维码

覆盖订阅是否会自动取消订阅之前的值？

< <上一篇

在手写的d.ts文件中，如何从模块根目录中的一个命名空间公开函数？

下一篇>>

搜索内容

如何在对扭曲或光线不敏感的情况下从照片中定位和提取迷宫

回答

目录

目录

推荐文章

最新文章