Captcha Bypass with OCR
Shape Detection
import base64
from io import BytesIO

import cv2
import numpy as np
import requests
from bs4 import BeautifulSoup
from PIL import Image
url = "https://example.com/login"


def fetch_captcha_src(page_url: str, timeout: float = 10) -> str:
    """Return the ``src`` attribute of the first ``<img>`` on *page_url*.

    Args:
        page_url: Address of the page that embeds the captcha image.
        timeout: Seconds to wait for the server before giving up; without
            it ``requests`` may block indefinitely.

    Raises:
        ValueError: If the page has no ``<img>`` element carrying a ``src``.
    """
    # 1. Send request to get HTML response.
    resp = requests.get(page_url, timeout=timeout)
    # 2. Parse HTML to extract the img element whose shape is to be detected.
    soup = BeautifulSoup(resp.text, 'html.parser')
    img_tag = soup.find('img')
    img_src = img_tag.get('src') if img_tag is not None else None
    if not img_src:
        raise ValueError("no <img> element with a src attribute found")
    return img_src


def decode_data_uri(img_src: str) -> bytes:
    """Return the raw bytes carried by a Base64 ``data:`` URI.

    The payload is everything after the last comma: Base64 text never
    contains a comma, whereas the media-type header in front of it may.

    Raises:
        ValueError: If *img_src* has no comma, i.e. it is not an inline
            ``<header>,<payload>`` image (for example a plain URL).
    """
    # 3. Decode Base64 and retrieve the image data.
    _header, sep, base64_data = img_src.rpartition(',')
    if not sep:
        raise ValueError("img src is not an inline Base64 data URI")
    return base64.b64decode(base64_data)


def load_image(img_data: bytes):
    """Decode encoded image bytes into an RGB ``numpy`` array.

    The image is converted to 3-channel RGB first so that palette,
    greyscale and RGBA captchas all reach OpenCV with the same layout.
    """
    with Image.open(BytesIO(img_data)) as pil_img:
        return np.array(pil_img.convert('RGB'))


def detect_shape(image) -> str:
    """Classify the dominant shape in an RGB image array.

    Returns:
        ``"circle"``, ``"triangle"``, ``"square"``, or ``""`` when none of
        these is recognised.
    """
    # 4. Detect a shape of the image.
    # PIL yields RGB channel order, so the RGB (not BGR) conversion is needed.
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 1.5)

    # 4-a. Detect circle
    circle = cv2.HoughCircles(
        blurred, cv2.HOUGH_GRADIENT, 1, 20,
        param1=50, param2=30, minRadius=0, maxRadius=0,
    )
    if circle is not None:
        return "circle"

    # 4-b. Detect other shapes
    # Thresholding is only needed for contour analysis, so it is done here
    # rather than before the circle check.
    thresh = cv2.adaptiveThreshold(
        blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV, 11, 2,
    )
    # [-2] selects the contour list on both OpenCV 3.x (3-tuple result)
    # and OpenCV 4.x (2-tuple result).
    contours = cv2.findContours(
        thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]
    for cnt in contours:
        perimeter = cv2.arcLength(cnt, True)
        approx = cv2.approxPolyDP(cnt, 0.04 * perimeter, True)
        if len(approx) == 3:
            return "triangle"
        if len(approx) == 4:
            _x, _y, w, h = cv2.boundingRect(approx)
            aspect_ratio = float(w) / h
            # Only near-1:1 quadrilaterals count; others are skipped and
            # the search continues with the next contour.
            if 0.95 <= aspect_ratio <= 1.05:
                return "square"
    return ""


if __name__ == "__main__":
    img_src = fetch_captcha_src(url)
    image = load_image(decode_data_uri(img_src))
    shape = detect_shape(image)
# 5. Use this 'shape' value for resolving captcha...
Math Equation Solving
References
Last updated