-#BF Search Project
 import cv2
+import torch
+import numpy as np
+from PIL import Image
+import os
+import clip  # using the openai-clip package
 
-# Define the paths of the target images
-target_image_paths = [
-    'C:\\Users\\Kushagra pathak\\Desktop\\python\\testimg\\765f28e4-33d3-4a9c-be0e-4bc9e6cee4fe.jfif',
-    'C:\\Users\\Kushagra pathak\\Desktop\\python\\testimg\\93c4ec94-5e2e-4e44-9baf-6f7f43eeb5c7.jfif',
-    'C:\\Users\\Kushagra pathak\\Desktop\\python\\testimg\\d612eb6b-089c-4d91-b435-a03d51e01adf.jfif',
-    # Add more image paths as needed
-]
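+# Load the CLIP model once at startup; ViT-B/32 returns 512-dim image embeddings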
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model, preprocess = clip.load("ViT-B/32", device=device)
 
-# Create a list to store the target images and their descriptors
-target_images = []
-target_keypoints = []
-target_descriptors = []
+# Folder with your 5 images
+dataset_folder = r"D:\Projects\Brute-Force-main\testimg"
+dataset_embeddings = []
+image_files = []
 
-# Create a feature detector
-orb = cv2.ORB_create()
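+# Pre-compute one L2-normalised CLIP embedding per reference image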
+print("Loading dataset images...")
+for img_name in os.listdir(dataset_folder):
+    path = os.path.join(dataset_folder, img_name)
+    try:
+        image = preprocess(Image.open(path)).unsqueeze(0).to(device)
+        with torch.no_grad():
+            embedding = model.encode_image(image)
+            embedding /= embedding.norm(dim=-1, keepdim=True)
+        dataset_embeddings.append(embedding)
+        image_files.append(path)
+    except Exception as e:
+        print(f"Failed loading {path}: {e}")
 
-# Load the target images and compute descriptors
-for path in target_image_paths:
-    target_image = cv2.imread(path)
-    keypoints_target, descriptors_target = orb.detectAndCompute(target_image, None)
-    if descriptors_target is not None:
-        target_images.append(target_image)
-        target_keypoints.append(keypoints_target)
-        target_descriptors.append(descriptors_target)
+print(f"Loaded {len(image_files)} images.")
+if not dataset_embeddings:
+    raise SystemExit("No images could be loaded from the dataset folder.")
 
-# Create a feature matcher
-bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
-
-# Initialize the camera
-camera = cv2.VideoCapture(0)
+# Start webcam
+cap = cv2.VideoCapture(0)
 
 while True:
-    # Capture frame-by-frame
-    ret, frame = camera.read()
-
-    # Detect features in the frame
-    keypoints_frame, descriptors_frame = orb.detectAndCompute(frame, None)
+    ret, frame = cap.read()
+    if not ret:
+        break
 
-    best_match_idx = None  # Index of the best matching target image
-    best_match_distance = float('inf')  # Initial distance set to infinity
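+    # Downscale the frame and convert BGR -> RGB before CLIP preprocessing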
+    small_frame = cv2.resize(frame, (320, 240))
+    rgb = cv2.cvtColor(small_frame, cv2.COLOR_BGR2RGB)
+    pil_img = Image.fromarray(rgb)
+    image = preprocess(pil_img).unsqueeze(0).to(device)
 
-    # Match the features between the frame and each target image
-    for i, descriptors_target in enumerate(target_descriptors):
-        matches = bf.match(descriptors_frame, descriptors_target)
-        matches = sorted(matches, key=lambda x: x.distance)
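+    # Embed the current frame; no_grad avoids building an autograd graph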
+    with torch.no_grad():
+        query_embedding = model.encode_image(image)
+        query_embedding /= query_embedding.norm(dim=-1, keepdim=True)
 
-        # Check if the current target image has a better match
-        if matches[0].distance < best_match_distance:
-            best_match_idx = i
-            best_match_distance = matches[0].distance
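+    # Cosine similarity against every reference embedding; highest score wins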
+    similarities = [torch.cosine_similarity(query_embedding, emb).item() for emb in dataset_embeddings]
+    best_idx = np.argmax(similarities)
+    best_score = similarities[best_idx]
 
-    if best_match_idx is not None:
-        # Draw the best match on the frame
-        matched_frame = cv2.drawMatches(
-            frame, keypoints_frame,
-            target_images[best_match_idx], target_keypoints[best_match_idx], [matches[0]], None, flags=2
-        )
-        cv2.imshow('Object Detection', matched_frame)
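+    # 0.60 is a tunable threshold; raise it to reduce false matches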
+    if best_score > 0.60:
+        matched_img = cv2.imread(image_files[best_idx])
+        matched_img = cv2.resize(matched_img, (small_frame.shape[1], small_frame.shape[0]))
+        combined = np.hstack((small_frame, matched_img))
+        label = f"Match: {os.path.basename(image_files[best_idx])} ({best_score:.2f})"
+        cv2.putText(combined, label, (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
+        cv2.imshow("Object Match", combined)
     else:
-        cv2.imshow('Object Detection', frame)
+        cv2.imshow("Object Match", small_frame)
 
-    # Check for the 'q' key to exit the program
     if cv2.waitKey(1) & 0xFF == ord('q'):
         break
 
-# Release the camera and close all windows
-camera.release()
+cap.release()
 cv2.destroyAllWindows()