Skip to content

Commit b98e073

Browse files
committed
feat: 增加相似度算法代码
1 parent 1dbb77f commit b98e073

File tree

4 files changed

+122
-0
lines changed

4 files changed

+122
-0
lines changed

Diff for: .DS_Store

0 Bytes
Binary file not shown.

Diff for: express-backend/.DS_Store

0 Bytes
Binary file not shown.

Diff for: express-backend/crawler/test_dayjs_format.js

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
const customParseFormat = require('dayjs/plugin/customParseFormat');
2+
const dayjs = require('dayjs');
3+
4+
dayjs.extend(customParseFormat);
5+
6+
/**
 * Parse two sample day/month/year strings with dayjs' custom-format parser
 * and print the resulting native Date objects.
 */
function test() {
  const dayFormat = 'D/M/YYYY';
  const [dateA, dateB] = ['3/4/2024', '12/9/2023'].map((text) => dayjs(text, dayFormat));

  console.log('dateA :', dateA.toDate());
  console.log('dateB :', dateB.toDate());
}
13+
14+
test();

Diff for: similarity_algo/main.py

+108
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
import glob
2+
import os
3+
import requests
4+
from flask import Flask, request, jsonify
5+
from PIL import Image
6+
from io import BytesIO
7+
8+
# Image file extensions in both lower- and upper-case spellings.
# NOTE(review): not referenced by any code visible in this file — confirm it is still needed.
EXTS = ('jpg', 'jpeg', 'JPG', 'JPEG', 'gif', 'GIF', 'png', 'PNG')
9+
10+
# Flask application object; the /similarity route below is registered on it.
app = Flask(__name__)
11+
12+
def dhash(image, hash_size=8):
    """Compute the difference hash (dHash) of an image as a hex string.

    The image is reduced to a ``(hash_size + 1) x hash_size`` grayscale
    thumbnail and each pixel is compared with its right-hand neighbour,
    giving ``hash_size * hash_size`` bits. The bits are packed eight at a
    time, least-significant bit first, and each byte is written as two
    lowercase hex digits.

    Args:
        image: A PIL image. Any mode is accepted; images that are not
            already single-band 8-bit (``'L'``) are converted first.
        hash_size: Number of comparisons per row and number of rows.

    Returns:
        The hash as a hex string. Trailing bits that do not fill a whole
        byte (when ``hash_size * hash_size`` is not a multiple of 8) are
        dropped.
    """
    # Compare brightness values. Without this, multi-band pixels are tuples
    # and ``>`` would compare them lexicographically, band by band.
    if image.mode != 'L':
        image = image.convert('L')

    width = hash_size + 1
    image = image.resize((width, hash_size), Image.Resampling.LANCZOS)
    pixels = list(image.getdata())

    # One bit per horizontally adjacent pair: True when brightness decreases.
    bits = []
    for row in range(hash_size):
        row_start = row * width
        for col in range(hash_size):
            bits.append(pixels[row_start + col] > pixels[row_start + col + 1])

    # Pack each complete group of 8 bits into one byte (bit 0 = first bit).
    hex_bytes = []
    for start in range(0, len(bits) - 7, 8):
        value = sum(
            1 << offset
            for offset, bit in enumerate(bits[start:start + 8])
            if bit
        )
        hex_bytes.append(format(value, '02x'))

    return ''.join(hex_bytes)
36+
37+
def dhash_rgb(image, hash_size=8):
    """Return the dHash of each colour channel as an ``(r, g, b)`` tuple.

    The image is converted to RGB mode when it is not already, split into
    its three bands, and each band is hashed independently with ``dhash``.
    """
    rgb_image = image if image.mode == 'RGB' else image.convert('RGB')
    red, green, blue = rgb_image.split()
    return tuple(dhash(band, hash_size) for band in (red, green, blue))
44+
45+
def hamming(h1, h2):
    """Return the bit-level Hamming distance between two hex-encoded hashes.

    Each pair of hex digits is XOR-ed and the set bits are counted, so a
    64-bit hash (16 hex digits) yields a distance between 0 and 64. Comparing
    the hex characters themselves would cap the distance at 16 and treat a
    one-bit change the same as a four-bit change.

    Args:
        h1: First hash as a string of hex digits.
        h2: Second hash as a string of hex digits.

    Returns:
        Number of differing bits over the common prefix of the two strings;
        0 when either string is empty.

    Raises:
        ValueError: If a compared character is not a hexadecimal digit.
    """
    return sum(
        bin(int(c1, 16) ^ int(c2, 16)).count('1')
        for c1, c2 in zip(h1, h2)
    )
47+
48+
def hamming_to_similarity(hamming_distance, max_distance):
    """Map a Hamming distance onto a similarity score.

    A distance of 0 gives 1 and a distance equal to ``max_distance`` gives 0.
    """
    mismatch_ratio = hamming_distance / max_distance
    return 1 - mismatch_ratio
50+
51+
@app.route('/similarity', methods=['POST'])
def calculate_similarity():
    """Compare an uploaded image with an image downloaded from a URL.

    Expects a multipart POST with a ``file`` part (the candidate image) and a
    ``url`` form field (the base image). Both images are hashed per RGB
    channel with ``dhash_rgb`` and the per-channel Hamming distances are
    turned into similarity scores.

    Returns:
        On success, a JSON object with the mean ``similarity`` over the three
        channels and a ``details`` object holding each channel's Hamming
        distance and similarity. On bad input, a JSON ``error`` object with
        HTTP status 400.
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No file part'}), 400

    file = request.files['file']
    if file.filename == '':
        return jsonify({'error': 'No selected file'}), 400

    url = request.form.get('url')
    if not url:
        return jsonify({'error': 'No URL provided'}), 400

    try:
        # NOTE(security): ``url`` is caller-supplied, so this request can be
        # pointed at internal hosts (SSRF). Restrict allowed hosts if this
        # service is reachable by untrusted clients.
        # A timeout keeps an unresponsive host from blocking the worker forever.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        base_image = Image.open(BytesIO(response.content))
        # Image.open is lazy; decode now so corrupt data is reported as a 400
        # here rather than surfacing as an unhandled error while hashing.
        base_image.load()
    except Exception as e:
        return jsonify({'error': 'Failed to download base image', 'message': str(e)}), 400

    try:
        candidate_image = Image.open(file.stream)
        candidate_image.load()  # same reason as above: force the decode
    except Exception as e:
        return jsonify({'error': 'Failed to load candidate image', 'message': str(e)}), 400

    base_hashes = dhash_rgb(base_image)
    candidate_hashes = dhash_rgb(candidate_image)

    max_distance = 8 * 8  # Maximum hamming distance for each channel
    channels = ('r', 'g', 'b')

    distances = {
        channel: hamming(base_hash, candidate_hash)
        for channel, base_hash, candidate_hash
        in zip(channels, base_hashes, candidate_hashes)
    }
    similarities = {
        channel: hamming_to_similarity(distance, max_distance)
        for channel, distance in distances.items()
    }

    # Average similarity over all channels
    similarity = sum(similarities.values()) / len(similarities)

    details = {f'ham_dist_{channel}': distances[channel] for channel in channels}
    details.update(
        {f'similarity_{channel}': similarities[channel] for channel in channels}
    )

    return jsonify({
        'similarity': similarity,
        'details': details,
    })
106+
107+
if __name__ == '__main__':
    # Debug mode enables the Werkzeug interactive debugger, which lets anyone
    # who can reach the server execute arbitrary Python. Keep it off unless
    # explicitly requested through the FLASK_DEBUG environment variable.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true')
    app.run(debug=debug_enabled)

0 commit comments

Comments
 (0)