Added first controls
This commit is contained in:
85
src/myfasthtml/core/matching_utils.py
Normal file
85
src/myfasthtml/core/matching_utils.py
Normal file
@@ -0,0 +1,85 @@
|
||||
from difflib import SequenceMatcher
|
||||
from typing import Any
|
||||
|
||||
|
||||
def _is_subsequence(query: str, target: str) -> tuple[bool, float]:
|
||||
"""
|
||||
Determines if a query string is a subsequence of a target string and calculates
|
||||
a score based on the compactness of the match. The match is case-insensitive.
|
||||
|
||||
The function iterates through each character of the query and checks if it
|
||||
exists in the target string while maintaining the order. If all characters of
|
||||
the query are found in order, it calculates a score based on the smallest
|
||||
window in the target that contains all the matched characters.
|
||||
|
||||
:param query: The query string to check as a subsequence.
|
||||
:param target: The target string in which to find the subsequence.
|
||||
:return: A tuple where the first value is a boolean indicating if a valid
|
||||
subsequence exists, and the second value is a float representing the
|
||||
compactness score of the match.
|
||||
:rtype: tuple[bool, float]
|
||||
"""
|
||||
query = query.lower()
|
||||
target = target.lower()
|
||||
|
||||
positions = []
|
||||
idx = 0
|
||||
|
||||
for char in query:
|
||||
idx = target.find(char, idx)
|
||||
if idx == -1:
|
||||
return False, 0.0
|
||||
positions.append(idx)
|
||||
idx += 1
|
||||
|
||||
# Smallest window containing all matched chars
|
||||
window_size = positions[-1] - positions[0] + 1
|
||||
|
||||
# Score: ratio of query length vs window size (compactness)
|
||||
score = len(query) / window_size
|
||||
|
||||
return True, score
|
||||
|
||||
|
||||
def fuzzy_matching(query: str, choices: list[Any], similarity_threshold: float = 0.7, get_attr=None):
    """
    Select the choices whose text is similar enough to the query.

    Similarity is the ``SequenceMatcher.ratio()`` between the lower-cased query
    and the lower-cased text of each choice, so the comparison is
    case-insensitive.

    :param query: The search text.
    :param choices: The candidate items. They are compared directly unless
        ``get_attr`` is given.
    :param similarity_threshold: Minimum ratio (between 0 and 1) a choice must
        reach to be kept. Defaults to 0.7.
    :param get_attr: Optional callable that extracts the text to compare from a
        choice. When omitted (or falsy), the choice itself is used.
    :return: The kept choices (the original objects, not the extracted text),
        ordered from most to least similar. Choices with equal similarity keep
        their input order.
    """
    extract = get_attr if get_attr else (lambda candidate: candidate)

    # Pair every sufficiently similar candidate with its ratio.
    scored = [
        (candidate, ratio)
        for candidate in choices
        if (ratio := SequenceMatcher(None, query.lower(), extract(candidate).lower()).ratio())
        >= similarity_threshold
    ]

    # Best ratio first; sorted() is stable, so ties keep input order.
    ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)

    # Drop the ratios and hand back only the original items.
    return [candidate for candidate, _ in ranked]
|
||||
|
||||
|
||||
def subsequence_matching(query: str, choices: list[Any], get_attr=None):
    """
    Select the choices that contain the query as a subsequence, best score first.

    The text of each choice is passed together with the query to
    ``_is_subsequence``; choices it reports as matching are kept and ordered by
    the score it returns.

    :param query: The search text.
    :param choices: The candidate items. They are matched directly unless
        ``get_attr`` is given.
    :param get_attr: Optional callable that extracts the text to match from a
        choice. When omitted (or falsy), the choice itself is used.
    :return: The matching choices (the original objects, not the extracted
        text), ordered by descending score. Choices with equal scores keep their
        input order.
    """
    extract = get_attr if get_attr else (lambda candidate: candidate)

    scored = []
    for candidate in choices:
        found, rank = _is_subsequence(query, extract(candidate))
        if not found:
            continue
        scored.append((candidate, rank))

    # Highest score first; sorted() is stable, so ties keep input order.
    ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)

    # Drop the scores and hand back only the original items.
    return [candidate for candidate, _ in ranked]
|
||||
Reference in New Issue
Block a user