# CaseBasedReasoning/similarity.py

import math
from typing import Any, Callable, Tuple, Union

"""
METRIC SIMILARITY FUNCTIONS
"""
def manhattan_sim(q_val: float, c_val: float) -> float:
    """Turn the Manhattan distance between two numbers into a similarity.

    The distance is the absolute difference ``|q_val - c_val|``; it is mapped
    to ``1 / (1 + distance)``, so equal values score 1 and the score shrinks
    towards 0 as the values move apart.

    Args:
        q_val: Numeric value taken from the query.
        c_val: Numeric value taken from the case it is compared against.

    Returns:
        The similarity ``1 / (1 + |q_val - c_val|)``.
    """
    distance = abs(q_val - c_val)
    return 1 / (1 + distance)

def euclid_sim(q_val: float, c_val: float) -> float:
    """Turn the Euclidean distance between two numbers into a similarity.

    For scalars the Euclidean distance ``sqrt((q_val - c_val) ** 2)`` is simply
    the absolute difference. It is computed directly with ``math.fabs`` rather
    than by squaring, because squaring a large float difference raises
    ``OverflowError`` even though the distance itself is representable.

    Args:
        q_val: Numeric value taken from the query.
        c_val: Numeric value taken from the case it is compared against.

    Returns:
        The similarity ``1 / (1 + |q_val - c_val|)``: 1 for equal values,
        approaching 0 as the values move apart.
    """
    # math.fabs always yields a float, matching what math.sqrt returned before.
    distance = math.fabs(q_val - c_val)
    return 1 / (1 + distance)


# Similarity functions for numeric (metric) values defined in this module.
METRIC_SIMS = [
    manhattan_sim,
    euclid_sim,
]

"""
SYMBOLIC SIMILARITY
"""
def symbolic_sim(q_field_name: str, c_field_name: str, sim_matrix: dict) -> float:
    """Look up the similarity of two symbols in a nested similarity table.

    Args:
        q_field_name: Key of the query symbol; selects the outer entry of
            ``sim_matrix``.
        c_field_name: Key of the case symbol; selects the inner entry.
        sim_matrix: Nested mapping indexed as
            ``sim_matrix[q_field_name][c_field_name]``.

    Returns:
        The value stored for the pair; it is returned as-is, without any
        validation or normalisation.

    Raises:
        KeyError: If either key is missing from a plain ``dict`` table.
    """
    similarities_for_query = sim_matrix[q_field_name]
    return similarities_for_query[c_field_name]


# Similarity functions for symbolic (table-driven) values defined in this module.
SYMBOLIC_SIMS = [
    symbolic_sim,
]

"""
CHARACTER EDIT DISTANCE
"""
def edit_distance(
    word_1: str, word_2: str, to_same_case: bool = True
) -> Union[int, Tuple[str, int]]:
    """Greedily rewrite ``word_1`` into ``word_2`` and count the edits used.

    The words are walked left to right. At each position the characters
    either match (no cost), or one single-character insertion or deletion is
    applied to the working copy of ``word_1`` (cost 1 each). There is no
    substitution step, so replacing a character costs a deletion plus an
    insertion.

    This is a greedy heuristic, not the minimal (Levenshtein) edit distance:
    for example ``"BC" -> "ABC"`` is counted as 5 edits although a single
    insertion would suffice.

    Args:
        word_1: The word to transform.
        word_2: The word to transform ``word_1`` into.
        to_same_case: When true, both words are upper-cased before they are
            compared character by character.

    Returns:
        ``0`` (a bare ``int``) when the two inputs are exactly equal *before*
        any case folding; otherwise a tuple ``(transformed_word, count)``
        where ``transformed_word`` equals the (possibly upper-cased)
        ``word_2`` and ``count`` is the number of insertions and deletions
        applied. Inputs that differ only in case therefore yield
        ``(word_2.upper(), 0)`` rather than ``0`` when ``to_same_case`` is
        true. Callers have to handle both return shapes.
    """
    if word_1 == word_2:
        return 0

    if to_same_case:
        word_1, word_2 = word_1.upper(), word_2.upper()

    source, target = list(word_1), list(word_2)

    i, count = 0, 0
    # Invariant: source[:i] == target[:i]. Both lengths are re-read on every
    # pass because ``source`` grows and shrinks while it is being edited; the
    # loop only stops once ``source`` has become ``target``.
    while i < len(source) or i < len(target):

        # source is exhausted -> append the current char of target
        if i >= len(source):
            source.append(target[i])
            count += 1
            i += 1
            continue

        # target is exhausted -> drop the surplus char of source
        if i >= len(target):
            source.pop(i)
            count += 1
            continue

        # same char -> nothing to do at this position
        if source[i] == target[i]:
            i += 1
            continue

        # One-character look-ahead (never at the very first position): if the
        # current char of source is the *next* char of target, target merely
        # has one extra char here, so insert it instead of deleting.
        # e.g. source = "MR", target = "MAR" at i == 1 -> insert "A".
        # The look-ahead index is bounds-checked so a mismatch on the last
        # char of target falls through to the deletion below.
        if i > 0 and i + 1 < len(target) and source[i] == target[i + 1]:
            source.insert(i, target[i])
            count += 1
            i += 1
            continue

        # plain mismatch -> delete the current char of source and retry
        source.pop(i)
        count += 1

    return "".join(source), count


# String comparison functions defined in this module.
# NOTE(review): edit_distance reports an edit count (a distance), not a
# normalised similarity - confirm that consumers of this list expect that.
STRING_SIMS = [
    edit_distance,
]
Initial commit 2022-12-13 13:25:31 +00:00			`import math`
			`from typing import Any, Callable`

			`"""`
			`METRIC SIMILARITY FUNCTIONS`
			`"""`
			`def manhattan_sim(q_val: float, c_val: float) -> float:`
			`m_dist = lambda x, y: abs(x - y)`
			`return 1 / (1 + m_dist(q_val, c_val))`

			`def euclid_sim(q_val: float, c_val: float) -> float:`
			`e_dist = lambda x, y: math.sqrt((x - y)**2)`
			`return 1 / (1 + e_dist(q_val, c_val))`


			`METRIC_SIMS = [manhattan_sim, euclid_sim]`

			`"""`
			`SYMBOLIC SIMILARITY`
			`"""`
			`def symbolic_sim(q_field_name: str, c_field_name: str, sim_matrix: dict) -> float:`
			`return sim_matrix[q_field_name][c_field_name]`


			`SYMBOLIC_SIMS = [symbolic_sim]`

			`"""`
			`CHARACTER EDIT DISTANCE`
			`"""`
			`def edit_distance(word_1: str, word_2: str, to_same_case: bool = True) -> int:`

			`if word_1 == word_2:`
			`return 0`

			`if to_same_case:`
			`word_1, word_2 = [word.upper() for word in (word_1, word_2)]`

			`word_1, word_2 = list(word_1), list(word_2)`
			`longer_word = word_1 if len(word_1) > len(word_2) else word_2`

			`i, count = 0, 0`
			`while i < len(longer_word):`

			`# word_2 is longer -> add current char of word_2`
			`if i >= len(word_1):`
			`word_1.append(word_2[i])`
			`count += 1`
			`#continue`

			`# word_1 is longer -> remove current char of word_1`
			`if i >= len(word_2):`
			`word_1.pop(i)`
			`count += 1`
			`continue`

			`# same char -> skip word`
			`if word_1[i] == word_2[i]:`
			`i += 1`
			`continue`

			`# not in the beginning or the end`
			`if i > 0 and i < len(word_1):`
„similarity.py“ ändern 2023-01-10 11:50:24 +00:00			`"""`
			`previous char is same and current char of word_1 is same as next char of word_2`
			`-> fill current char of word_2 between last and next char of word_1`
			`e.g. word_1[i-1] = "M" ; word_1[i] = "R"`
			`word_2[i-1] = "M" ; word_1[i] = "A" ; word_2[i+1] = "R"`
			`"""`
Initial commit 2022-12-13 13:25:31 +00:00			`if word_1[i-1] == word_2[i-1] and word_1[i] == word_2[i+1]:`
			`word_1.insert(i, word_2[i])`
			`count += 1`
			`i += 1`
			`continue`

			`if word_1[i] != word_2[i]:`
			`word_1.pop(i)`
			`count += 1`
			`continue`

			`return "".join(word_1), count`


			`STRING_SIMS = [edit_distance]`