Skip to content
This repository was archived by the owner on Apr 4, 2024. It is now read-only.

Commit 2aededc

Browse files
authored
Push PerCharacterEscaper (#27 closes #21)
2 parents f8f51a4 + 45a7e26 commit 2aededc

File tree

3 files changed

+184
-0
lines changed

3 files changed

+184
-0
lines changed
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,95 @@
1+
from typing import List
2+
3+
4+
class PerCharacterEscaper:
    """Escape and unescape strings one code point at a time.

    An escaper is configured with a single escape code point and two parallel
    lists: the code points that must be escaped, and the code point each one
    is written as after the escape character.  ``escape`` rewrites every
    listed code point as ``<escape><replacement>``; ``unescape`` reverses
    that, and passes any unlisted character following an escape through
    unchanged.

    Use the ``self_escape`` / ``specified_escape`` factories to build an
    instance from a compact policy string.
    """

    def __init__(
        self,
        escape_code_point: int,
        escaped_code_points: List[int],
        escaped_by_code_points: List[int],
    ):
        """Create an escaper.

        Args:
            escape_code_point: Code point of the character that introduces
                an escape sequence.
            escaped_code_points: Code points that ``escape`` must rewrite.
                The escape code point itself is only rewritten if it is
                listed here (both factories list it).
            escaped_by_code_points: For each entry of
                ``escaped_code_points``, the code point written after the
                escape character.

        Raises:
            ValueError: If the two lists differ in length.
        """
        if len(escaped_code_points) != len(escaped_by_code_points):
            raise ValueError(
                "escaped_code_points and escaped_by_code_points must have "
                "the same length."
            )
        self.__escape_code_point = escape_code_point
        self.__escaped_code_points = escaped_code_points
        self.__escaped_by_code_points = escaped_by_code_points
        # Dict lookups replace the per-character list scans.  ``setdefault``
        # keeps the first pairing when a code point is listed twice, which is
        # the pairing ``list.index`` would have selected.
        self.__escape_map = {}
        self.__unescape_map = {}
        for raw, replacement in zip(escaped_code_points, escaped_by_code_points):
            self.__escape_map.setdefault(raw, replacement)
            self.__unescape_map.setdefault(replacement, raw)

    def __first_offset_needing_escape(self, input_string: str) -> int:
        """Return the index of the first character that is the escape
        character or one of the escaped characters, or -1 if there is none."""
        for offset, char in enumerate(input_string):
            codepoint = ord(char)
            if (
                codepoint == self.__escape_code_point
                or codepoint in self.__escape_map
            ):
                return offset
        return -1

    def escape(self, input_string: str) -> str:
        """Return ``input_string`` with every escaped code point rewritten.

        When nothing needs escaping the very same string object is returned,
        so callers can detect "no change" with an identity check.
        """
        first = self.__first_offset_needing_escape(input_string)
        if first == -1:
            return input_string

        escape_char = chr(self.__escape_code_point)
        # Everything before ``first`` is known to be clean; copy it wholesale.
        result = [input_string[:first]]
        for char in input_string[first:]:
            replacement = self.__escape_map.get(ord(char))
            if replacement is None:
                result.append(char)
            else:
                result.append(escape_char)
                result.append(chr(replacement))
        return "".join(result)

    def unescape(self, input_string: str) -> str:
        """Reverse ``escape``.

        Each escape character consumes the character after it: a known
        replacement is mapped back to the original code point, any other
        character is emitted as-is.  When the input contains no escape
        character the very same string object is returned.

        Raises:
            ValueError: If an escape character has nothing after it, i.e. an
                unpaired escape character ends the string.
        """
        escape_char = chr(self.__escape_code_point)
        # Only the escape character matters when unescaping; text that merely
        # contains escapable characters needs no rewriting.
        first = input_string.find(escape_char)
        if first == -1:
            return input_string

        length = len(input_string)
        result = [input_string[:first]]
        i = first
        while i < length:
            char = input_string[i]
            if char != escape_char:
                result.append(char)
                i += 1
                continue
            if i + 1 == length:
                # Detected while scanning so that odd-length runs of escape
                # characters at the end (e.g. three in a row) are rejected
                # too, not just a single trailing escape character.
                raise ValueError(
                    "Escape character '{}' can't be the last character in a string.".format(
                        escape_char
                    )
                )
            next_codepoint = ord(input_string[i + 1])
            result.append(chr(self.__unescape_map.get(next_codepoint, next_codepoint)))
            i += 2
        return "".join(result)

    @classmethod
    def self_escape(cls, escape_policy):
        """Build an escaper in which every character escapes to itself.

        The first character of ``escape_policy`` is the escape character;
        every character of the policy (the escape character included) is
        escaped by prefixing it with the escape character.

        Raises:
            ValueError: If ``escape_policy`` is empty.
        """
        if not escape_policy:
            raise ValueError("Escape policy string must not be empty.")
        code_points = [ord(c) for c in escape_policy]
        return cls(code_points[0], code_points, code_points)

    @classmethod
    def specified_escape(cls, escape_policy):
        """Build an escaper from ``(character, replacement)`` pairs.

        ``escape_policy`` is read two characters at a time: each character at
        an even index is escaped as the escape character followed by the
        character right after it in the policy.  The first character of the
        policy is the escape character.

        Raises:
            ValueError: If ``escape_policy`` is empty or has an odd number of
                characters.
        """
        code_points = [ord(c) for c in escape_policy]
        if len(code_points) % 2 != 0:
            raise ValueError(
                "Escape policy string must have an even number of characters."
            )
        if not code_points:
            raise ValueError("Escape policy string must not be empty.")
        return cls(code_points[0], code_points[0::2], code_points[1::2])
+1
Original file line number | Diff line number | Diff line change
@@ -1,2 +1,3 @@
11
from .LineReader import LineReader as LineReader
22
from .Slice import Slice as Slice
3+
from .PerCharacterEscaper import PerCharacterEscaper as PerCharacterEscaper
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,88 @@
1+
import pytest
2+
3+
from selfie_lib import PerCharacterEscaper
4+
5+
6+
class TestPerCharacterEscaper:
    """Tests for PerCharacterEscaper built via both factory methods."""

    def test_performance_optimization_self(self):
        """Self-escaping policy: unchanged input is returned as the same
        object, and escape/unescape produce the expected text."""
        escaper = PerCharacterEscaper.self_escape("`123")
        abc = "abc"
        # Correct use of 'is' for checking object identity.
        assert (
            escaper.escape(abc) is abc
        ), "Escape should return the original object when no change is made"
        assert (
            escaper.unescape(abc) is abc
        ), "Unescape should return the original object when no change is made"

        # Use '==' for checking value equality.
        assert (
            escaper.escape("1") == "`1"
        ), "Escaping '1' should prepend the escape character"
        assert (
            escaper.escape("`") == "``"
        ), "Escaping the escape character should duplicate it"
        assert (
            escaper.escape("abc123`def") == "abc`1`2`3``def"
        ), "Escaping 'abc123`def' did not produce the expected result"

        assert escaper.unescape("`1") == "1", "Unescaping '`1' should produce '1'"
        assert escaper.unescape("``") == "`", "Unescaping '``' should produce '`'"
        assert (
            escaper.unescape("abc`1`2`3``def") == "abc123`def"
        ), "Unescaping 'abc`1`2`3``def' did not produce the expected result"

    def test_performance_optimization_specific(self):
        """Pair-specified policy: unchanged input is returned as the same
        object, and escape/unescape produce the expected text."""
        escaper = PerCharacterEscaper.specified_escape("`a1b2c3d")
        abc = "abc"
        # Correct use of 'is' for object identity.
        assert (
            escaper.escape(abc) is abc
        ), "Escape should return the original object when no change is made"
        assert (
            escaper.unescape(abc) is abc
        ), "Unescape should return the original object when no change is made"

        # Use '==' for value equality.
        assert escaper.escape("1") == "`b", "Escaping '1' should produce '`b'"
        assert escaper.escape("`") == "`a", "Escaping '`' should produce '`a'"
        assert (
            escaper.escape("abc123`def") == "abc`b`c`d`adef"
        ), "Escaping 'abc123`def' did not produce the expected result"

        assert escaper.unescape("`b") == "1", "Unescaping '`b' should produce '1'"
        assert escaper.unescape("`a") == "`", "Unescaping '`a' should produce '`'"
        # An escape followed by a character that is not a known replacement
        # yields that character unchanged.
        assert (
            escaper.unescape("abc`1`2`3``def") == "abc123`def"
        ), "Unescaping 'abc`1`2`3``def' did not produce the expected result"

    def test_corner_cases_self(self):
        """Self-escaping policy: a lone trailing escape is an error, and an
        escaped unknown character passes through."""
        escaper = PerCharacterEscaper.self_escape("`123")
        with pytest.raises(ValueError) as excinfo:
            escaper.unescape("`")
        assert (
            str(excinfo.value)
            == "Escape character '`' can't be the last character in a string."
        ), "Unescaping a string ending with a single escape character should raise ValueError"
        assert escaper.unescape("`a") == "a", "Unescaping '`a' should produce 'a'"

    def test_corner_cases_specific(self):
        """Pair-specified policy: a lone trailing escape is an error, and an
        escaped unknown character passes through."""
        escaper = PerCharacterEscaper.specified_escape("`a1b2c3d")
        with pytest.raises(ValueError) as excinfo:
            escaper.unescape("`")
        assert (
            str(excinfo.value)
            == "Escape character '`' can't be the last character in a string."
        ), "Unescaping a string ending with a single escape character should raise ValueError"
        assert escaper.unescape("`e") == "e", "Unescaping '`e' should produce 'e'"

    def test_roundtrip(self):
        """escape followed by unescape returns the original text."""
        escaper = PerCharacterEscaper.self_escape("`<>")

        # The parameter is named ``text`` so the builtin ``str`` is not shadowed.
        def roundtrip(text):
            assert (
                escaper.unescape(escaper.escape(text)) == text
            ), f"Roundtrip of '{text}' did not return the original string"

        roundtrip("")
        roundtrip("<local>~`/")

0 commit comments

Comments
 (0)