This commit is contained in:
2014-04-22 15:50:30 +02:00
2 changed files with 202 additions and 85 deletions

View File

@@ -61,99 +61,29 @@ feature -- Access
item: STRING
-- Contents with escaped entities if any
--| This is the text that `unescape_to_string_8' and `unescape_to_string_32' read and decode.
feature -- Conversion
unescaped_string_8: STRING_8
		-- Unescaped string from `item'.
		--| note: valid only if `item' does not encode any unicode character.
		--| The decoding itself is delegated to `unescape_to_string_8'.
	do
			-- Size the buffer from the escaped length, then let the
			-- shared routine append the decoded characters.
		create Result.make (item.count)
		unescape_to_string_8 (Result)
	end
unescaped_string_32: STRING_32
		-- Unescaped string 32 from `item'.
		--| Some encoders use UTF-8, and not the recommended pure JSON encoding,
		--| thus UTF-8 encoded content is supported during decoding.
		--| The decoding itself is delegated to `unescape_to_string_32'.
	do
			-- Size the buffer from the escaped length, then let the
			-- shared routine append the decoded characters.
		create Result.make (item.count)
		unescape_to_string_32 (Result)
	end
representation: STRING
@@ -165,6 +95,156 @@ feature -- Access
Result.append_character ('%"')
end
unescape_to_string_8 (a_output: STRING_8)
		-- Unescape string `item' into `a_output'.
		-- Decoded characters are appended to `a_output'.
		--| note: valid only if `item' does not encode any unicode character:
		--| a `\uXXXX' sequence is copied to `a_output' as it is.
	local
		s: like item
		i, n: INTEGER
		c: CHARACTER
	do
		s := item
		n := s.count
		from i := 1 until i > n loop
			c := s[i]
			if c = '\' then
				if i < n then
						-- Two-character escape: look at the character after the backslash.
					inspect s[i+1]
					when '\' then
						a_output.append_character ('\')
						i := i + 2
					when '%"' then
						a_output.append_character ('%"')
						i := i + 2
					when '/' then
							-- JSON allows the solidus to be escaped as `\/'.
						a_output.append_character ('/')
						i := i + 2
					when 'b' then
						a_output.append_character ('%B')
						i := i + 2
					when 'f' then
						a_output.append_character ('%F')
						i := i + 2
					when 'n' then
						a_output.append_character ('%N')
						i := i + 2
					when 'r' then
						a_output.append_character ('%R')
						i := i + 2
					when 't' then
						a_output.append_character ('%T')
						i := i + 2
					when 'u' then
							--| Leave Unicode \uXXXX unescaped:
							--| only the backslash is consumed here, the remaining
							--| characters are copied by the following iterations.
						a_output.append_character ('\')
						i := i + 1
					else
							-- Unknown escape: keep the backslash, the next
							-- character is copied by the following iteration.
						a_output.append_character ('\')
						i := i + 1
					end
				else
						-- Lone backslash at the very end of `item'.
					a_output.append_character ('\')
					i := i + 1
				end
			else
				a_output.append_character (c)
				i := i + 1
			end
		end
	end
unescape_to_string_32 (a_output: STRING_32)
		-- Unescape string `item' into `a_output' string 32.
		-- Decoded characters are appended to `a_output'.
		--| Some encoders use UTF-8, and not the recommended pure JSON encoding,
		--| thus UTF-8 encoded content is supported during decoding.
		--| Each `\uXXXX' is appended as its own code: a surrogate pair
		--| written as two `\uXXXX' sequences is not combined.
		--| A multi-byte UTF-8 sequence cut short by the end of `item' is dropped.
	local
		s: READABLE_STRING_8
		i, n: INTEGER
		c: NATURAL_32
		ch: CHARACTER_8
		hex: READABLE_STRING_8
	do
		s := item
		n := s.count
		from i := 1 until i > n loop
			ch := s.item (i)
			if ch = '\' then
				if i < n then
						-- Two-character escape: look at the character after the backslash.
					inspect s[i+1]
					when '\' then
						a_output.append_character ('\')
						i := i + 2
					when '%"' then
						a_output.append_character ('%"')
						i := i + 2
					when '/' then
							-- JSON allows the solidus to be escaped as `\/'.
						a_output.append_character ('/')
						i := i + 2
					when 'b' then
						a_output.append_character ('%B')
						i := i + 2
					when 'f' then
						a_output.append_character ('%F')
						i := i + 2
					when 'n' then
						a_output.append_character ('%N')
						i := i + 2
					when 'r' then
						a_output.append_character ('%R')
						i := i + 2
					when 't' then
						a_output.append_character ('%T')
						i := i + 2
					when 'u' then
							-- The four characters after `\u' are the hexadecimal code.
						hex := s.substring (i + 2, i + 5) -- i+2 , i+2+4-1
						if hex.count = 4 then
							a_output.append_code (hexadecimal_to_natural_32 (hex))
						end
							-- Skip `\u' and the four digits, whether or not they were decoded.
						i := i + 6 -- i +2 +4
					else
							-- Unknown escape: keep the backslash, the next
							-- character is handled by the following iteration.
						a_output.append_character ('\')
						i := i + 1
					end
				else
						-- Lone backslash at the very end of `item'.
					a_output.append_character ('\')
					i := i + 1
				end
			else
					-- Not an escape: decode `ch' as the lead byte of a UTF-8 sequence.
					-- In each multi-byte case `i' is moved to the last byte of the
					-- sequence, and the common `i := i + 1' below steps past it.
				c := ch.natural_32_code
				if c <= 0x7F then
						-- 0xxxxxxx
					check ch = c.to_character_32 end
					a_output.append_character (ch)
				elseif c <= 0xDF then
						-- 110xxxxx 10xxxxxx
					i := i + 1
					if i <= n then
						a_output.append_code (
							((c & 0x1F) |<< 6) |
							(s.code (i) & 0x3F)
						)
					end
				elseif c <= 0xEF then
						-- 1110xxxx 10xxxxxx 10xxxxxx
					i := i + 2
					if i <= n then
						a_output.append_code (
							((c & 0xF) |<< 12) |
							((s.code (i - 1) & 0x3F) |<< 6) |
							(s.code (i) & 0x3F)
						)
					end
				elseif c <= 0xF7 then
						-- 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
					i := i + 3
					if i <= n then
						a_output.append_code (
							((c & 0x7) |<< 18) |
							((s.code (i - 2) & 0x3F) |<< 12) |
							((s.code (i - 1) & 0x3F) |<< 6) |
							(s.code (i) & 0x3F)
						)
					end
				end
					-- Bytes above 0xF7 match no branch and produce no output.
				i := i + 1
			end
		end
	end
feature -- Visitor pattern
accept (a_visitor: JSON_VISITOR)
@@ -213,8 +293,18 @@ feature {NONE} -- Implementation
is_hexadecimal (s: READABLE_STRING_8): BOOLEAN
		-- Is `s' an hexadecimal value?
		-- I.e. is every character of `s' an hexadecimal digit?
		--| True when `s' is empty.
	local
		i, n: INTEGER
	do
		from
			Result := True
			i := 1
			n := s.count
		until
				-- Stop at the first character that is not an hexadecimal digit.
			i > n or not Result
		loop
			Result := s[i].is_hexa_digit
			i := i + 1
		end
	end
hexadecimal_to_natural_32 (s: READABLE_STRING_8): NATURAL_32
@@ -264,8 +354,11 @@ feature {NONE} -- Implementation
inspect c
when '%"' then Result.append_string ("\%"")
when '\' then Result.append_string ("\\")
when '%R' then Result.append_string ("\r")
when '%B' then Result.append_string ("\b")
when '%F' then Result.append_string ("\f")
when '%N' then Result.append_string ("\n")
when '%R' then Result.append_string ("\r")
when '%T' then Result.append_string ("\t")
else
Result.extend (c)
end
@@ -292,8 +385,11 @@ feature {NONE} -- Implementation
inspect c
when '%"' then Result.append_string ("\%"")
when '\' then Result.append_string ("\\")
when '%R' then Result.append_string ("\r")
when '%B' then Result.append_string ("\b")
when '%F' then Result.append_string ("\f")
when '%N' then Result.append_string ("\n")
when '%R' then Result.append_string ("\r")
when '%T' then Result.append_string ("\t")
else
Result.extend (c)
end

View File

@@ -1,4 +1,4 @@
note
note
description: "[
Eiffel tests that can be executed by testing tool.
]"
@@ -62,6 +62,27 @@ feature -- Tests Pass
end
end
test_json_utf_8_pass1
		-- Parse a UTF-8 encoded JSON object whose value holds non-ASCII
		-- characters and a `\t' escape, then check the unescaped
		-- STRING_32 value stored under key "nihaoma".
	local
		l_parser: like new_json_parser
		l_converter: UTF_CONVERTER
		l_json_text: READABLE_STRING_32
	do
		l_json_text := {STRING_32} "{ %"nihaoma%": %"你好吗\t?%" }"
			-- The parser is fed the UTF-8 encoding of the text.
		l_parser := new_json_parser (l_converter.string_32_to_utf_8_string_8 (l_json_text))
		json_value := l_parser.parse_json
		assert ("utf8.pass1.json", l_parser.is_parsed)
		if
			attached {JSON_OBJECT} json_value as l_object and then
			attached {JSON_STRING} l_object.item ("nihaoma") as l_string
		then
				-- The `\t' escape must come back as a real tab character.
			assert ("utf8.nihaoma", l_string.unescaped_string_32.same_string ({STRING_32} "你好吗%T?"))
		else
				-- Parsed value is not an object, or the key is missing / not a string.
			assert ("utf8.nihaoma", False)
		end
	end
feature -- Tests Failures
test_json_fail1
--