Updated the encoder library; in particular, the URL encoders now reuse the implementation from percent_encoder.e.
Fixed JSON_ENCODER handling of %T and related escape characters, and updated the related autotest cases.
This commit is contained in:
@@ -50,8 +50,11 @@ feature -- Encoder
|
||||
inspect c
|
||||
when '%"' then Result.append_string ("\%"")
|
||||
when '\' then Result.append_string ("\\")
|
||||
when '%R' then Result.append_string ("\r")
|
||||
when '%B' then Result.append_string ("\b")
|
||||
when '%F' then Result.append_string ("\f")
|
||||
when '%N' then Result.append_string ("\n")
|
||||
when '%R' then Result.append_string ("\r")
|
||||
when '%T' then Result.append_string ("\t")
|
||||
else
|
||||
Result.extend (c)
|
||||
end
|
||||
@@ -103,12 +106,21 @@ feature -- Decoder
|
||||
when '%"' then
|
||||
Result.append_character ('%"')
|
||||
i := i + 2
|
||||
when 'b' then
|
||||
Result.append_character ('%B')
|
||||
i := i + 2
|
||||
when 'f' then
|
||||
Result.append_character ('%F')
|
||||
i := i + 2
|
||||
when 'n' then
|
||||
Result.append_character ('%N')
|
||||
i := i + 2
|
||||
when 'r' then
|
||||
Result.append_character ('%R')
|
||||
i := i + 2
|
||||
when 't' then
|
||||
Result.append_character ('%T')
|
||||
i := i + 2
|
||||
when 'u' then
|
||||
hex := v.substring (i+2, i+2+4 - 1)
|
||||
if hex.count = 4 then
|
||||
@@ -170,7 +182,7 @@ feature {NONE} -- Implementation
|
||||
end
|
||||
|
||||
note
|
||||
copyright: "2011-2012, Eiffel Software and others"
|
||||
copyright: "Copyright (c) 2011-2014, Eiffel Software and others"
|
||||
license: "Eiffel Forum License v2 (see http://www.eiffel.com/licensing/forum.txt)"
|
||||
source: "[
|
||||
Eiffel Software
|
||||
|
||||
@@ -66,7 +66,7 @@ feature -- Percent encoding
|
||||
end
|
||||
end
|
||||
|
||||
partial_encoded_string (s: READABLE_STRING_GENERAL; a_ignore: ITERABLE [CHARACTER_32]): STRING_8
|
||||
partial_encoded_string (s: READABLE_STRING_GENERAL; a_ignore: ITERABLE [CHARACTER]): STRING_8
|
||||
-- Return `s' as percent-encoded value,
|
||||
-- but does not escape character listed in `a_ignore'.
|
||||
do
|
||||
@@ -74,12 +74,12 @@ feature -- Percent encoding
|
||||
append_partial_percent_encoded_string_to (s, Result, a_ignore)
|
||||
end
|
||||
|
||||
append_partial_percent_encoded_string_to (s: READABLE_STRING_GENERAL; a_result: STRING_GENERAL; a_ignore: ITERABLE [CHARACTER_32])
|
||||
append_partial_percent_encoded_string_to (s: READABLE_STRING_GENERAL; a_result: STRING_GENERAL; a_ignore: ITERABLE [CHARACTER])
|
||||
-- Append `s' as percent-encoded value to `a_result',
|
||||
-- but does not escape character listed in `a_ignore'.
|
||||
local
|
||||
c: NATURAL_32
|
||||
ch: CHARACTER_32
|
||||
ch: CHARACTER_8
|
||||
i,n: INTEGER
|
||||
do
|
||||
has_error := False
|
||||
@@ -109,15 +109,21 @@ feature -- Percent encoding
|
||||
43, 44, 59, 61, -- reserved = sub-delims: +,;=
|
||||
37 -- percent encoding: %
|
||||
then
|
||||
ch := c.to_character_32
|
||||
check c.is_valid_character_8_code end
|
||||
ch := c.to_character_8
|
||||
if across a_ignore as ic some ic.item = ch end then
|
||||
a_result.append_code (c)
|
||||
else
|
||||
append_percent_encoded_character_code_to (c, a_result)
|
||||
end
|
||||
else
|
||||
if across a_ignore as ic some ic.item = ch end then
|
||||
a_result.append_code (c)
|
||||
if c.is_valid_character_8_code then
|
||||
ch := c.to_character_8
|
||||
if across a_ignore as ic some ic.item = ch end then
|
||||
a_result.append_code (c)
|
||||
else
|
||||
append_percent_encoded_character_code_to (c, a_result)
|
||||
end
|
||||
else
|
||||
append_percent_encoded_character_code_to (c, a_result)
|
||||
end
|
||||
|
||||
@@ -20,6 +20,12 @@ inherit
|
||||
{NONE} all
|
||||
end
|
||||
|
||||
PERCENT_ENCODER
|
||||
rename
|
||||
percent_encoded_string as general_encoded_string,
|
||||
percent_decoded_string as general_decoded_string
|
||||
end
|
||||
|
||||
feature -- Access
|
||||
|
||||
name: READABLE_STRING_8
|
||||
@@ -27,10 +33,6 @@ feature -- Access
|
||||
create {IMMUTABLE_STRING_8} Result.make_from_string ("URL-encoded")
|
||||
end
|
||||
|
||||
feature -- Status report
|
||||
|
||||
has_error: BOOLEAN
|
||||
|
||||
feature -- Encoder
|
||||
|
||||
encoded_string (s: READABLE_STRING_32): STRING_8
|
||||
@@ -39,350 +41,16 @@ feature -- Encoder
|
||||
Result := general_encoded_string (s)
|
||||
end
|
||||
|
||||
general_encoded_string (s: READABLE_STRING_GENERAL): STRING_8
		-- URL-encoded value of `s'.
		-- Letters, digits and '.', '-', '~', '_' are copied unchanged;
		-- every other code is passed to `append_url_encoded_char'.
	local
		l_pos, l_last: INTEGER
		l_char: CHARACTER_8
		l_code: NATURAL_32
		l_is_unreserved: BOOLEAN
	do
		has_error := False
		l_last := s.count
		create Result.make (l_last + l_last // 10)
		from
			l_pos := 1
		until
			l_pos > l_last
		loop
			l_code := s.code (l_pos)
			l_is_unreserved := False
			if l_code.is_valid_character_8_code then
				l_char := l_code.to_character_8
				inspect l_char
				when 'A' .. 'Z', 'a' .. 'z', '0' .. '9', '.', '-', '~', '_' then
					l_is_unreserved := True
				else
						-- Anything else must be escaped.
				end
			end
			if l_is_unreserved then
				Result.extend (l_char)
			else
				append_url_encoded_char (l_code, Result)
			end
			l_pos := l_pos + 1
		end
	end
|
||||
|
||||
partial_encoded_string (s: READABLE_STRING_GENERAL; a_ignore: ARRAY [CHARACTER]): STRING_8
		-- URL-encoded value of `s', leaving unescaped any character listed in `a_ignore'.
		-- Letters, digits and '.', '-', '~', '_' are always copied unchanged.
		-- Codes that do not fit in CHARACTER_8 are passed to `append_url_encoded_char'.
	local
		i, n: INTEGER
		l_code: NATURAL_32
		c: CHARACTER_8
		s8: STRING_8
	do
		has_error := False
		n := s.count
		create s8.make (n + n // 10)
		Result := s8
		from
			i := 1
		until
			i > n
		loop
			l_code := s.code (i)
			if l_code.is_valid_character_8_code then
				c := l_code.to_character_8
				inspect c
				when 'A' .. 'Z', 'a' .. 'z', '0' .. '9', '.', '-', '~', '_' then
					s8.extend (c)
				else
					if a_ignore.has (c) then
						s8.extend (c)
					else
						append_url_encoded_char (l_code, s8)
					end
				end
			else
					-- `l_code' has no CHARACTER_8 counterpart, so it cannot be one of the
					-- characters of `a_ignore'. `c' must not be consulted here: it still
					-- holds the character of an earlier iteration (or its default value),
					-- and testing it could append that stale character to the result.
				append_url_encoded_char (l_code, s8)
			end
			i := i + 1
		end
	end
|
||||
|
||||
feature {NONE} -- encoder character
|
||||
|
||||
append_url_encoded_char (a_code: NATURAL_32; a_output: STRING_GENERAL)
		-- Append to `a_output' the escape "%XY" for `a_code', where X and Y are
		-- the upper-case hexadecimal digits of the code.
		-- If `a_code' is not a valid CHARACTER_8 code, nothing is appended
		-- and `has_error' is set.
	local
		l_byte: INTEGER
	do
		if not a_code.is_valid_character_8_code then
			has_error := True --| Non-ascii escape not currently supported
		else
			l_byte := a_code.to_integer_32
			a_output.append_code (37) -- 37 '%%'
				-- High nibble first, then low nibble.
			a_output.append_code (hex_digit [l_byte |>> 4])
			a_output.append_code (hex_digit [l_byte & 0xF])
		end
	end
|
||||
|
||||
hex_digit: SPECIAL [NATURAL_32]
		-- Character codes of the sixteen hexadecimal digits '0' .. '9', 'A' .. 'F',
		-- indexed by digit value (0 .. 15).
	local
		l_digits: STRING_8
		l_index: INTEGER
	once
		l_digits := "0123456789ABCDEF"
		create Result.make_filled (0, 16)
		from
			l_index := 1
		until
			l_index > 16
		loop
				-- Strings are 1-based whereas SPECIAL is 0-based.
			Result [l_index - 1] := l_digits.code (l_index)
			l_index := l_index + 1
		end
	end
|
||||
|
||||
feature -- Decoder
|
||||
|
||||
decoded_string (v: READABLE_STRING_8): STRING_32
		-- Decoded value of the URL-encoded string `v'.
		-- '+' is replaced by a space, a '%' followed by at least one character
		-- is expanded by `url_decoded_char', and a '%' in last position is
		-- copied unchanged.
	local
		l_index, l_count: INTEGER
		l_char: CHARACTER
		l_position: CELL [INTEGER]
	do
		has_error := False
		l_count := v.count
		create Result.make (l_count)
		from
			l_index := 1
		until
			l_index > l_count
		loop
			l_char := v.item (l_index)
			if l_char = '+' then
				Result.append_character ({CHARACTER_32} ' ')
			elseif l_char = '%%' and then l_index < l_count then
					-- An escaped character: `url_decoded_char' reports through
					-- `l_position' the index from which scanning resumes.
				create l_position.put (l_index)
				Result.append (url_decoded_char (v, l_position))
				l_index := l_position.item
			else
				Result.append_character (l_char.to_character_32)
			end
			l_index := l_index + 1
		end
	end
|
||||
|
||||
feature {NONE} -- decoded character
|
||||
|
||||
url_decoded_char (buf: STRING_8; posr: CELL [INTEGER]): STRING_32
		-- Character decoded from the escape sequence of `buf' whose '%' is at
		-- index `posr.item'. Two forms are handled:
		--   %u<n> : <n> is one to four hexadecimal digits giving the character code
		--           (form produced by ECMA scripts);
		--   %XY   : the two characters following '%' are read as a hexadecimal code.
		-- On return `posr.item' is the index of the last character consumed by
		-- the escape, so that the caller resumes scanning at `posr.item + 1'.
	require
		stream_exists: buf /= Void
		posr_exists: posr /= Void
		valid_start: posr.item <= buf.count
	local
		c: CHARACTER
		i, l_last_digit: INTEGER
		not_a_digit: BOOLEAN
		l_code: INTEGER
		pos: INTEGER
	do
			--| pos is index in stream of escape character ('%')
		pos := posr.item
		create Result.make (4)
		if buf.item (pos + 1) = 'u' then
				-- At most four hexadecimal digits belong to the escape; any further
				-- hexadecimal-looking characters are ordinary text.
			l_last_digit := buf.count.min (pos + 5)
			from
				i := pos + 2
			until
				i > l_last_digit or not_a_digit
			loop
				c := buf.item (i)
				if c.is_hexa_digit then
					l_code := l_code * 16
					if c.is_digit then
						l_code := l_code + (c |-| '0')
					else
						l_code := l_code + (c.upper |-| 'A') + 10
					end
					i := i + 1
				else
					not_a_digit := True
				end
			end
				-- `i' is the first index that is NOT part of the escape. Report the
				-- index before it, as the %XY branch does, otherwise the caller
				-- (which adds one) would skip the character following the escape.
			posr.replace (i - 1)
			Result.append_code (l_code.as_natural_32)
		else
				-- %XY form: "XY" is converted by `hex_to_integer_32'.
			l_code := hex_to_integer_32 (buf.substring (pos + 1, pos + 2))
			Result.append_code (l_code.as_natural_32)
			posr.replace (pos + 2)
		end
	ensure
		exists: Result /= Void
	end
|
||||
|
||||
feature {NONE} -- UTF8
|
||||
|
||||
utf8_bytes_in_sequence (s: STRING_8; spos: INTEGER): INTEGER
		-- Number of bytes of the UTF-8 sequence whose first byte is the
		-- character of `s' at index `spos', as computed by `bytes_in_utf8_char'.
		-- Zero means that this character is not a UTF-8 first byte.
	require
		exists: s /= Void
		long_enough: s.count >= spos
	local
		l_lead: CHARACTER_8
	do
		l_lead := s.item (spos)
		Result := bytes_in_utf8_char (l_lead)
	end
|
||||
|
||||
bytes_in_utf8_char (v: CHARACTER_8): INTEGER
		-- Length in bytes of the UTF-8 sequence introduced by first byte `v'.
		-- Zero denotes an error, i.e. `v' is not a legal first byte.
		--
		-- The leading bits of the first byte encode the length:
		--   0xxxxxxx -> 1 (7 bit ASCII)   11110xxx -> 4
		--   110xxxxx -> 2                 111110xx -> 5
		--   1110xxxx -> 3                 1111110x -> 6
	local
		l_byte: NATURAL_8
	do
		l_byte := v.code.to_natural_8
		if (l_byte & 0x80) = 0 then
			Result := 1
		elseif (l_byte & 0xe0) = 0xc0 then
			Result := 2
		elseif (l_byte & 0xf0) = 0xe0 then
			Result := 3
		elseif (l_byte & 0xf8) = 0xf0 then
			Result := 4
		elseif (l_byte & 0xfc) = 0xf8 then
			Result := 5
		elseif (l_byte & 0xfe) = 0xfc then
			Result := 6
		else
				-- High bit set but no recognized length prefix
				-- (e.g. a continuation byte 10xxxxxx).
			Result := 0
		end
	end
|
||||
|
||||
feature {NONE} -- Hexadecimal and strings
|
||||
|
||||
hex_to_integer_32 (s: STRING): INTEGER_32
		-- Hexadecimal string `s' converted to INTEGER_32 value.
		-- When the second character of `s' is 'x', the first two characters
		-- are skipped as a prefix. Characters are not validated: anything
		-- that is not a decimal digit is treated as a hexadecimal letter.
	require
		s_not_void: s /= Void
	local
		l_pos, l_last, l_digit: INTEGER
		l_char: CHARACTER
	do
		l_last := s.count
		l_pos := 1
		if l_last >= 2 and then s.item (2) = 'x' then
			l_pos := 3
		end
		from
		until
			l_pos > l_last
		loop
			l_char := s.item (l_pos)
			if l_char >= '0' and then l_char <= '9' then
				l_digit := l_char |-| '0'
			else
				l_digit := l_char.lower |-| 'a' + 10
			end
			Result := Result * 16 + l_digit
			l_pos := l_pos + 1
		end
	end
|
||||
|
||||
hex_to_integer_64 (s: STRING): INTEGER_64
		-- Hexadecimal string `s' converted to INTEGER_64 value.
		-- When the second character of `s' is 'x', the first two characters
		-- are skipped as a prefix. Characters are not validated: anything
		-- that is not a decimal digit is treated as a hexadecimal letter.
	require
		s_not_void: s /= Void
	local
		l_cursor, l_count: INTEGER
		l_value: INTEGER
		l_item: CHARACTER
	do
		l_count := s.count
		if l_count >= 2 and then s.item (2) = 'x' then
			l_cursor := 3
		else
			l_cursor := 1
		end
		from
		until
			l_cursor > l_count
		loop
			l_item := s.item (l_cursor)
			if l_item >= '0' and then l_item <= '9' then
				l_value := l_item |-| '0'
			else
				l_value := l_item.lower |-| 'a' + 10
			end
				-- Shift the accumulated value by one hexadecimal digit, then add the new one.
			Result := Result * 16 + l_value
			l_cursor := l_cursor + 1
		end
	end
|
||||
|
||||
hex_to_pointer (s: STRING): POINTER
|
||||
-- Hexadecimal string `s' converted to POINTER value
|
||||
require
|
||||
s_not_void: s /= Void
|
||||
local
|
||||
val_32: INTEGER_32
|
||||
val_64: INTEGER_64
|
||||
do
|
||||
if Pointer_bytes = Integer_64_bytes then
|
||||
val_64 := hex_to_integer_64 (s)
|
||||
($Result).memory_copy ($val_64, Pointer_bytes)
|
||||
else
|
||||
val_32 := hex_to_integer_32 (s)
|
||||
($Result).memory_copy ($val_32, Pointer_bytes)
|
||||
end
|
||||
Result := general_decoded_string (v)
|
||||
end
|
||||
|
||||
note
|
||||
copyright: "2011-2014, Eiffel Software and others"
|
||||
copyright: "Copyright (c) 2011-2014, Eiffel Software and others"
|
||||
license: "Eiffel Forum License v2 (see http://www.eiffel.com/licensing/forum.txt)"
|
||||
source: "[
|
||||
Eiffel Software
|
||||
|
||||
@@ -14,23 +14,6 @@ class
|
||||
|
||||
inherit
|
||||
URL_ENCODER
|
||||
redefine
|
||||
name,
|
||||
general_encoded_string,
|
||||
encoded_string, partial_encoded_string,
|
||||
decoded_string
|
||||
select
|
||||
encoded_string,
|
||||
decoded_string,
|
||||
has_error
|
||||
end
|
||||
|
||||
UTF8_ENCODER
|
||||
rename
|
||||
general_encoded_string as utf8_general_encoded_string,
|
||||
encoded_string as utf8_encoded_string,
|
||||
decoded_string as utf8_decoded_string,
|
||||
has_error as utf8_has_error
|
||||
redefine
|
||||
name
|
||||
end
|
||||
@@ -42,43 +25,8 @@ feature -- Access
|
||||
create {IMMUTABLE_STRING_8} Result.make_from_string ("UTF8-URL-encoded")
|
||||
end
|
||||
|
||||
feature -- Encoder
|
||||
|
||||
encoded_string (s: READABLE_STRING_32): STRING_8
|
||||
-- URL-encoded value of `s'.
-- Delegates to `general_encoded_string'.
|
||||
do
|
||||
Result := general_encoded_string (s)
|
||||
end
|
||||
|
||||
general_encoded_string (s: READABLE_STRING_GENERAL): STRING_8
		-- `s' encoded by `utf8_general_encoded_string' (inherited from UTF8_ENCODER),
		-- then URL-encoded by the version inherited from URL_ENCODER.
		-- `has_error' is True if either step reported an error.
	local
		l_utf8: STRING_8
	do
		l_utf8 := utf8_general_encoded_string (s)
		Result := Precursor {URL_ENCODER} (l_utf8)
		if utf8_has_error then
			has_error := True
		end
	end
|
||||
|
||||
partial_encoded_string (s: READABLE_STRING_GENERAL; a_ignore: ARRAY [CHARACTER]): STRING_8
		-- `s' encoded by `utf8_general_encoded_string' (inherited from UTF8_ENCODER),
		-- then URL-encoded by the version inherited from URL_ENCODER, which receives
		-- `a_ignore' unchanged.
		-- `has_error' is True if either step reported an error.
	local
		l_utf8: STRING_8
	do
		l_utf8 := utf8_general_encoded_string (s)
		Result := Precursor {URL_ENCODER} (l_utf8, a_ignore)
		if utf8_has_error then
			has_error := True
		end
	end
|
||||
|
||||
feature -- Decoder
|
||||
|
||||
decoded_string (v: READABLE_STRING_8): STRING_32
		-- Decoded value of the URL-encoded string `v'.
		-- `v' is first decoded by the version inherited from URL_ENCODER; unless
		-- that step set `has_error', its result is then passed to
		-- `utf8_decoded_string' and `has_error' takes the value of `utf8_has_error'.
	local
		l_unescaped: STRING_32
	do
		l_unescaped := Precursor {URL_ENCODER} (v)
		if has_error then
				-- Keep the partially decoded value as is.
			Result := l_unescaped
		else
			Result := utf8_decoded_string (l_unescaped)
			has_error := utf8_has_error
		end
	end
|
||||
|
||||
note
|
||||
copyright: "2011-2013, Eiffel Software and others"
|
||||
copyright: "Copyright (c) 2011-2014, Eiffel Software and others"
|
||||
license: "Eiffel Forum License v2 (see http://www.eiffel.com/licensing/forum.txt)"
|
||||
source: "[
|
||||
Eiffel Software
|
||||
|
||||
@@ -21,6 +21,7 @@ feature -- Test routines
|
||||
do
|
||||
test_json_encoded_encoding ({STRING_32}"il était une fois %"Ni & Hao%" (你好) \a\b\c")
|
||||
test_json_encoded_encoding ({STRING_32}" it's `abc’ ")
|
||||
test_json_encoded_encoding ({STRING_32}"tab%Tnew line%N %"double quote %"")
|
||||
end
|
||||
|
||||
test_json_encoded_encoding (s: STRING_32)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
note
|
||||
note
|
||||
description: "[
|
||||
Eiffel tests that can be executed by testing tool.
|
||||
]"
|
||||
@@ -18,23 +18,43 @@ feature -- Test routines
|
||||
test_url_encoded_encoder
|
||||
note
|
||||
testing: "url-encoded"
|
||||
local
|
||||
utf8: STRING_8
|
||||
do
|
||||
test_utf8_decoding ("%%C3%%A9t%%C3%%A9", {STRING_32}"<22>t<EFBFBD>")
|
||||
create utf8.make_empty
|
||||
utf8.append_code (195) --+
|
||||
utf8.append_code (169) -- é
|
||||
utf8.append_code (116) -- t
|
||||
utf8.append_code (195) --+
|
||||
utf8.append_code (169) -- é
|
||||
test_utf8_decoding (utf8, {STRING_32}"été")
|
||||
|
||||
create utf8.make_empty
|
||||
utf8.append_code (228) --+
|
||||
utf8.append_code (189) --+
|
||||
utf8.append_code (160) -- 你
|
||||
|
||||
utf8.append_code (229) --+
|
||||
utf8.append_code (165) --+
|
||||
utf8.append_code (189) -- 好
|
||||
|
||||
utf8.append_code (229) --+
|
||||
utf8.append_code (144) --+
|
||||
utf8.append_code (151) -- 吗
|
||||
|
||||
test_utf8_decoding (utf8, {STRING_32}"你好吗")
|
||||
end
|
||||
|
||||
test_utf8_decoding (s: STRING_8; e: STRING_32)
|
||||
local
|
||||
url: URL_ENCODER
|
||||
u: STRING_32
|
||||
b: UTF8_ENCODER
|
||||
do
|
||||
create b
|
||||
create url
|
||||
u := b.decoded_string (url.decoded_string (s))
|
||||
u := b.decoded_string (s)
|
||||
assert ("decoded encoded string is same for %"" + s + "%"", u ~ e)
|
||||
end
|
||||
|
||||
|
||||
note
|
||||
copyright: "2011-2011, Eiffel Software and others"
|
||||
license: "Eiffel Forum License v2 (see http://www.eiffel.com/licensing/forum.txt)"
|
||||
|
||||
Reference in New Issue
Block a user