From d40131f863af8abd11c7aa32ef2591fa00e21729 Mon Sep 17 00:00:00 2001 From: Jocelyn Fiat Date: Wed, 9 Apr 2014 18:10:51 +0200 Subject: [PATCH] Updated encoder library, especially URL encoders to reuse implementation of percent_encoder.e Fixed JSON_ENCODER for %T and related. Updated related autotest cases. --- library/text/encoder/src/json_encoder.e | 16 +- library/text/encoder/src/percent_encoder.e | 18 +- library/text/encoder/src/url_encoder.e | 348 +----------------- library/text/encoder/src/utf8_url_encoder.e | 54 +-- .../text/encoder/tests/test_json_encoder.e | 1 + .../text/encoder/tests/test_utf8_encoder.e | 32 +- 6 files changed, 62 insertions(+), 407 deletions(-) diff --git a/library/text/encoder/src/json_encoder.e b/library/text/encoder/src/json_encoder.e index 333c7e34..84909edd 100644 --- a/library/text/encoder/src/json_encoder.e +++ b/library/text/encoder/src/json_encoder.e @@ -50,8 +50,11 @@ feature -- Encoder inspect c when '%"' then Result.append_string ("\%"") when '\' then Result.append_string ("\\") - when '%R' then Result.append_string ("\r") + when '%B' then Result.append_string ("\b") + when '%F' then Result.append_string ("\f") when '%N' then Result.append_string ("\n") + when '%R' then Result.append_string ("\r") + when '%T' then Result.append_string ("\t") else Result.extend (c) end @@ -103,12 +106,21 @@ feature -- Decoder when '%"' then Result.append_character ('%"') i := i + 2 + when 'b' then + Result.append_character ('%B') + i := i + 2 + when 'f' then + Result.append_character ('%F') + i := i + 2 when 'n' then Result.append_character ('%N') i := i + 2 when 'r' then Result.append_character ('%R') i := i + 2 + when 't' then + Result.append_character ('%T') + i := i + 2 when 'u' then hex := v.substring (i+2, i+2+4 - 1) if hex.count = 4 then @@ -170,7 +182,7 @@ feature {NONE} -- Implementation end note - copyright: "2011-2012, Eiffel Software and others" + copyright: "Copyright (c) 2011-2014, Eiffel Software and others" license: "Eiffel 
Forum License v2 (see http://www.eiffel.com/licensing/forum.txt)" source: "[ Eiffel Software diff --git a/library/text/encoder/src/percent_encoder.e b/library/text/encoder/src/percent_encoder.e index 512094da..75b9961e 100644 --- a/library/text/encoder/src/percent_encoder.e +++ b/library/text/encoder/src/percent_encoder.e @@ -66,7 +66,7 @@ feature -- Percent encoding end end - partial_encoded_string (s: READABLE_STRING_GENERAL; a_ignore: ITERABLE [CHARACTER_32]): STRING_8 + partial_encoded_string (s: READABLE_STRING_GENERAL; a_ignore: ITERABLE [CHARACTER]): STRING_8 -- Return `s' as percent-encoded value, -- but does not escape character listed in `a_ignore'. do @@ -74,12 +74,12 @@ feature -- Percent encoding append_partial_percent_encoded_string_to (s, Result, a_ignore) end - append_partial_percent_encoded_string_to (s: READABLE_STRING_GENERAL; a_result: STRING_GENERAL; a_ignore: ITERABLE [CHARACTER_32]) + append_partial_percent_encoded_string_to (s: READABLE_STRING_GENERAL; a_result: STRING_GENERAL; a_ignore: ITERABLE [CHARACTER]) -- Append `s' as percent-encoded value to `a_result', -- but does not escape character listed in `a_ignore'. 
local c: NATURAL_32 - ch: CHARACTER_32 + ch: CHARACTER_8 i,n: INTEGER do has_error := False @@ -109,15 +109,21 @@ feature -- Percent encoding 43, 44, 59, 61, -- reserved = sub-delims: +,;= 37 -- percent encoding: % then - ch := c.to_character_32 + check c.is_valid_character_8_code end + ch := c.to_character_8 if across a_ignore as ic some ic.item = ch end then a_result.append_code (c) else append_percent_encoded_character_code_to (c, a_result) end else - if across a_ignore as ic some ic.item = ch end then - a_result.append_code (c) + if c.is_valid_character_8_code then + ch := c.to_character_8 + if across a_ignore as ic some ic.item = ch end then + a_result.append_code (c) + else + append_percent_encoded_character_code_to (c, a_result) + end else append_percent_encoded_character_code_to (c, a_result) end diff --git a/library/text/encoder/src/url_encoder.e b/library/text/encoder/src/url_encoder.e index b15e9385..8975e8cb 100644 --- a/library/text/encoder/src/url_encoder.e +++ b/library/text/encoder/src/url_encoder.e @@ -20,6 +20,12 @@ inherit {NONE} all end + PERCENT_ENCODER + rename + percent_encoded_string as general_encoded_string, + percent_decoded_string as general_decoded_string + end + feature -- Access name: READABLE_STRING_8 @@ -27,10 +33,6 @@ feature -- Access create {IMMUTABLE_STRING_8} Result.make_from_string ("URL-encoded") end -feature -- Status report - - has_error: BOOLEAN - feature -- Encoder encoded_string (s: READABLE_STRING_32): STRING_8 @@ -39,350 +41,16 @@ feature -- Encoder Result := general_encoded_string (s) end - general_encoded_string (s: READABLE_STRING_GENERAL): STRING_8 - -- URL-encoded value of `s'. - local - i, n: INTEGER - c: CHARACTER_8 - l_code: NATURAL_32 - do - has_error := False - create Result.make (s.count + s.count // 10) - n := s.count - from i := 1 until i > n loop - l_code := s.code (i) - if l_code.is_valid_character_8_code then - c := l_code.to_character_8 - inspect c - when - 'A' .. 'Z', - 'a' .. 'z', '0' .. 
'9', - '.', '-', '~', '_' - then - Result.extend (c) - else - append_url_encoded_char (l_code, Result) - end - else - append_url_encoded_char (l_code, Result) - end - i := i + 1 - end - end - - partial_encoded_string (s: READABLE_STRING_GENERAL; a_ignore: ARRAY [CHARACTER]): STRING_8 - -- URL-encoded value of `s'. - local - i, n: INTEGER - l_code: NATURAL_32 - c: CHARACTER_8 - s8: STRING_8 - do - has_error := False - create s8.make (s.count + s.count // 10) - Result := s8 - n := s.count - from i := 1 until i > n loop - l_code := s.code (i) - if l_code.is_valid_character_8_code then - c := l_code.to_character_8 - inspect c - when - 'A' .. 'Z', - 'a' .. 'z', '0' .. '9', - '.', '-', '~', '_' - then - s8.extend (c) - else - if a_ignore.has (c) then - s8.extend (c) - else - append_url_encoded_char (l_code, s8) - end - end - else - if a_ignore.has (c) then - s8.extend (c) - else - append_url_encoded_char (l_code, s8) - end - end - i := i + 1 - end - end - -feature {NONE} -- encoder character - - append_url_encoded_char (a_code: NATURAL_32; a_output: STRING_GENERAL) - local - c: INTEGER - do - if a_code.is_valid_character_8_code then - c := a_code.to_integer_32 - a_output.append_code (37) -- 37 '%%' - a_output.append_code (hex_digit [c |>> 4]) - a_output.append_code (hex_digit [c & 0xF]) - else - has_error := True --| Non-ascii escape not currently supported - end - end - - hex_digit: SPECIAL [NATURAL_32] - -- Hexadecimal digits. 
- once - create Result.make_filled (0, 16) - Result [0] := {NATURAL_32} 48 -- 48 '0' - Result [1] := {NATURAL_32} 49 -- 49 '1' - Result [2] := {NATURAL_32} 50 -- 50 '2' - Result [3] := {NATURAL_32} 51 -- 51 '3' - Result [4] := {NATURAL_32} 52 -- 52 '4' - Result [5] := {NATURAL_32} 53 -- 53 '5' - Result [6] := {NATURAL_32} 54 -- 54 '6' - Result [7] := {NATURAL_32} 55 -- 55 '7' - Result [8] := {NATURAL_32} 56 -- 56 '8' - Result [9] := {NATURAL_32} 57 -- 57 '9' - Result [10] := {NATURAL_32} 65 -- 65 'A' - Result [11] := {NATURAL_32} 66 -- 66 'B' - Result [12] := {NATURAL_32} 67 -- 67 'C' - Result [13] := {NATURAL_32} 68 -- 68 'D' - Result [14] := {NATURAL_32} 69 -- 69 'E' - Result [15] := {NATURAL_32} 70 -- 70 'F' - end - feature -- Decoder decoded_string (v: READABLE_STRING_8): STRING_32 -- The URL-encoded equivalent of the given string - local - i, n: INTEGER - c: CHARACTER - pr: CELL [INTEGER] - changed: BOOLEAN do - has_error := False - n := v.count - create Result.make (n) - from i := 1 - until i > n - loop - c := v.item (i) - inspect c - when '+' then - changed := True - Result.append_character ({CHARACTER_32}' ') - when '%%' then - -- An escaped character ? 
- if i = n then - Result.append_character (c.to_character_32) - else - changed := True - create pr.put (i) - Result.append (url_decoded_char (v, pr)) - i := pr.item - end - else - Result.append_character (c.to_character_32) - end - i := i + 1 - end - end - -feature {NONE} -- decoded character - - url_decoded_char (buf: STRING_8; posr: CELL [INTEGER]): STRING_32 - -- Character(s) resulting from decoding the URL-encoded string - require - stream_exists: buf /= Void - posr_exists: posr /= Void - valid_start: posr.item <= buf.count - local - c: CHARACTER - i, n, nb: INTEGER - not_a_digit: BOOLEAN - ascii_pos, ival: INTEGER - pos: INTEGER - do - --| pos is index in stream of escape character ('%') - pos := posr.item - create Result.make (4) - if buf.item (pos + 1) = 'u' then - -- An escaped Unicode (ucs2) value, from ECMA scripts - -- Has the form: %uN where N is the UCS value - -- of the character (two byte integer, one to 4 chars - -- after escape sequence). - -- UTF-8 result can be 1 to 4 characters - n := buf.count - from i := pos + 2 - until (i > n) or not_a_digit - loop - c := buf.item (i) - if c.is_hexa_digit then - ival := ival * 16 - if c.is_digit then - ival := ival + (c |-| '0') - else - ival := ival + (c.upper |-| 'A') + 10 - end - i := i + 1 - else - not_a_digit := True - end - end - posr.replace (i) - -- ival is now UCS2 value; needs conversion to UTF8 - Result.append_code (ival.as_natural_32) - nb := utf8_bytes_in_sequence (buf, pos) - else - -- ASCII char? 
- ascii_pos := hex_to_integer_32 (buf.substring (pos+1, pos+2)) - if ascii_pos >= 0x80 and ascii_pos <= 0xff then - -- Might be improperly escaped - Result.append_code (ascii_pos.as_natural_32) - posr.replace (pos + 2) - else - Result.append_code (ascii_pos.as_natural_32) - posr.replace (pos + 2) - end - end - ensure - exists: Result /= Void - end - -feature {NONE} -- UTF8 - - utf8_bytes_in_sequence (s: STRING_8; spos: INTEGER): INTEGER - -- If the given character is a legal first byte element in a - -- utf8 byte sequence (aka character), then return the number - -- of bytes in that sequence - -- Result of zero means it's not a utf8 first byte - require - exists: s /= Void - long_enough: s.count >= spos - do - Result := bytes_in_utf8_char (s.item (spos)) - end - - bytes_in_utf8_char (v: CHARACTER_8): INTEGER - -- If the given byte a legal first byte element in a utf8 sequence, - -- then the number of bytes in that character - -- Zero denotes an error, i.e. not a legal UTF8 char - -- - -- The first byte of a UTF8 encodes the length - local - c: NATURAL_8 - do - c := v.code.to_natural_8 - Result := 1 -- 7 bit ASCII - if (c & 0x80) /= 0 then - -- Hi bit means not ASCII - Result := 0 - if (c & 0xe0) = 0xc0 then - -- If we see a first byte as b110xxxxx - -- then we expect a two-byte character - Result := 2 - elseif (c & 0xf0) = 0xe0 then - -- If we see a first byte as b1110xxxx - -- then we expect a three-byte character - Result := 3 - elseif (c & 0xf8) = 0xf0 then - -- If we see a first byte as b11110xxx - -- then we expect a four-byte character - Result := 4 - elseif (c & 0xfc) = 0xf8 then - -- If we see a first byte as b111110xx - -- then we expect a five-byte character - Result := 5 - elseif (c & 0xfe) = 0xfc then - -- If we see a first byte as b1111110x - -- then we expect a six-byte character - Result := 6 - end - end - end - -feature {NONE} -- Hexadecimal and strings - - hex_to_integer_32 (s: STRING): INTEGER_32 - -- Hexadecimal string `s' converted to INTEGER_32 
value - require - s_not_void: s /= Void - local - i, nb: INTEGER; - char: CHARACTER - do - nb := s.count - - if nb >= 2 and then s.item (2) = 'x' then - i := 3 - else - i := 1 - end - - from - until - i > nb - loop - Result := Result * 16 - char := s.item (i) - if char >= '0' and then char <= '9' then - Result := Result + (char |-| '0') - else - Result := Result + (char.lower |-| 'a' + 10) - end - i := i + 1 - end - end - - hex_to_integer_64 (s: STRING): INTEGER_64 - -- Hexadecimal string `s' converted to INTEGER_64 value - require - s_not_void: s /= Void - local - i, nb: INTEGER; - char: CHARACTER - do - nb := s.count - - if nb >= 2 and then s.item (2) = 'x' then - i := 3 - else - i := 1 - end - - from - until - i > nb - loop - Result := Result * 16 - char := s.item (i) - if char >= '0' and then char <= '9' then - Result := Result + (char |-| '0') - else - Result := Result + (char.lower |-| 'a' + 10) - end - i := i + 1 - end - end - - hex_to_pointer (s: STRING): POINTER - -- Hexadecimal string `s' converted to POINTER value - require - s_not_void: s /= Void - local - val_32: INTEGER_32 - val_64: INTEGER_64 - do - if Pointer_bytes = Integer_64_bytes then - val_64 := hex_to_integer_64 (s) - ($Result).memory_copy ($val_64, Pointer_bytes) - else - val_32 := hex_to_integer_32 (s) - ($Result).memory_copy ($val_32, Pointer_bytes) - end + Result := general_decoded_string (v) end note - copyright: "2011-2014, Eiffel Software and others" + copyright: "Copyright (c) 2011-2014, Eiffel Software and others" license: "Eiffel Forum License v2 (see http://www.eiffel.com/licensing/forum.txt)" source: "[ Eiffel Software diff --git a/library/text/encoder/src/utf8_url_encoder.e b/library/text/encoder/src/utf8_url_encoder.e index 1edfef50..33c97b2e 100644 --- a/library/text/encoder/src/utf8_url_encoder.e +++ b/library/text/encoder/src/utf8_url_encoder.e @@ -14,23 +14,6 @@ class inherit URL_ENCODER - redefine - name, - general_encoded_string, - encoded_string, partial_encoded_string, - 
decoded_string - select - encoded_string, - decoded_string, - has_error - end - - UTF8_ENCODER - rename - general_encoded_string as utf8_general_encoded_string, - encoded_string as utf8_encoded_string, - decoded_string as utf8_decoded_string, - has_error as utf8_has_error redefine name end @@ -42,43 +25,8 @@ feature -- Access create {IMMUTABLE_STRING_8} Result.make_from_string ("UTF8-URL-encoded") end -feature -- Encoder - - encoded_string (s: READABLE_STRING_32): STRING_8 - -- URL-encoded value of `s'. - do - Result := general_encoded_string (s) - end - - general_encoded_string (s: READABLE_STRING_GENERAL): STRING_8 - do - Result := utf8_general_encoded_string (s) - Result := Precursor {URL_ENCODER} (Result) - has_error := has_error or utf8_has_error - end - - partial_encoded_string (s: READABLE_STRING_GENERAL; a_ignore: ARRAY [CHARACTER]): STRING_8 - -- URL-encoded value of `s'. - do - Result := utf8_general_encoded_string (s) - Result := Precursor {URL_ENCODER} (Result, a_ignore) - has_error := has_error or utf8_has_error - end - -feature -- Decoder - - decoded_string (v: READABLE_STRING_8): STRING_32 - -- The URL-encoded equivalent of the given string - do - Result := Precursor {URL_ENCODER} (v) - if not has_error then - Result := utf8_decoded_string (Result) - has_error := utf8_has_error - end - end - note - copyright: "2011-2013, Eiffel Software and others" + copyright: "Copyright (c) 2011-2014, Eiffel Software and others" license: "Eiffel Forum License v2 (see http://www.eiffel.com/licensing/forum.txt)" source: "[ Eiffel Software diff --git a/library/text/encoder/tests/test_json_encoder.e b/library/text/encoder/tests/test_json_encoder.e index 6c54c974..3d370213 100644 --- a/library/text/encoder/tests/test_json_encoder.e +++ b/library/text/encoder/tests/test_json_encoder.e @@ -21,6 +21,7 @@ feature -- Test routines do test_json_encoded_encoding ({STRING_32}"il était une fois %"Ni & Hao%" (你好) \a\b\c") test_json_encoded_encoding ({STRING_32}" it's `abc’ ") + 
test_json_encoded_encoding ({STRING_32}"tab%Tnew line%N %"double quote %"") end test_json_encoded_encoding (s: STRING_32) diff --git a/library/text/encoder/tests/test_utf8_encoder.e b/library/text/encoder/tests/test_utf8_encoder.e index e1f29c61..b2c1525a 100644 --- a/library/text/encoder/tests/test_utf8_encoder.e +++ b/library/text/encoder/tests/test_utf8_encoder.e @@ -1,4 +1,4 @@ -note +note description: "[ Eiffel tests that can be executed by testing tool. ]" @@ -18,23 +18,43 @@ feature -- Test routines test_url_encoded_encoder note testing: "url-encoded" + local + utf8: STRING_8 do - test_utf8_decoding ("%%C3%%A9t%%C3%%A9", {STRING_32}"t") + create utf8.make_empty + utf8.append_code (195) --+ + utf8.append_code (169) -- é + utf8.append_code (116) -- t + utf8.append_code (195) --+ + utf8.append_code (169) -- é + test_utf8_decoding (utf8, {STRING_32}"été") + + create utf8.make_empty + utf8.append_code (228) --+ + utf8.append_code (189) --+ + utf8.append_code (160) -- 你 + + utf8.append_code (229) --+ + utf8.append_code (165) --+ + utf8.append_code (189) -- 好 + + utf8.append_code (229) --+ + utf8.append_code (144) --+ + utf8.append_code (151) -- 吗 + + test_utf8_decoding (utf8, {STRING_32}"你好吗") end test_utf8_decoding (s: STRING_8; e: STRING_32) local - url: URL_ENCODER u: STRING_32 b: UTF8_ENCODER do create b - create url - u := b.decoded_string (url.decoded_string (s)) + u := b.decoded_string (s) assert ("decoded encoded string is same for %"" + s + "%"", u ~ e) end - note copyright: "2011-2011, Eiffel Software and others" license: "Eiffel Forum License v2 (see http://www.eiffel.com/licensing/forum.txt)"