diff --git a/library/server/wsf/src/implementation/wsf_percent_encoder.e b/library/server/wsf/src/implementation/wsf_percent_encoder.e index 8d60b89a..93797fa2 100644 --- a/library/server/wsf/src/implementation/wsf_percent_encoder.e +++ b/library/server/wsf/src/implementation/wsf_percent_encoder.e @@ -9,509 +9,11 @@ note class WSF_PERCENT_ENCODER -feature -- Percent encoding - - percent_encoded_string (v: READABLE_STRING_GENERAL): STRING_8 - -- Return `a_string' percent-encoded - do - create Result.make (v.count) - append_percent_encoded_string_to (v, Result) - end - - append_percent_encoded_string_to (s: READABLE_STRING_GENERAL; a_result: STRING_GENERAL) - -- Append `a_string' as percent-encoded value to `a_result' - local - c: NATURAL_32 - i,n: INTEGER - do - from - i := 1 - n := s.count - until - i > n - loop - c := s.code (i) - if - --| unreserved ALPHA / DIGIT - (48 <= c and c <= 57) -- DIGIT: 0 .. 9 - or (65 <= c and c <= 90) -- ALPHA: A .. Z - or (97 <= c and c <= 122) -- ALPHA: a .. z - then - a_result.append_code (c) - else - inspect c - when - 45, 46, 95, 126 -- unreserved characters: -._~ - then - a_result.append_code (c) - when - 58, 64, -- reserved =+ gen-delims: :@ - 33, 36, 38, 39, 40, 41, 42, -- reserved =+ sub-delims: !$&'()* - 43, 44, 59, 61, -- reserved = sub-delims: +,;= - 37 -- percent encoding: % - then - append_percent_encoded_character_code_to (c, a_result) - else - append_percent_encoded_character_code_to (c, a_result) - end - end - i := i + 1 - end - end - -feature -- Percent encoding: character - - append_percent_encoded_character_code_to (a_code: NATURAL_32; a_result: STRING_GENERAL) - -- Append character code `a_code' as percent-encoded content into `a_result' - do - if a_code > 0xFF then - -- Unicode - append_percent_encoded_unicode_character_code_to (a_code, a_result) - elseif a_code > 0x7F then - -- Extended ASCII - -- This requires percent-encoding on UTF-8 converted character. - append_percent_encoded_unicode_character_code_to (a_code, a_result) - else - -- ASCII - append_percent_encoded_ascii_character_code_to (a_code, a_result) - end - ensure - appended: a_result.count > old a_result.count - end - -feature {NONE} -- Implementation: character encoding - - append_percent_encoded_ascii_character_code_to (a_code: NATURAL_32; a_result: STRING_GENERAL) - -- Append extended ascii character code `a_code' as percent-encoded content into `a_result' - -- Note: it does not UTF-8 convert this extended ASCII. - require - is_extended_ascii: a_code <= 0xFF - local - c: INTEGER - do - if a_code > 0xFF then - -- Unicode - append_percent_encoded_unicode_character_code_to (a_code, a_result) - else - -- Extended ASCII - c := a_code.to_integer_32 - a_result.append_code (37) -- 37 '%%' - a_result.append_code (hex_digit [c |>> 4]) - a_result.append_code (hex_digit [c & 0xF]) - end - ensure - appended: a_result.count > old a_result.count - end - - append_percent_encoded_unicode_character_code_to (a_code: NATURAL_32; a_result: STRING_GENERAL) - -- Append Unicode character code `a_code' as UTF-8 and percent-encoded content into `a_result' - -- Note: it does include UTF-8 conversion of extended ASCII and Unicode. - do - if a_code <= 0x7F then - -- 0xxxxxxx - append_percent_encoded_ascii_character_code_to (a_code, a_result) - elseif a_code <= 0x7FF then - -- 110xxxxx 10xxxxxx - append_percent_encoded_ascii_character_code_to ((a_code |>> 6) | 0xC0, a_result) - append_percent_encoded_ascii_character_code_to ((a_code & 0x3F) | 0x80, a_result) - elseif a_code <= 0xFFFF then - -- 1110xxxx 10xxxxxx 10xxxxxx - append_percent_encoded_ascii_character_code_to ((a_code |>> 12) | 0xE0, a_result) - append_percent_encoded_ascii_character_code_to (((a_code |>> 6) & 0x3F) | 0x80, a_result) - append_percent_encoded_ascii_character_code_to ((a_code & 0x3F) | 0x80, a_result) - else - -- c <= 1FFFFF - there are no higher code points - -- 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - append_percent_encoded_ascii_character_code_to ((a_code |>> 18) | 0xF0, a_result) - append_percent_encoded_ascii_character_code_to (((a_code |>> 12) & 0x3F) | 0x80, a_result) - append_percent_encoded_ascii_character_code_to (((a_code |>> 6) & 0x3F) | 0x80, a_result) - append_percent_encoded_ascii_character_code_to ((a_code & 0x3F) | 0x80, a_result) - end - ensure - appended: a_result.count > old a_result.count - end - -feature -- Percent decoding - - percent_decoded_string (v: READABLE_STRING_GENERAL): STRING_32 - -- Return the percent decoded string equivalent to the percent-encoded string `v' - --| Note that is `a_result' is a STRING_8, any Unicode character will be kept as UTF-8 - do - create Result.make (v.count) - append_percent_decoded_string_to (v, Result) - end - - append_percent_decoded_string_to (v: READABLE_STRING_GENERAL; a_result: STRING_GENERAL) - -- Append to `a_result' a string equivalent to the percent-encoded string `v' - --| Note that is `a_result' is a STRING_8, any Unicode character will be kept as UTF-8 - local - i,n: INTEGER - c: NATURAL_32 - pr: CELL [INTEGER] - a_result_is_string_32: BOOLEAN - do - a_result_is_string_32 := attached {STRING_32} a_result - from - i := 1 - create pr.put (i) - n := v.count - until - i > n - loop - c := v.code (i) - inspect c - when 43 then -- 43 '+' - -- Some implementation are replacing spaces with "+" instead of "%20" - a_result.append_code (32) -- 32 ' ' - when 37 then -- 37 '%%' - -- An escaped character ? - if i = n then -- Error? - a_result.append_code (c) - else - if a_result_is_string_32 then - -- Convert UTF-8 to UTF-32 - pr.replace (i) - c := next_percent_decoded_unicode_character_code (v, pr) - a_result.append_code (c) - i := pr.item - else - -- Keep UTF-8 - pr.replace (i) - c := next_percent_decoded_character_code (v, pr) - a_result.append_code (c) - i := pr.item - end - end - else - if c <= 0x7F then - a_result.append_code (c) - else - if a_result_is_string_32 then - a_result.append_code (c) - else - append_percent_encoded_character_code_to (c, a_result) - end - end - end - i := i + 1 - end - end - -feature {NONE} -- Implementation: decoding - - next_percent_decoded_character_code (v: READABLE_STRING_GENERAL; a_position: CELL [INTEGER]): NATURAL_32 - -- Character decoded from string `v' starting from index `a_position.item' - -- note: it also updates `a_position.item' to indicate the new index position. - require - valid_start: a_position.item <= v.count - is_percent_char: v.code (a_position.item) = 37 -- 37 '%%' - local - c: NATURAL_32 - i, n: INTEGER - not_a_digit: BOOLEAN - ascii_pos: NATURAL_32 - ival: NATURAL_32 - pos: INTEGER - c_is_digit: BOOLEAN - do - --| pos is index in stream of escape character ('%') - pos := a_position.item - c := v.code (pos + 1) - if c = 85 or c = 117 then -- 117 'u' 85 'U' - -- NOTE: this is not a standard, but it can occur, so use this for decoding only - -- An escaped Unicode (ucs2) value, from ECMA scripts - -- has the form: %u where is the UCS value - -- of the character (two byte integer, one to 4 chars - -- after escape sequence). - -- See: http://en.wikipedia.org/wiki/Percent-encoding#Non-standard_implementations - -- UTF-8 result can be 1 to 4 characters. - from - i := pos + 2 - n := v.count - until - (i > n) or not_a_digit - loop - c := v.code (i) - c_is_digit := (48 <= c and c <= 57) -- DIGIT: 0 .. 9 - if - c_is_digit - or (97 <= c and c <= 102) -- ALPHA: a..f - or (65 <= c and c <= 70) -- ALPHA: A..F - then - ival := ival * 16 - if c_is_digit then - ival := ival + (c - 48) -- 48 '0' - else - if c > 70 then -- a..f - ival := ival + (c - 97) + 10 -- 97 'a' - else -- A..F - ival := ival + (c - 65) + 10 -- 65 'A' - end - end - i := i + 1 - else - not_a_digit := True - i := i - 1 - end - end - a_position.replace (i) - Result := ival - else - -- ASCII char? - ascii_pos := hexadecimal_string_to_natural_32 (v.substring (pos + 1, pos + 2)) - Result := ascii_pos - a_position.replace (pos + 2) - end - end - - next_percent_decoded_unicode_character_code (v: READABLE_STRING_GENERAL; a_position: CELL [INTEGER]): NATURAL_32 - -- Next decoded character from `v' at position `a_position.item' - -- note: it also updates `a_position' to indicate the new index position. - require - valid_start: a_position.item <= v.count - is_percent_char: v.code (a_position.item) = 37 -- 37 '%%' - local - n, j: INTEGER - c: NATURAL_32 - c1, c2, c3, c4: NATURAL_32 - pr: CELL [INTEGER] - do - create pr.put (a_position.item) - c1 := next_percent_decoded_character_code (v, pr) - - j := pr.item - n := v.count - - Result := c1 - a_position.replace (j) - - if c1 <= 0x7F then - -- 0xxxxxxx - Result := c1 - elseif c1 <= 0xDF then - -- 110xxxxx 10xxxxxx - if j + 2 <= n then - c := v.code (j + 1) - if c = 37 then -- 37 '%%' - pr.replace (j + 1) - c2 := next_percent_decoded_character_code (v, pr) - j := pr.item - Result := ( - ((c1 & 0x1F) |<< 6) | - ( c2 & 0x3F ) - ) - a_position.replace (j) - else - -- Do not try to decode - end - end - elseif c1 <= 0xEF then - -- 1110xxxx 10xxxxxx 10xxxxxx - if j + 2 <= n then - c := v.code (j + 1) - if c = 37 then -- 37 '%%' - pr.replace (j + 1) - c2 := next_percent_decoded_character_code (v, pr) - j := pr.item - if j + 2 <= n then - c := v.code (j + 1) - if c = 37 then -- 37 '%%' - pr.replace (j + 1) - c3 := next_percent_decoded_character_code (v, pr) - j := pr.item - - Result := ( - ((c1 & 0xF) |<< 12) | - ((c2 & 0x3F) |<< 6) | - ( c3 & 0x3F ) - ) - a_position.replace (j) - else - -- Do not try to decode - end - end - else - -- Do not try to decode - end - end - elseif c1 <= 0xF7 then - -- 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - - if j + 2 <= n then - c := v.code (j + 1) - if c = 37 then -- 37 '%%' - pr.replace (j + 1) - c2 := next_percent_decoded_character_code (v, pr) - j := pr.item - if j + 2 <= n then - c := v.code (j + 1) - if c = 37 then -- 37 '%%' - pr.replace (j + 1) - c3 := next_percent_decoded_character_code (v, pr) - j := pr.item - if j + 2 <= n then - c := v.code (j + 1) - if c = 37 then -- 37 '%%' - pr.replace (j + 1) - c4 := next_percent_decoded_character_code (v, pr) - j := pr.item - - a_position.replace (j) - - Result := ( - ((c1 & 0x7) |<< 18 ) | - ((c2 & 0x3F) |<< 12) | - ((c3 & 0x3F) |<< 6) | - ( c4 & 0x3F ) - ) - else - -- Do not try to decode - end - end - else - -- Do not try to decode - end - end - else - -- Do not try to decode - end - end - else - Result := c1 - end - end - -feature -- RFC and characters - - is_hexa_decimal_character (c: CHARACTER_32): BOOLEAN - -- Is hexadecimal character ? - do - Result := ('a' <= c and c <= 'f') or ('A' <= c and c <= 'F') -- HEXA - or ('0' <= c and c <= '9') -- DIGIT - end - - is_alpha_or_digit_character (c: CHARACTER_32): BOOLEAN - -- Is ALPHA or DIGIT character ? - do - Result := ('a' <= c and c <= 'z') or ('A' <= c and c <= 'Z') -- ALPHA - or ('0' <= c and c <= '9') -- DIGIT - end - - is_alpha_character (c: CHARACTER_32): BOOLEAN - -- Is ALPHA character ? - do - Result := ('a' <= c and c <= 'z') or ('A' <= c and c <= 'Z') - end - - is_digit_character (c: CHARACTER_32): BOOLEAN - -- Is DIGIT character ? - do - Result := ('0' <= c and c <= '9') - end - - is_unreserved_character (c: CHARACTER_32): BOOLEAN - -- unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" - do - if - ('a' <= c and c <= 'z') -- ALPHA - or ('A' <= c and c <= 'Z') -- ALPHA - or ('0' <= c and c <= '9') -- DIGIT - then - Result := True - else - inspect c - when '-', '_', '.', '~' then -- unreserved - Result := True - else - end - end - end - - is_reserved_character (c: CHARACTER_32): BOOLEAN - -- reserved = gen-delims / sub-delims - do - Result := is_gen_delims_character (c) or is_sub_delims_character (c) - end - - is_gen_delims_character (c: CHARACTER_32): BOOLEAN - -- gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" - do - inspect c - when ':' , '/', '?' , '#' , '[' , ']' , '@' then - Result := True - else - end - end - - is_sub_delims_character (c: CHARACTER_32): BOOLEAN - -- sub-delims = "!" / "$" / "&" / "'" / "(" / ")" - -- / "*" / "+" / "," / ";" / "=" - do - inspect c - when '!' , '$' , '&' , '%'' , '(' , ')' , '*' , '+' , ',' , ';' , '=' then -- sub-delims - Result := True - else - end - end - -feature {NONE} -- Implementation - - hex_digit: SPECIAL [NATURAL_32] - -- Hexadecimal digits. - once - create Result.make_filled (0, 16) - Result [0] := {NATURAL_32} 48 -- 48 '0' - Result [1] := {NATURAL_32} 49 -- 49 '1' - Result [2] := {NATURAL_32} 50 -- 50 '2' - Result [3] := {NATURAL_32} 51 -- 51 '3' - Result [4] := {NATURAL_32} 52 -- 52 '4' - Result [5] := {NATURAL_32} 53 -- 53 '5' - Result [6] := {NATURAL_32} 54 -- 54 '6' - Result [7] := {NATURAL_32} 55 -- 55 '7' - Result [8] := {NATURAL_32} 56 -- 56 '8' - Result [9] := {NATURAL_32} 57 -- 57 '9' - Result [10] := {NATURAL_32} 65 -- 65 'A' - Result [11] := {NATURAL_32} 66 -- 66 'B' - Result [12] := {NATURAL_32} 67 -- 67 'C' - Result [13] := {NATURAL_32} 68 -- 68 'D' - Result [14] := {NATURAL_32} 69 -- 69 'E' - Result [15] := {NATURAL_32} 70 -- 70 'F' - end - - is_hexa_decimal (a_string: READABLE_STRING_GENERAL): BOOLEAN - -- Is `a_string' a valid hexadecimal sequence? - local - l_convertor: like ctoi_convertor - do - l_convertor := ctoi_convertor - l_convertor.parse_string_with_type (a_string, {NUMERIC_INFORMATION}.type_natural_32) - Result := l_convertor.is_integral_integer - end - - hexadecimal_string_to_natural_32 (a_hex_string: READABLE_STRING_GENERAL): NATURAL_32 - -- Convert hexadecimal value `a_hex_string' to its corresponding NATURAL_32 value. - require - is_hexa: is_hexa_decimal (a_hex_string) - local - l_convertor: like ctoi_convertor - do - l_convertor := ctoi_convertor - l_convertor.parse_string_with_type (a_hex_string, {NUMERIC_INFORMATION}.type_no_limitation) - Result := l_convertor.parsed_natural_32 - end - - ctoi_convertor: HEXADECIMAL_STRING_TO_INTEGER_CONVERTER - -- Converter used to convert string to integer or natural. - once - create Result.make - Result.set_leading_separators_acceptable (False) - Result.set_trailing_separators_acceptable (False) - ensure - ctoi_convertor_not_void: Result /= Void - end +inherit + PERCENT_ENCODER note - copyright: "2011-2013, Jocelyn Fiat, Javier Velilla, Olivier Ligot, Eiffel Software and others" + copyright: "2011-2014, Jocelyn Fiat, Javier Velilla, Olivier Ligot, Colin Adams, Eiffel Software and others" license: "Eiffel Forum License v2 (see http://www.eiffel.com/licensing/forum.txt)" source: "[ Eiffel Software diff --git a/library/text/encoder/license.lic b/library/text/encoder/license.lic index cf2d1ed9..545cc9d7 100644 --- a/library/text/encoder/license.lic +++ b/library/text/encoder/license.lic @@ -1,5 +1,5 @@ ${NOTE_KEYWORD} - copyright: "2011-${YEAR}, Eiffel Software and others" + copyright: "Copyright (c) 2011-${YEAR}, Eiffel Software and others" license: "Eiffel Forum License v2 (see http://www.eiffel.com/licensing/forum.txt)" source: "[ Eiffel Software diff --git a/library/text/encoder/src/percent_encoder.e b/library/text/encoder/src/percent_encoder.e new file mode 100644 index 00000000..512094da --- /dev/null +++ b/library/text/encoder/src/percent_encoder.e @@ -0,0 +1,593 @@ +note + description: "[ + Component to handle percent encoding + ]" + date: "$Date: 2014-04-09 16:37:28 +0200 (mer., 09 avr. 2014) $" + revision: "$Revision: 94801 $" + EIS: "name=Percent-encoding", "protocol=URI", "src=http://en.wikipedia.org/wiki/Percent-encoding" + +class + PERCENT_ENCODER + +feature -- Status report + + has_error: BOOLEAN + -- Error occurred + --| For now this is not fully implemented, and thus not reliable. + +feature -- Percent encoding + + percent_encoded_string (s: READABLE_STRING_GENERAL): STRING_8 + -- Return `s' percent-encoded + do + create Result.make (s.count) + append_percent_encoded_string_to (s, Result) + end + + append_percent_encoded_string_to (s: READABLE_STRING_GENERAL; a_result: STRING_GENERAL) + -- Append `s' as percent-encoded value to `a_result' + local + c: NATURAL_32 + i,n: INTEGER + do + has_error := False + from + i := 1 + n := s.count + until + i > n + loop + c := s.code (i) + if + --| unreserved ALPHA / DIGIT + (48 <= c and c <= 57) -- DIGIT: 0 .. 9 + or (65 <= c and c <= 90) -- ALPHA: A .. Z + or (97 <= c and c <= 122) -- ALPHA: a .. z + then + a_result.append_code (c) + else + inspect c + when + 45, 46, 95, 126 -- unreserved characters: -._~ + then + a_result.append_code (c) + when + 58, 64, -- reserved =+ gen-delims: :@ + 33, 36, 38, 39, 40, 41, 42, -- reserved =+ sub-delims: !$&'()* + 43, 44, 59, 61, -- reserved = sub-delims: +,;= + 37 -- percent encoding: % + then + append_percent_encoded_character_code_to (c, a_result) + else + append_percent_encoded_character_code_to (c, a_result) + end + end + i := i + 1 + end + end + + partial_encoded_string (s: READABLE_STRING_GENERAL; a_ignore: ITERABLE [CHARACTER_32]): STRING_8 + -- Return `s' as percent-encoded value, + -- but does not escape character listed in `a_ignore'. + do + create Result.make (s.count) + append_partial_percent_encoded_string_to (s, Result, a_ignore) + end + + append_partial_percent_encoded_string_to (s: READABLE_STRING_GENERAL; a_result: STRING_GENERAL; a_ignore: ITERABLE [CHARACTER_32]) + -- Append `s' as percent-encoded value to `a_result', + -- but does not escape character listed in `a_ignore'. + local + c: NATURAL_32 + ch: CHARACTER_32 + i,n: INTEGER + do + has_error := False + from + i := 1 + n := s.count + until + i > n + loop + c := s.code (i) + if + --| unreserved ALPHA / DIGIT + (48 <= c and c <= 57) -- DIGIT: 0 .. 9 + or (65 <= c and c <= 90) -- ALPHA: A .. Z + or (97 <= c and c <= 122) -- ALPHA: a .. z + then + a_result.append_code (c) + else + inspect c + when + 45, 46, 95, 126 -- unreserved characters: -._~ + then + a_result.append_code (c) + when + 58, 64, -- reserved =+ gen-delims: :@ + 33, 36, 38, 39, 40, 41, 42, -- reserved =+ sub-delims: !$&'()* + 43, 44, 59, 61, -- reserved = sub-delims: +,;= + 37 -- percent encoding: % + then + ch := c.to_character_32 + if across a_ignore as ic some ic.item = ch end then + a_result.append_code (c) + else + append_percent_encoded_character_code_to (c, a_result) + end + else + if across a_ignore as ic some ic.item = ch end then + a_result.append_code (c) + else + append_percent_encoded_character_code_to (c, a_result) + end + end + end + i := i + 1 + end + end + +feature -- Percent encoding: character + + append_percent_encoded_character_code_to (a_code: NATURAL_32; a_result: STRING_GENERAL) + -- Append character code `a_code' as percent-encoded content into `a_result' + do + if a_code > 0xFF then + -- Unicode + append_percent_encoded_unicode_character_code_to (a_code, a_result) + elseif a_code > 0x7F then + -- Extended ASCII + -- This requires percent-encoding on UTF-8 converted character. + append_percent_encoded_unicode_character_code_to (a_code, a_result) + else + -- ASCII + append_percent_encoded_ascii_character_code_to (a_code, a_result) + end + ensure + appended: a_result.count > old a_result.count + end + +feature {NONE} -- Implementation: character encoding + + append_percent_encoded_ascii_character_code_to (a_code: NATURAL_32; a_result: STRING_GENERAL) + -- Append extended ascii character code `a_code' as percent-encoded content into `a_result' + -- Note: it does not UTF-8 convert this extended ASCII. + require + is_extended_ascii: a_code <= 0xFF + local + c: INTEGER + do + if a_code > 0xFF then + -- Unicode + append_percent_encoded_unicode_character_code_to (a_code, a_result) + else + -- Extended ASCII + c := a_code.to_integer_32 + a_result.append_code (37) -- 37 '%%' + a_result.append_code (hex_digit [c |>> 4]) + a_result.append_code (hex_digit [c & 0xF]) + end + ensure + appended: a_result.count > old a_result.count + end + + append_percent_encoded_unicode_character_code_to (a_code: NATURAL_32; a_result: STRING_GENERAL) + -- Append Unicode character code `a_code' as UTF-8 and percent-encoded content into `a_result' + -- Note: it does include UTF-8 conversion of extended ASCII and Unicode. + do + if a_code <= 0x7F then + -- 0xxxxxxx + append_percent_encoded_ascii_character_code_to (a_code, a_result) + elseif a_code <= 0x7FF then + -- 110xxxxx 10xxxxxx + append_percent_encoded_ascii_character_code_to ((a_code |>> 6) | 0xC0, a_result) + append_percent_encoded_ascii_character_code_to ((a_code & 0x3F) | 0x80, a_result) + elseif a_code <= 0xFFFF then + -- 1110xxxx 10xxxxxx 10xxxxxx + append_percent_encoded_ascii_character_code_to ((a_code |>> 12) | 0xE0, a_result) + append_percent_encoded_ascii_character_code_to (((a_code |>> 6) & 0x3F) | 0x80, a_result) + append_percent_encoded_ascii_character_code_to ((a_code & 0x3F) | 0x80, a_result) + else + -- c <= 1FFFFF - there are no higher code points + -- 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + append_percent_encoded_ascii_character_code_to ((a_code |>> 18) | 0xF0, a_result) + append_percent_encoded_ascii_character_code_to (((a_code |>> 12) & 0x3F) | 0x80, a_result) + append_percent_encoded_ascii_character_code_to (((a_code |>> 6) & 0x3F) | 0x80, a_result) + append_percent_encoded_ascii_character_code_to ((a_code & 0x3F) | 0x80, a_result) + end + ensure + appended: a_result.count > old a_result.count + end + +feature -- Percent decoding + + percent_decoded_string (v: READABLE_STRING_GENERAL): STRING_32 + -- Return the percent decoded string equivalent to the percent-encoded string `v' + --| Note that is `a_result' is a STRING_8, any Unicode character will be kept as UTF-8 + do + create Result.make (v.count) + append_percent_decoded_string_to (v, Result) + end + + append_percent_decoded_string_to (v: READABLE_STRING_GENERAL; a_result: STRING_GENERAL) + -- Append to `a_result' a string equivalent to the percent-encoded string `v' + --| Note that is `a_result' is a STRING_8, any Unicode character will be kept as UTF-8 + local + i,n: INTEGER + c: NATURAL_32 + pr: CELL [INTEGER] + a_result_is_string_32: BOOLEAN + do + has_error := False + a_result_is_string_32 := attached {STRING_32} a_result + from + i := 1 + create pr.put (i) + n := v.count + until + i > n + loop + c := v.code (i) + inspect c + when 43 then -- 43 '+' + -- Some implementation are replacing spaces with "+" instead of "%20" + a_result.append_code (32) -- 32 ' ' + when 37 then -- 37 '%%' + -- An escaped character ? + if i = n then -- Error? + has_error := True + a_result.append_code (c) + else + if a_result_is_string_32 then + -- Convert UTF-8 to UTF-32 + pr.replace (i) + c := next_percent_decoded_unicode_character_code (v, pr) + a_result.append_code (c) + i := pr.item + else + -- Keep UTF-8 + pr.replace (i) + c := next_percent_decoded_character_code (v, pr) + a_result.append_code (c) + i := pr.item + end + end + else + if c <= 0x7F then + a_result.append_code (c) + else + if a_result_is_string_32 then + a_result.append_code (c) + else + append_percent_encoded_character_code_to (c, a_result) + end + end + end + i := i + 1 + end + end + +feature {NONE} -- Implementation: decoding + + next_percent_decoded_character_code (v: READABLE_STRING_GENERAL; a_position: CELL [INTEGER]): NATURAL_32 + -- Character decoded from string `v' starting from index `a_position.item' + -- note: it also updates `a_position.item' to indicate the new index position. + require + valid_start: a_position.item <= v.count + is_percent_char: v.code (a_position.item) = 37 -- 37 '%%' + local + c: NATURAL_32 + i, n: INTEGER + not_a_digit: BOOLEAN + ascii_pos: NATURAL_32 + ival: NATURAL_32 + pos: INTEGER + c_is_digit: BOOLEAN + do + --| pos is index in stream of escape character ('%') + pos := a_position.item + c := v.code (pos + 1) + if c = 85 or c = 117 then -- 117 'u' 85 'U' + -- NOTE: this is not a standard, but it can occur, so use this for decoding only + -- An escaped Unicode (ucs2) value, from ECMA scripts + -- has the form: %u where is the UCS value + -- of the character (two byte integer, one to 4 chars + -- after escape sequence). + -- See: http://en.wikipedia.org/wiki/Percent-encoding#Non-standard_implementations + -- UTF-8 result can be 1 to 4 characters. + from + i := pos + 2 + n := v.count + until + (i > n) or not_a_digit + loop + c := v.code (i) + c_is_digit := (48 <= c and c <= 57) -- DIGIT: 0 .. 9 + if + c_is_digit + or (97 <= c and c <= 102) -- ALPHA: a..f + or (65 <= c and c <= 70) -- ALPHA: A..F + then + ival := ival * 16 + if c_is_digit then + ival := ival + (c - 48) -- 48 '0' + else + if c > 70 then -- a..f + ival := ival + (c - 97) + 10 -- 97 'a' + else -- A..F + ival := ival + (c - 65) + 10 -- 65 'A' + end + end + i := i + 1 + else + not_a_digit := True + i := i - 1 + end + end + a_position.replace (i) + Result := ival + else + -- ASCII char? + ascii_pos := hexadecimal_string_to_natural_32 (v.substring (pos + 1, pos + 2)) + Result := ascii_pos + a_position.replace (pos + 2) + end + end + + next_percent_decoded_unicode_character_code (v: READABLE_STRING_GENERAL; a_position: CELL [INTEGER]): NATURAL_32 + -- Next decoded character from `v' at position `a_position.item' + -- note: it also updates `a_position' to indicate the new index position. + require + valid_start: a_position.item <= v.count + is_percent_char: v.code (a_position.item) = 37 -- 37 '%%' + local + n, j: INTEGER + c: NATURAL_32 + c1, c2, c3, c4: NATURAL_32 + pr: CELL [INTEGER] + do + create pr.put (a_position.item) + c1 := next_percent_decoded_character_code (v, pr) + + j := pr.item + n := v.count + + Result := c1 + a_position.replace (j) + + if c1 <= 0x7F then + -- 0xxxxxxx + Result := c1 + elseif c1 <= 0xDF then + -- 110xxxxx 10xxxxxx + if j + 2 <= n then + c := v.code (j + 1) + if c = 37 then -- 37 '%%' + pr.replace (j + 1) + c2 := next_percent_decoded_character_code (v, pr) + j := pr.item + Result := ( + ((c1 & 0x1F) |<< 6) | + ( c2 & 0x3F ) + ) + a_position.replace (j) + else + -- Do not try to decode + end + end + elseif c1 <= 0xEF then + -- 1110xxxx 10xxxxxx 10xxxxxx + if j + 2 <= n then + c := v.code (j + 1) + if c = 37 then -- 37 '%%' + pr.replace (j + 1) + c2 := next_percent_decoded_character_code (v, pr) + j := pr.item + if j + 2 <= n then + c := v.code (j + 1) + if c = 37 then -- 37 '%%' + pr.replace (j + 1) + c3 := next_percent_decoded_character_code (v, pr) + j := pr.item + + Result := ( + ((c1 & 0xF) |<< 12) | + ((c2 & 0x3F) |<< 6) | + ( c3 & 0x3F ) + ) + a_position.replace (j) + else + -- Do not try to decode + end + end + else + -- Do not try to decode + end + end + elseif c1 <= 0xF7 then + -- 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + + if j + 2 <= n then + c := v.code (j + 1) + if c = 37 then -- 37 '%%' + pr.replace (j + 1) + c2 := next_percent_decoded_character_code (v, pr) + j := pr.item + if j + 2 <= n then + c := v.code (j + 1) + if c = 37 then -- 37 '%%' + pr.replace (j + 1) + c3 := next_percent_decoded_character_code (v, pr) + j := pr.item + if j + 2 <= n then + c := v.code (j + 1) + if c = 37 then -- 37 '%%' + pr.replace (j + 1) + c4 := next_percent_decoded_character_code (v, pr) + j := pr.item + + a_position.replace (j) + + Result := ( + ((c1 & 0x7) |<< 18 ) | + ((c2 & 0x3F) |<< 12) | + ((c3 & 0x3F) |<< 6) | + ( c4 & 0x3F ) + ) + else + -- Do not try to decode + end + end + else + -- Do not try to decode + end + end + else + -- Do not try to decode + end + end + else + Result := c1 + end + end + +feature -- RFC and characters + + is_hexa_decimal_character (c: CHARACTER_32): BOOLEAN + -- Is hexadecimal character ? + do + Result := ('a' <= c and c <= 'f') or ('A' <= c and c <= 'F') -- HEXA + or ('0' <= c and c <= '9') -- DIGIT + end + + is_alpha_or_digit_character (c: CHARACTER_32): BOOLEAN + -- Is ALPHA or DIGIT character ? + do + Result := ('a' <= c and c <= 'z') or ('A' <= c and c <= 'Z') -- ALPHA + or ('0' <= c and c <= '9') -- DIGIT + end + + is_alpha_character (c: CHARACTER_32): BOOLEAN + -- Is ALPHA character ? + do + Result := ('a' <= c and c <= 'z') or ('A' <= c and c <= 'Z') + end + + is_digit_character (c: CHARACTER_32): BOOLEAN + -- Is DIGIT character ? + do + Result := ('0' <= c and c <= '9') + end + + is_unreserved_character (c: CHARACTER_32): BOOLEAN + -- unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + do + if + ('a' <= c and c <= 'z') -- ALPHA + or ('A' <= c and c <= 'Z') -- ALPHA + or ('0' <= c and c <= '9') -- DIGIT + then + Result := True + else + inspect c + when '-', '_', '.', '~' then -- unreserved + Result := True + else + end + end + end + + is_reserved_character (c: CHARACTER_32): BOOLEAN + -- reserved = gen-delims / sub-delims + do + Result := is_gen_delims_character (c) or is_sub_delims_character (c) + end + + is_gen_delims_character (c: CHARACTER_32): BOOLEAN + -- gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" + do + inspect c + when ':' , '/', '?' , '#' , '[' , ']' , '@' then + Result := True + else + end + end + + is_sub_delims_character (c: CHARACTER_32): BOOLEAN + -- sub-delims = "!" / "$" / "&" / "'" / "(" / ")" + -- / "*" / "+" / "," / ";" / "=" + do + inspect c + when '!' , '$' , '&' , '%'' , '(' , ')' , '*' , '+' , ',' , ';' , '=' then -- sub-delims + Result := True + else + end + end + +feature {NONE} -- Implementation + + hex_digit: SPECIAL [NATURAL_32] + -- Hexadecimal digits. + once + create Result.make_filled (0, 16) + Result [0] := {NATURAL_32} 48 -- 48 '0' + Result [1] := {NATURAL_32} 49 -- 49 '1' + Result [2] := {NATURAL_32} 50 -- 50 '2' + Result [3] := {NATURAL_32} 51 -- 51 '3' + Result [4] := {NATURAL_32} 52 -- 52 '4' + Result [5] := {NATURAL_32} 53 -- 53 '5' + Result [6] := {NATURAL_32} 54 -- 54 '6' + Result [7] := {NATURAL_32} 55 -- 55 '7' + Result [8] := {NATURAL_32} 56 -- 56 '8' + Result [9] := {NATURAL_32} 57 -- 57 '9' + Result [10] := {NATURAL_32} 65 -- 65 'A' + Result [11] := {NATURAL_32} 66 -- 66 'B' + Result [12] := {NATURAL_32} 67 -- 67 'C' + Result [13] := {NATURAL_32} 68 -- 68 'D' + Result [14] := {NATURAL_32} 69 -- 69 'E' + Result [15] := {NATURAL_32} 70 -- 70 'F' + end + + is_hexa_decimal (a_string: READABLE_STRING_GENERAL): BOOLEAN + -- Is `a_string' a valid hexadecimal sequence? + local + l_convertor: like ctoi_convertor + do + l_convertor := ctoi_convertor + l_convertor.parse_string_with_type (a_string, {NUMERIC_INFORMATION}.type_natural_32) + Result := l_convertor.is_integral_integer + end + + hexadecimal_string_to_natural_32 (a_hex_string: READABLE_STRING_GENERAL): NATURAL_32 + -- Convert hexadecimal value `a_hex_string' to its corresponding NATURAL_32 value. + require + is_hexa: is_hexa_decimal (a_hex_string) + local + l_convertor: like ctoi_convertor + do + l_convertor := ctoi_convertor + l_convertor.parse_string_with_type (a_hex_string, {NUMERIC_INFORMATION}.type_no_limitation) + Result := l_convertor.parsed_natural_32 + end + + ctoi_convertor: HEXADECIMAL_STRING_TO_INTEGER_CONVERTER + -- Converter used to convert string to integer or natural. + once + create Result.make + Result.set_leading_separators_acceptable (False) + Result.set_trailing_separators_acceptable (False) + ensure + ctoi_convertor_not_void: Result /= Void + end + +note + copyright: "Copyright (c) 2011-2014, Eiffel Software and others" + license: "Eiffel Forum License v2 (see http://www.eiffel.com/licensing/forum.txt)" + source: "[ + Eiffel Software + 5949 Hollister Ave., Goleta, CA 93117 USA + Telephone 805-685-1006, Fax 805-685-6869 + Website http://www.eiffel.com + Customer support http://support.eiffel.com + ]" +end