Moved implementation of WSF_PERCENT_ENCODER into "encoder" library, and added the *partial* variant.

This commit is contained in:
2014-04-09 17:19:09 +02:00
parent 6e27f66306
commit 9999b5e400
3 changed files with 597 additions and 502 deletions

View File

@@ -9,509 +9,11 @@ note
class
WSF_PERCENT_ENCODER
feature -- Percent encoding
percent_encoded_string (v: READABLE_STRING_GENERAL): STRING_8
-- Return `a_string' percent-encoded
do
create Result.make (v.count)
append_percent_encoded_string_to (v, Result)
end
append_percent_encoded_string_to (s: READABLE_STRING_GENERAL; a_result: STRING_GENERAL)
-- Append `a_string' as percent-encoded value to `a_result'
local
c: NATURAL_32
i,n: INTEGER
do
from
i := 1
n := s.count
until
i > n
loop
c := s.code (i)
if
--| unreserved ALPHA / DIGIT
(48 <= c and c <= 57) -- DIGIT: 0 .. 9
or (65 <= c and c <= 90) -- ALPHA: A .. Z
or (97 <= c and c <= 122) -- ALPHA: a .. z
then
a_result.append_code (c)
else
inspect c
when
45, 46, 95, 126 -- unreserved characters: -._~
then
a_result.append_code (c)
when
58, 64, -- reserved =+ gen-delims: :@
33, 36, 38, 39, 40, 41, 42, -- reserved =+ sub-delims: !$&'()*
43, 44, 59, 61, -- reserved = sub-delims: +,;=
37 -- percent encoding: %
then
append_percent_encoded_character_code_to (c, a_result)
else
append_percent_encoded_character_code_to (c, a_result)
end
end
i := i + 1
end
end
feature -- Percent encoding: character
append_percent_encoded_character_code_to (a_code: NATURAL_32; a_result: STRING_GENERAL)
-- Append character code `a_code' as percent-encoded content into `a_result'
do
if a_code > 0xFF then
-- Unicode
append_percent_encoded_unicode_character_code_to (a_code, a_result)
elseif a_code > 0x7F then
-- Extended ASCII
-- This requires percent-encoding on UTF-8 converted character.
append_percent_encoded_unicode_character_code_to (a_code, a_result)
else
-- ASCII
append_percent_encoded_ascii_character_code_to (a_code, a_result)
end
ensure
appended: a_result.count > old a_result.count
end
feature {NONE} -- Implementation: character encoding
append_percent_encoded_ascii_character_code_to (a_code: NATURAL_32; a_result: STRING_GENERAL)
-- Append extended ascii character code `a_code' as percent-encoded content into `a_result'
-- Note: it does not UTF-8 convert this extended ASCII.
require
is_extended_ascii: a_code <= 0xFF
local
c: INTEGER
do
if a_code > 0xFF then
-- Unicode
append_percent_encoded_unicode_character_code_to (a_code, a_result)
else
-- Extended ASCII
c := a_code.to_integer_32
a_result.append_code (37) -- 37 '%%'
a_result.append_code (hex_digit [c |>> 4])
a_result.append_code (hex_digit [c & 0xF])
end
ensure
appended: a_result.count > old a_result.count
end
append_percent_encoded_unicode_character_code_to (a_code: NATURAL_32; a_result: STRING_GENERAL)
-- Append Unicode character code `a_code' as UTF-8 and percent-encoded content into `a_result'
-- Note: it does include UTF-8 conversion of extended ASCII and Unicode.
do
if a_code <= 0x7F then
-- 0xxxxxxx
append_percent_encoded_ascii_character_code_to (a_code, a_result)
elseif a_code <= 0x7FF then
-- 110xxxxx 10xxxxxx
append_percent_encoded_ascii_character_code_to ((a_code |>> 6) | 0xC0, a_result)
append_percent_encoded_ascii_character_code_to ((a_code & 0x3F) | 0x80, a_result)
elseif a_code <= 0xFFFF then
-- 1110xxxx 10xxxxxx 10xxxxxx
append_percent_encoded_ascii_character_code_to ((a_code |>> 12) | 0xE0, a_result)
append_percent_encoded_ascii_character_code_to (((a_code |>> 6) & 0x3F) | 0x80, a_result)
append_percent_encoded_ascii_character_code_to ((a_code & 0x3F) | 0x80, a_result)
else
-- c <= 1FFFFF - there are no higher code points
-- 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
append_percent_encoded_ascii_character_code_to ((a_code |>> 18) | 0xF0, a_result)
append_percent_encoded_ascii_character_code_to (((a_code |>> 12) & 0x3F) | 0x80, a_result)
append_percent_encoded_ascii_character_code_to (((a_code |>> 6) & 0x3F) | 0x80, a_result)
append_percent_encoded_ascii_character_code_to ((a_code & 0x3F) | 0x80, a_result)
end
ensure
appended: a_result.count > old a_result.count
end
feature -- Percent decoding
percent_decoded_string (v: READABLE_STRING_GENERAL): STRING_32
-- Return the percent decoded string equivalent to the percent-encoded string `v'
--| Note that is `a_result' is a STRING_8, any Unicode character will be kept as UTF-8
do
create Result.make (v.count)
append_percent_decoded_string_to (v, Result)
end
append_percent_decoded_string_to (v: READABLE_STRING_GENERAL; a_result: STRING_GENERAL)
-- Append to `a_result' a string equivalent to the percent-encoded string `v'
--| Note that is `a_result' is a STRING_8, any Unicode character will be kept as UTF-8
local
i,n: INTEGER
c: NATURAL_32
pr: CELL [INTEGER]
a_result_is_string_32: BOOLEAN
do
a_result_is_string_32 := attached {STRING_32} a_result
from
i := 1
create pr.put (i)
n := v.count
until
i > n
loop
c := v.code (i)
inspect c
when 43 then -- 43 '+'
-- Some implementation are replacing spaces with "+" instead of "%20"
a_result.append_code (32) -- 32 ' '
when 37 then -- 37 '%%'
-- An escaped character ?
if i = n then -- Error?
a_result.append_code (c)
else
if a_result_is_string_32 then
-- Convert UTF-8 to UTF-32
pr.replace (i)
c := next_percent_decoded_unicode_character_code (v, pr)
a_result.append_code (c)
i := pr.item
else
-- Keep UTF-8
pr.replace (i)
c := next_percent_decoded_character_code (v, pr)
a_result.append_code (c)
i := pr.item
end
end
else
if c <= 0x7F then
a_result.append_code (c)
else
if a_result_is_string_32 then
a_result.append_code (c)
else
append_percent_encoded_character_code_to (c, a_result)
end
end
end
i := i + 1
end
end
feature {NONE} -- Implementation: decoding
next_percent_decoded_character_code (v: READABLE_STRING_GENERAL; a_position: CELL [INTEGER]): NATURAL_32
-- Character decoded from string `v' starting from index `a_position.item'
-- note: it also updates `a_position.item' to indicate the new index position.
require
valid_start: a_position.item <= v.count
is_percent_char: v.code (a_position.item) = 37 -- 37 '%%'
local
c: NATURAL_32
i, n: INTEGER
not_a_digit: BOOLEAN
ascii_pos: NATURAL_32
ival: NATURAL_32
pos: INTEGER
c_is_digit: BOOLEAN
do
--| pos is index in stream of escape character ('%')
pos := a_position.item
c := v.code (pos + 1)
if c = 85 or c = 117 then -- 117 'u' 85 'U'
-- NOTE: this is not a standard, but it can occur, so use this for decoding only
-- An escaped Unicode (ucs2) value, from ECMA scripts
-- has the form: %u<n> where <n> is the UCS value
-- of the character (two byte integer, one to 4 chars
-- after escape sequence).
-- See: http://en.wikipedia.org/wiki/Percent-encoding#Non-standard_implementations
-- UTF-8 result can be 1 to 4 characters.
from
i := pos + 2
n := v.count
until
(i > n) or not_a_digit
loop
c := v.code (i)
c_is_digit := (48 <= c and c <= 57) -- DIGIT: 0 .. 9
if
c_is_digit
or (97 <= c and c <= 102) -- ALPHA: a..f
or (65 <= c and c <= 70) -- ALPHA: A..F
then
ival := ival * 16
if c_is_digit then
ival := ival + (c - 48) -- 48 '0'
else
if c > 70 then -- a..f
ival := ival + (c - 97) + 10 -- 97 'a'
else -- A..F
ival := ival + (c - 65) + 10 -- 65 'A'
end
end
i := i + 1
else
not_a_digit := True
i := i - 1
end
end
a_position.replace (i)
Result := ival
else
-- ASCII char?
ascii_pos := hexadecimal_string_to_natural_32 (v.substring (pos + 1, pos + 2))
Result := ascii_pos
a_position.replace (pos + 2)
end
end
next_percent_decoded_unicode_character_code (v: READABLE_STRING_GENERAL; a_position: CELL [INTEGER]): NATURAL_32
-- Next decoded character from `v' at position `a_position.item'
-- note: it also updates `a_position' to indicate the new index position.
require
valid_start: a_position.item <= v.count
is_percent_char: v.code (a_position.item) = 37 -- 37 '%%'
local
n, j: INTEGER
c: NATURAL_32
c1, c2, c3, c4: NATURAL_32
pr: CELL [INTEGER]
do
create pr.put (a_position.item)
c1 := next_percent_decoded_character_code (v, pr)
j := pr.item
n := v.count
Result := c1
a_position.replace (j)
if c1 <= 0x7F then
-- 0xxxxxxx
Result := c1
elseif c1 <= 0xDF then
-- 110xxxxx 10xxxxxx
if j + 2 <= n then
c := v.code (j + 1)
if c = 37 then -- 37 '%%'
pr.replace (j + 1)
c2 := next_percent_decoded_character_code (v, pr)
j := pr.item
Result := (
((c1 & 0x1F) |<< 6) |
( c2 & 0x3F )
)
a_position.replace (j)
else
-- Do not try to decode
end
end
elseif c1 <= 0xEF then
-- 1110xxxx 10xxxxxx 10xxxxxx
if j + 2 <= n then
c := v.code (j + 1)
if c = 37 then -- 37 '%%'
pr.replace (j + 1)
c2 := next_percent_decoded_character_code (v, pr)
j := pr.item
if j + 2 <= n then
c := v.code (j + 1)
if c = 37 then -- 37 '%%'
pr.replace (j + 1)
c3 := next_percent_decoded_character_code (v, pr)
j := pr.item
Result := (
((c1 & 0xF) |<< 12) |
((c2 & 0x3F) |<< 6) |
( c3 & 0x3F )
)
a_position.replace (j)
else
-- Do not try to decode
end
end
else
-- Do not try to decode
end
end
elseif c1 <= 0xF7 then
-- 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
if j + 2 <= n then
c := v.code (j + 1)
if c = 37 then -- 37 '%%'
pr.replace (j + 1)
c2 := next_percent_decoded_character_code (v, pr)
j := pr.item
if j + 2 <= n then
c := v.code (j + 1)
if c = 37 then -- 37 '%%'
pr.replace (j + 1)
c3 := next_percent_decoded_character_code (v, pr)
j := pr.item
if j + 2 <= n then
c := v.code (j + 1)
if c = 37 then -- 37 '%%'
pr.replace (j + 1)
c4 := next_percent_decoded_character_code (v, pr)
j := pr.item
a_position.replace (j)
Result := (
((c1 & 0x7) |<< 18 ) |
((c2 & 0x3F) |<< 12) |
((c3 & 0x3F) |<< 6) |
( c4 & 0x3F )
)
else
-- Do not try to decode
end
end
else
-- Do not try to decode
end
end
else
-- Do not try to decode
end
end
else
Result := c1
end
end
feature -- RFC and characters
is_hexa_decimal_character (c: CHARACTER_32): BOOLEAN
-- Is hexadecimal character ?
do
Result := ('a' <= c and c <= 'f') or ('A' <= c and c <= 'F') -- HEXA
or ('0' <= c and c <= '9') -- DIGIT
end
is_alpha_or_digit_character (c: CHARACTER_32): BOOLEAN
-- Is ALPHA or DIGIT character ?
do
Result := ('a' <= c and c <= 'z') or ('A' <= c and c <= 'Z') -- ALPHA
or ('0' <= c and c <= '9') -- DIGIT
end
is_alpha_character (c: CHARACTER_32): BOOLEAN
-- Is ALPHA character ?
do
Result := ('a' <= c and c <= 'z') or ('A' <= c and c <= 'Z')
end
is_digit_character (c: CHARACTER_32): BOOLEAN
-- Is DIGIT character ?
do
Result := ('0' <= c and c <= '9')
end
is_unreserved_character (c: CHARACTER_32): BOOLEAN
-- unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
do
if
('a' <= c and c <= 'z') -- ALPHA
or ('A' <= c and c <= 'Z') -- ALPHA
or ('0' <= c and c <= '9') -- DIGIT
then
Result := True
else
inspect c
when '-', '_', '.', '~' then -- unreserved
Result := True
else
end
end
end
is_reserved_character (c: CHARACTER_32): BOOLEAN
-- reserved = gen-delims / sub-delims
do
Result := is_gen_delims_character (c) or is_sub_delims_character (c)
end
is_gen_delims_character (c: CHARACTER_32): BOOLEAN
-- gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
do
inspect c
when ':' , '/', '?' , '#' , '[' , ']' , '@' then
Result := True
else
end
end
is_sub_delims_character (c: CHARACTER_32): BOOLEAN
-- sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
-- / "*" / "+" / "," / ";" / "="
do
inspect c
when '!' , '$' , '&' , '%'' , '(' , ')' , '*' , '+' , ',' , ';' , '=' then -- sub-delims
Result := True
else
end
end
feature {NONE} -- Implementation
hex_digit: SPECIAL [NATURAL_32]
-- Hexadecimal digits.
once
create Result.make_filled (0, 16)
Result [0] := {NATURAL_32} 48 -- 48 '0'
Result [1] := {NATURAL_32} 49 -- 49 '1'
Result [2] := {NATURAL_32} 50 -- 50 '2'
Result [3] := {NATURAL_32} 51 -- 51 '3'
Result [4] := {NATURAL_32} 52 -- 52 '4'
Result [5] := {NATURAL_32} 53 -- 53 '5'
Result [6] := {NATURAL_32} 54 -- 54 '6'
Result [7] := {NATURAL_32} 55 -- 55 '7'
Result [8] := {NATURAL_32} 56 -- 56 '8'
Result [9] := {NATURAL_32} 57 -- 57 '9'
Result [10] := {NATURAL_32} 65 -- 65 'A'
Result [11] := {NATURAL_32} 66 -- 66 'B'
Result [12] := {NATURAL_32} 67 -- 67 'C'
Result [13] := {NATURAL_32} 68 -- 68 'D'
Result [14] := {NATURAL_32} 69 -- 69 'E'
Result [15] := {NATURAL_32} 70 -- 70 'F'
end
is_hexa_decimal (a_string: READABLE_STRING_GENERAL): BOOLEAN
-- Is `a_string' a valid hexadecimal sequence?
local
l_convertor: like ctoi_convertor
do
l_convertor := ctoi_convertor
l_convertor.parse_string_with_type (a_string, {NUMERIC_INFORMATION}.type_natural_32)
Result := l_convertor.is_integral_integer
end
hexadecimal_string_to_natural_32 (a_hex_string: READABLE_STRING_GENERAL): NATURAL_32
-- Convert hexadecimal value `a_hex_string' to its corresponding NATURAL_32 value.
require
is_hexa: is_hexa_decimal (a_hex_string)
local
l_convertor: like ctoi_convertor
do
l_convertor := ctoi_convertor
l_convertor.parse_string_with_type (a_hex_string, {NUMERIC_INFORMATION}.type_no_limitation)
Result := l_convertor.parsed_natural_32
end
ctoi_convertor: HEXADECIMAL_STRING_TO_INTEGER_CONVERTER
-- Converter used to convert string to integer or natural.
once
create Result.make
Result.set_leading_separators_acceptable (False)
Result.set_trailing_separators_acceptable (False)
ensure
ctoi_convertor_not_void: Result /= Void
end
inherit
PERCENT_ENCODER
note
copyright: "2011-2013, Jocelyn Fiat, Javier Velilla, Olivier Ligot, Eiffel Software and others"
copyright: "2011-2014, Jocelyn Fiat, Javier Velilla, Olivier Ligot, Colin Adams, Eiffel Software and others"
license: "Eiffel Forum License v2 (see http://www.eiffel.com/licensing/forum.txt)"
source: "[
Eiffel Software

View File

@@ -1,5 +1,5 @@
${NOTE_KEYWORD}
copyright: "2011-${YEAR}, Eiffel Software and others"
copyright: "Copyright (c) 2011-${YEAR}, Eiffel Software and others"
license: "Eiffel Forum License v2 (see http://www.eiffel.com/licensing/forum.txt)"
source: "[
Eiffel Software

View File

@@ -0,0 +1,593 @@
note
description: "[
Component to handle percent encoding
]"
date: "$Date: 2014-04-09 16:37:28 +0200 (mer., 09 avr. 2014) $"
revision: "$Revision: 94801 $"
EIS: "name=Percent-encoding", "protocol=URI", "src=http://en.wikipedia.org/wiki/Percent-encoding"
class
PERCENT_ENCODER
feature -- Status report
has_error: BOOLEAN
-- Error occurred
--| For now this is not fully implemented, and thus not reliable.
feature -- Percent encoding
percent_encoded_string (s: READABLE_STRING_GENERAL): STRING_8
-- Return `s' percent-encoded
do
create Result.make (s.count)
append_percent_encoded_string_to (s, Result)
end
append_percent_encoded_string_to (s: READABLE_STRING_GENERAL; a_result: STRING_GENERAL)
-- Append `s' as percent-encoded value to `a_result'
local
c: NATURAL_32
i,n: INTEGER
do
has_error := False
from
i := 1
n := s.count
until
i > n
loop
c := s.code (i)
if
--| unreserved ALPHA / DIGIT
(48 <= c and c <= 57) -- DIGIT: 0 .. 9
or (65 <= c and c <= 90) -- ALPHA: A .. Z
or (97 <= c and c <= 122) -- ALPHA: a .. z
then
a_result.append_code (c)
else
inspect c
when
45, 46, 95, 126 -- unreserved characters: -._~
then
a_result.append_code (c)
when
58, 64, -- reserved =+ gen-delims: :@
33, 36, 38, 39, 40, 41, 42, -- reserved =+ sub-delims: !$&'()*
43, 44, 59, 61, -- reserved = sub-delims: +,;=
37 -- percent encoding: %
then
append_percent_encoded_character_code_to (c, a_result)
else
append_percent_encoded_character_code_to (c, a_result)
end
end
i := i + 1
end
end
partial_encoded_string (s: READABLE_STRING_GENERAL; a_ignore: ITERABLE [CHARACTER_32]): STRING_8
-- Return `s' as percent-encoded value,
-- but does not escape character listed in `a_ignore'.
do
create Result.make (s.count)
append_partial_percent_encoded_string_to (s, Result, a_ignore)
end
append_partial_percent_encoded_string_to (s: READABLE_STRING_GENERAL; a_result: STRING_GENERAL; a_ignore: ITERABLE [CHARACTER_32])
-- Append `s' as percent-encoded value to `a_result',
-- but does not escape character listed in `a_ignore'.
local
c: NATURAL_32
ch: CHARACTER_32
i,n: INTEGER
do
has_error := False
from
i := 1
n := s.count
until
i > n
loop
c := s.code (i)
if
--| unreserved ALPHA / DIGIT
(48 <= c and c <= 57) -- DIGIT: 0 .. 9
or (65 <= c and c <= 90) -- ALPHA: A .. Z
or (97 <= c and c <= 122) -- ALPHA: a .. z
then
a_result.append_code (c)
else
inspect c
when
45, 46, 95, 126 -- unreserved characters: -._~
then
a_result.append_code (c)
when
58, 64, -- reserved =+ gen-delims: :@
33, 36, 38, 39, 40, 41, 42, -- reserved =+ sub-delims: !$&'()*
43, 44, 59, 61, -- reserved = sub-delims: +,;=
37 -- percent encoding: %
then
ch := c.to_character_32
if across a_ignore as ic some ic.item = ch end then
a_result.append_code (c)
else
append_percent_encoded_character_code_to (c, a_result)
end
else
if across a_ignore as ic some ic.item = ch end then
a_result.append_code (c)
else
append_percent_encoded_character_code_to (c, a_result)
end
end
end
i := i + 1
end
end
feature -- Percent encoding: character
append_percent_encoded_character_code_to (a_code: NATURAL_32; a_result: STRING_GENERAL)
-- Append character code `a_code' as percent-encoded content into `a_result'
do
if a_code > 0xFF then
-- Unicode
append_percent_encoded_unicode_character_code_to (a_code, a_result)
elseif a_code > 0x7F then
-- Extended ASCII
-- This requires percent-encoding on UTF-8 converted character.
append_percent_encoded_unicode_character_code_to (a_code, a_result)
else
-- ASCII
append_percent_encoded_ascii_character_code_to (a_code, a_result)
end
ensure
appended: a_result.count > old a_result.count
end
feature {NONE} -- Implementation: character encoding
append_percent_encoded_ascii_character_code_to (a_code: NATURAL_32; a_result: STRING_GENERAL)
-- Append extended ascii character code `a_code' as percent-encoded content into `a_result'
-- Note: it does not UTF-8 convert this extended ASCII.
require
is_extended_ascii: a_code <= 0xFF
local
c: INTEGER
do
if a_code > 0xFF then
-- Unicode
append_percent_encoded_unicode_character_code_to (a_code, a_result)
else
-- Extended ASCII
c := a_code.to_integer_32
a_result.append_code (37) -- 37 '%%'
a_result.append_code (hex_digit [c |>> 4])
a_result.append_code (hex_digit [c & 0xF])
end
ensure
appended: a_result.count > old a_result.count
end
append_percent_encoded_unicode_character_code_to (a_code: NATURAL_32; a_result: STRING_GENERAL)
-- Append Unicode character code `a_code' as UTF-8 and percent-encoded content into `a_result'
-- Note: it does include UTF-8 conversion of extended ASCII and Unicode.
do
if a_code <= 0x7F then
-- 0xxxxxxx
append_percent_encoded_ascii_character_code_to (a_code, a_result)
elseif a_code <= 0x7FF then
-- 110xxxxx 10xxxxxx
append_percent_encoded_ascii_character_code_to ((a_code |>> 6) | 0xC0, a_result)
append_percent_encoded_ascii_character_code_to ((a_code & 0x3F) | 0x80, a_result)
elseif a_code <= 0xFFFF then
-- 1110xxxx 10xxxxxx 10xxxxxx
append_percent_encoded_ascii_character_code_to ((a_code |>> 12) | 0xE0, a_result)
append_percent_encoded_ascii_character_code_to (((a_code |>> 6) & 0x3F) | 0x80, a_result)
append_percent_encoded_ascii_character_code_to ((a_code & 0x3F) | 0x80, a_result)
else
-- c <= 1FFFFF - there are no higher code points
-- 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
append_percent_encoded_ascii_character_code_to ((a_code |>> 18) | 0xF0, a_result)
append_percent_encoded_ascii_character_code_to (((a_code |>> 12) & 0x3F) | 0x80, a_result)
append_percent_encoded_ascii_character_code_to (((a_code |>> 6) & 0x3F) | 0x80, a_result)
append_percent_encoded_ascii_character_code_to ((a_code & 0x3F) | 0x80, a_result)
end
ensure
appended: a_result.count > old a_result.count
end
feature -- Percent decoding
percent_decoded_string (v: READABLE_STRING_GENERAL): STRING_32
-- Return the percent decoded string equivalent to the percent-encoded string `v'
--| Note that is `a_result' is a STRING_8, any Unicode character will be kept as UTF-8
do
create Result.make (v.count)
append_percent_decoded_string_to (v, Result)
end
append_percent_decoded_string_to (v: READABLE_STRING_GENERAL; a_result: STRING_GENERAL)
-- Append to `a_result' a string equivalent to the percent-encoded string `v'
--| Note that is `a_result' is a STRING_8, any Unicode character will be kept as UTF-8
local
i,n: INTEGER
c: NATURAL_32
pr: CELL [INTEGER]
a_result_is_string_32: BOOLEAN
do
has_error := False
a_result_is_string_32 := attached {STRING_32} a_result
from
i := 1
create pr.put (i)
n := v.count
until
i > n
loop
c := v.code (i)
inspect c
when 43 then -- 43 '+'
-- Some implementation are replacing spaces with "+" instead of "%20"
a_result.append_code (32) -- 32 ' '
when 37 then -- 37 '%%'
-- An escaped character ?
if i = n then -- Error?
has_error := True
a_result.append_code (c)
else
if a_result_is_string_32 then
-- Convert UTF-8 to UTF-32
pr.replace (i)
c := next_percent_decoded_unicode_character_code (v, pr)
a_result.append_code (c)
i := pr.item
else
-- Keep UTF-8
pr.replace (i)
c := next_percent_decoded_character_code (v, pr)
a_result.append_code (c)
i := pr.item
end
end
else
if c <= 0x7F then
a_result.append_code (c)
else
if a_result_is_string_32 then
a_result.append_code (c)
else
append_percent_encoded_character_code_to (c, a_result)
end
end
end
i := i + 1
end
end
feature {NONE} -- Implementation: decoding
next_percent_decoded_character_code (v: READABLE_STRING_GENERAL; a_position: CELL [INTEGER]): NATURAL_32
-- Character decoded from string `v' starting from index `a_position.item'
-- note: it also updates `a_position.item' to indicate the new index position.
require
valid_start: a_position.item <= v.count
is_percent_char: v.code (a_position.item) = 37 -- 37 '%%'
local
c: NATURAL_32
i, n: INTEGER
not_a_digit: BOOLEAN
ascii_pos: NATURAL_32
ival: NATURAL_32
pos: INTEGER
c_is_digit: BOOLEAN
do
--| pos is index in stream of escape character ('%')
pos := a_position.item
c := v.code (pos + 1)
if c = 85 or c = 117 then -- 117 'u' 85 'U'
-- NOTE: this is not a standard, but it can occur, so use this for decoding only
-- An escaped Unicode (ucs2) value, from ECMA scripts
-- has the form: %u<n> where <n> is the UCS value
-- of the character (two byte integer, one to 4 chars
-- after escape sequence).
-- See: http://en.wikipedia.org/wiki/Percent-encoding#Non-standard_implementations
-- UTF-8 result can be 1 to 4 characters.
from
i := pos + 2
n := v.count
until
(i > n) or not_a_digit
loop
c := v.code (i)
c_is_digit := (48 <= c and c <= 57) -- DIGIT: 0 .. 9
if
c_is_digit
or (97 <= c and c <= 102) -- ALPHA: a..f
or (65 <= c and c <= 70) -- ALPHA: A..F
then
ival := ival * 16
if c_is_digit then
ival := ival + (c - 48) -- 48 '0'
else
if c > 70 then -- a..f
ival := ival + (c - 97) + 10 -- 97 'a'
else -- A..F
ival := ival + (c - 65) + 10 -- 65 'A'
end
end
i := i + 1
else
not_a_digit := True
i := i - 1
end
end
a_position.replace (i)
Result := ival
else
-- ASCII char?
ascii_pos := hexadecimal_string_to_natural_32 (v.substring (pos + 1, pos + 2))
Result := ascii_pos
a_position.replace (pos + 2)
end
end
next_percent_decoded_unicode_character_code (v: READABLE_STRING_GENERAL; a_position: CELL [INTEGER]): NATURAL_32
-- Next decoded character from `v' at position `a_position.item'
-- note: it also updates `a_position' to indicate the new index position.
require
valid_start: a_position.item <= v.count
is_percent_char: v.code (a_position.item) = 37 -- 37 '%%'
local
n, j: INTEGER
c: NATURAL_32
c1, c2, c3, c4: NATURAL_32
pr: CELL [INTEGER]
do
create pr.put (a_position.item)
c1 := next_percent_decoded_character_code (v, pr)
j := pr.item
n := v.count
Result := c1
a_position.replace (j)
if c1 <= 0x7F then
-- 0xxxxxxx
Result := c1
elseif c1 <= 0xDF then
-- 110xxxxx 10xxxxxx
if j + 2 <= n then
c := v.code (j + 1)
if c = 37 then -- 37 '%%'
pr.replace (j + 1)
c2 := next_percent_decoded_character_code (v, pr)
j := pr.item
Result := (
((c1 & 0x1F) |<< 6) |
( c2 & 0x3F )
)
a_position.replace (j)
else
-- Do not try to decode
end
end
elseif c1 <= 0xEF then
-- 1110xxxx 10xxxxxx 10xxxxxx
if j + 2 <= n then
c := v.code (j + 1)
if c = 37 then -- 37 '%%'
pr.replace (j + 1)
c2 := next_percent_decoded_character_code (v, pr)
j := pr.item
if j + 2 <= n then
c := v.code (j + 1)
if c = 37 then -- 37 '%%'
pr.replace (j + 1)
c3 := next_percent_decoded_character_code (v, pr)
j := pr.item
Result := (
((c1 & 0xF) |<< 12) |
((c2 & 0x3F) |<< 6) |
( c3 & 0x3F )
)
a_position.replace (j)
else
-- Do not try to decode
end
end
else
-- Do not try to decode
end
end
elseif c1 <= 0xF7 then
-- 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
if j + 2 <= n then
c := v.code (j + 1)
if c = 37 then -- 37 '%%'
pr.replace (j + 1)
c2 := next_percent_decoded_character_code (v, pr)
j := pr.item
if j + 2 <= n then
c := v.code (j + 1)
if c = 37 then -- 37 '%%'
pr.replace (j + 1)
c3 := next_percent_decoded_character_code (v, pr)
j := pr.item
if j + 2 <= n then
c := v.code (j + 1)
if c = 37 then -- 37 '%%'
pr.replace (j + 1)
c4 := next_percent_decoded_character_code (v, pr)
j := pr.item
a_position.replace (j)
Result := (
((c1 & 0x7) |<< 18 ) |
((c2 & 0x3F) |<< 12) |
((c3 & 0x3F) |<< 6) |
( c4 & 0x3F )
)
else
-- Do not try to decode
end
end
else
-- Do not try to decode
end
end
else
-- Do not try to decode
end
end
else
Result := c1
end
end
feature -- RFC and characters
is_hexa_decimal_character (c: CHARACTER_32): BOOLEAN
-- Is hexadecimal character ?
do
Result := ('a' <= c and c <= 'f') or ('A' <= c and c <= 'F') -- HEXA
or ('0' <= c and c <= '9') -- DIGIT
end
is_alpha_or_digit_character (c: CHARACTER_32): BOOLEAN
-- Is ALPHA or DIGIT character ?
do
Result := ('a' <= c and c <= 'z') or ('A' <= c and c <= 'Z') -- ALPHA
or ('0' <= c and c <= '9') -- DIGIT
end
is_alpha_character (c: CHARACTER_32): BOOLEAN
-- Is ALPHA character ?
do
Result := ('a' <= c and c <= 'z') or ('A' <= c and c <= 'Z')
end
is_digit_character (c: CHARACTER_32): BOOLEAN
-- Is DIGIT character ?
do
Result := ('0' <= c and c <= '9')
end
is_unreserved_character (c: CHARACTER_32): BOOLEAN
-- unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
do
if
('a' <= c and c <= 'z') -- ALPHA
or ('A' <= c and c <= 'Z') -- ALPHA
or ('0' <= c and c <= '9') -- DIGIT
then
Result := True
else
inspect c
when '-', '_', '.', '~' then -- unreserved
Result := True
else
end
end
end
is_reserved_character (c: CHARACTER_32): BOOLEAN
-- reserved = gen-delims / sub-delims
do
Result := is_gen_delims_character (c) or is_sub_delims_character (c)
end
is_gen_delims_character (c: CHARACTER_32): BOOLEAN
-- gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
do
inspect c
when ':' , '/', '?' , '#' , '[' , ']' , '@' then
Result := True
else
end
end
is_sub_delims_character (c: CHARACTER_32): BOOLEAN
-- sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
-- / "*" / "+" / "," / ";" / "="
do
inspect c
when '!' , '$' , '&' , '%'' , '(' , ')' , '*' , '+' , ',' , ';' , '=' then -- sub-delims
Result := True
else
end
end
feature {NONE} -- Implementation
hex_digit: SPECIAL [NATURAL_32]
-- Hexadecimal digits.
once
create Result.make_filled (0, 16)
Result [0] := {NATURAL_32} 48 -- 48 '0'
Result [1] := {NATURAL_32} 49 -- 49 '1'
Result [2] := {NATURAL_32} 50 -- 50 '2'
Result [3] := {NATURAL_32} 51 -- 51 '3'
Result [4] := {NATURAL_32} 52 -- 52 '4'
Result [5] := {NATURAL_32} 53 -- 53 '5'
Result [6] := {NATURAL_32} 54 -- 54 '6'
Result [7] := {NATURAL_32} 55 -- 55 '7'
Result [8] := {NATURAL_32} 56 -- 56 '8'
Result [9] := {NATURAL_32} 57 -- 57 '9'
Result [10] := {NATURAL_32} 65 -- 65 'A'
Result [11] := {NATURAL_32} 66 -- 66 'B'
Result [12] := {NATURAL_32} 67 -- 67 'C'
Result [13] := {NATURAL_32} 68 -- 68 'D'
Result [14] := {NATURAL_32} 69 -- 69 'E'
Result [15] := {NATURAL_32} 70 -- 70 'F'
end
is_hexa_decimal (a_string: READABLE_STRING_GENERAL): BOOLEAN
-- Is `a_string' a valid hexadecimal sequence?
local
l_convertor: like ctoi_convertor
do
l_convertor := ctoi_convertor
l_convertor.parse_string_with_type (a_string, {NUMERIC_INFORMATION}.type_natural_32)
Result := l_convertor.is_integral_integer
end
hexadecimal_string_to_natural_32 (a_hex_string: READABLE_STRING_GENERAL): NATURAL_32
-- Convert hexadecimal value `a_hex_string' to its corresponding NATURAL_32 value.
require
is_hexa: is_hexa_decimal (a_hex_string)
local
l_convertor: like ctoi_convertor
do
l_convertor := ctoi_convertor
l_convertor.parse_string_with_type (a_hex_string, {NUMERIC_INFORMATION}.type_no_limitation)
Result := l_convertor.parsed_natural_32
end
ctoi_convertor: HEXADECIMAL_STRING_TO_INTEGER_CONVERTER
-- Converter used to convert string to integer or natural.
once
create Result.make
Result.set_leading_separators_acceptable (False)
Result.set_trailing_separators_acceptable (False)
ensure
ctoi_convertor_not_void: Result /= Void
end
note
copyright: "Copyright (c) 2011-2014, Eiffel Software and others"
license: "Eiffel Forum License v2 (see http://www.eiffel.com/licensing/forum.txt)"
source: "[
Eiffel Software
5949 Hollister Ave., Goleta, CA 93117 USA
Telephone 805-685-1006, Fax 805-685-6869
Website http://www.eiffel.com
Customer support http://support.eiffel.com
]"
end