Improved query and form data encoding (based on a very early version of the general URI percent-encoding rules).

- now correct encoding of space by '%20' in path segment, and '+' in query parameters.
Unify and fixed query parameters handling for libcurl and net implementation.
Fixed file uploading (various issue in libcurl, and net implementation).
Fixed form multipart encoding by using correctly the boundary.
Updated autotest cases.
Code cleaning.
This commit is contained in:
Jocelyn Fiat
2017-05-17 12:16:35 +02:00
parent 485a3812d9
commit 69b5ce637e
15 changed files with 932 additions and 122 deletions

View File

@@ -0,0 +1,628 @@
note
description: "[
Component to handle percent encoding
WARNING: THIS IS A COPY FROM $ISE_LIBRARY/library/text/uri library.
In the future, http_client will use directly the `uri` library.
]"
date: "$Date$"
revision: "$Revision$"
EIS: "name=Percent-encoding", "protocol=URI", "src=http://en.wikipedia.org/wiki/Percent-encoding"
class
URI_PERCENT_ENCODER
feature -- Percent encoding
append_percent_encoded_string_to (s: READABLE_STRING_GENERAL; a_result: STRING_GENERAL)
-- Append `s' as percent-encoded value to `a_result'
local
i,n: INTEGER
do
from
i := 1
n := s.count
until
i > n
loop
append_encoded_character_code_to (s.code (i), a_result)
i := i + 1
end
end
append_query_name_encoded_string_to (s: READABLE_STRING_GENERAL; a_result: STRING_GENERAL)
-- Append `s' as encoded for URI query name to `a_result'
local
i,n: INTEGER
do
from
i := 1
n := s.count
until
i > n
loop
append_query_name_encoded_character_code_to (s.code (i), a_result)
i := i + 1
end
end
append_query_value_encoded_string_to (s: READABLE_STRING_GENERAL; a_result: STRING_GENERAL)
-- Append `s' as encoded for URI query value to `a_result'.
local
i,n: INTEGER
do
from
i := 1
n := s.count
until
i > n
loop
append_query_value_encoded_character_code_to (s.code (i), a_result)
i := i + 1
end
end
append_path_segment_encoded_string_to (s: READABLE_STRING_GENERAL; a_result: STRING_GENERAL)
-- Append `a_string' as encoded for URI path segment to `a_result'
local
i,n: INTEGER
do
from
i := 1
n := s.count
until
i > n
loop
append_path_segment_encoded_character_code_to (s.code (i), a_result)
i := i + 1
end
end
append_www_form_url_encoded_string_to (s: READABLE_STRING_GENERAL; a_result: STRING_GENERAL)
-- Append `a_string' as www-form-urlencoded value to `a_result'.
-- The main difference with `append_percent_encoded_string_to` is the encoding of space using '+'.
local
i,n: INTEGER
do
from
i := 1
n := s.count
until
i > n
loop
inspect s.code (i)
when 32 then -- space: 32 ' '
a_result.append_code (43) -- 43 '+'
else
append_encoded_character_code_to (s.code (i), a_result)
end
i := i + 1
end
end
feature {NONE} -- URI building helpers
append_encoded_character_code_to (c: NATURAL_32; a_result: STRING_GENERAL)
-- Append character code `c' as query name encoded content into `a_result'.
do
if
--| unreserved ALPHA / DIGIT
(48 <= c and c <= 57) -- DIGIT: 0 .. 9
or (65 <= c and c <= 90) -- ALPHA: A .. Z
or (97 <= c and c <= 122) -- ALPHA: a .. z
then
a_result.append_code (c)
else
inspect c
when
45, 46, 95, 126 -- unreserved characters: -._~
then
a_result.append_code (c)
when
58, 64, -- reserved =+ gen-delims: : @
33, 36, 38, 39, 40, 41, 42, -- reserved =+ sub-delims: ! $ & ' ( ) *
43, 44, 59, 61, -- reserved = sub-delims: + , ; =
37 -- percent encoding: %
then
append_percent_encoded_character_code_to (c, a_result)
else
append_percent_encoded_character_code_to (c, a_result)
end
end
end
append_query_name_encoded_character_code_to (c: NATURAL_32; a_result: STRING_GENERAL)
-- Append character code `a_code' as query name encoded content into `a_result'.
do
inspect c
when 61 then -- equal sign: =
append_percent_encoded_character_code_to (c, a_result)
else
append_query_value_encoded_character_code_to (c, a_result)
end
end
append_query_value_encoded_character_code_to (c: NATURAL_32; a_result: STRING_GENERAL)
-- Append character code `a_code' as query value encoded content into `a_result'.
do
inspect c
when 32 then -- Space
a_result.append_code (43) -- 43 '+'
when
39, -- '
58, 64, -- reserved =+ gen-delims: : @
33, 36, 40, 41, 42, -- reserved =+ sub-delims: ! $ ( ) *
44, 59, 61 -- reserved = sub-delims: , ; =
then
a_result.append_code (c)
when
47, -- slash: /
63 -- question mark ?
then
a_result.append_code (c)
else
append_encoded_character_code_to (c, a_result)
end
end
append_path_segment_encoded_character_code_to (c: NATURAL_32; a_result: STRING_GENERAL)
-- Append character code `a_code' as query name encoded content into `a_result'.
do
append_encoded_character_code_to (c, a_result)
end
feature -- Percent encoding: character
append_percent_encoded_character_code_to (a_code: NATURAL_32; a_result: STRING_GENERAL)
-- Append character code `a_code' as percent-encoded content into `a_result'
do
if a_code > 0xFF then
-- Unicode
append_percent_encoded_unicode_character_code_to (a_code, a_result)
elseif a_code > 0x7F then
-- Extended ASCII
-- This requires percent-encoding on UTF-8 converted character.
append_percent_encoded_unicode_character_code_to (a_code, a_result)
else
-- ASCII
append_percent_encoded_ascii_character_code_to (a_code, a_result)
end
ensure
appended: a_result.count > old a_result.count
end
feature {NONE} -- Implementation: character encoding
append_percent_encoded_ascii_character_code_to (a_code: NATURAL_32; a_result: STRING_GENERAL)
-- Append extended ascii character code `a_code' as percent-encoded content into `a_result'
-- Note: it does not UTF-8 convert this extended ASCII.
require
is_extended_ascii: a_code <= 0xFF
local
c: INTEGER
do
if a_code > 0xFF then
-- Unicode
append_percent_encoded_unicode_character_code_to (a_code, a_result)
else
-- Extended ASCII
c := a_code.to_integer_32
a_result.append_code (37) -- 37 '%%'
a_result.append_code (hex_digit [c |>> 4])
a_result.append_code (hex_digit [c & 0xF])
end
ensure
appended: a_result.count > old a_result.count
end
append_percent_encoded_unicode_character_code_to (a_code: NATURAL_32; a_result: STRING_GENERAL)
-- Append Unicode character code `a_code' as UTF-8 and percent-encoded content into `a_result'
-- Note: it does include UTF-8 conversion of extended ASCII and Unicode.
do
if a_code <= 0x7F then
-- 0xxxxxxx
append_percent_encoded_ascii_character_code_to (a_code, a_result)
elseif a_code <= 0x7FF then
-- 110xxxxx 10xxxxxx
append_percent_encoded_ascii_character_code_to ((a_code |>> 6) | 0xC0, a_result)
append_percent_encoded_ascii_character_code_to ((a_code & 0x3F) | 0x80, a_result)
elseif a_code <= 0xFFFF then
-- 1110xxxx 10xxxxxx 10xxxxxx
append_percent_encoded_ascii_character_code_to ((a_code |>> 12) | 0xE0, a_result)
append_percent_encoded_ascii_character_code_to (((a_code |>> 6) & 0x3F) | 0x80, a_result)
append_percent_encoded_ascii_character_code_to ((a_code & 0x3F) | 0x80, a_result)
else
-- c <= 1FFFFF - there are no higher code points
-- 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
append_percent_encoded_ascii_character_code_to ((a_code |>> 18) | 0xF0, a_result)
append_percent_encoded_ascii_character_code_to (((a_code |>> 12) & 0x3F) | 0x80, a_result)
append_percent_encoded_ascii_character_code_to (((a_code |>> 6) & 0x3F) | 0x80, a_result)
append_percent_encoded_ascii_character_code_to ((a_code & 0x3F) | 0x80, a_result)
end
ensure
appended: a_result.count > old a_result.count
end
feature -- Percent decoding
append_percent_decoded_string_to (v: READABLE_STRING_GENERAL; a_result: STRING_GENERAL)
-- Append to `a_result' a string equivalent to the percent-encoded string `v'
--| Note that is `a_result' is a STRING_8, any Unicode character will be kept as UTF-8
local
i,n: INTEGER
c: NATURAL_32
pr: CELL [INTEGER]
a_result_is_string_32: BOOLEAN
do
a_result_is_string_32 := attached {STRING_32} a_result
from
i := 1
create pr.put (i)
n := v.count
until
i > n
loop
c := v.code (i)
inspect c
when 43 then -- 43 '+'
-- Some implementation are replacing spaces with "+" instead of "%20"
a_result.append_code (32) -- 32 ' '
when 37 then -- 37 '%%'
-- An escaped character ?
if i = n then -- Error?
a_result.append_code (c)
else
if a_result_is_string_32 then
-- Convert UTF-8 to UTF-32
pr.replace (i)
c := next_percent_decoded_unicode_character_code (v, pr)
a_result.append_code (c)
i := pr.item
else
-- Keep UTF-8
pr.replace (i)
c := next_percent_decoded_character_code (v, pr)
a_result.append_code (c)
i := pr.item
end
end
else
if c <= 0x7F then
a_result.append_code (c)
else
if a_result_is_string_32 then
a_result.append_code (c)
else
-- Keep the percent encoded char for non string 32.
append_percent_encoded_character_code_to (c, a_result)
end
end
end
i := i + 1
end
end
feature {NONE} -- Implementation: decoding
next_percent_decoded_character_code (v: READABLE_STRING_GENERAL; a_position: CELL [INTEGER]): NATURAL_32
-- Character decoded from string `v' starting from index `a_position.item'
-- note: it also updates `a_position.item' to indicate the new index position.
require
valid_start: a_position.item <= v.count
is_percent_char: v.code (a_position.item) = 37 -- 37 '%%'
local
c: NATURAL_32
i, n: INTEGER
not_a_digit: BOOLEAN
ascii_pos: NATURAL_32
ival: NATURAL_32
pos: INTEGER
c_is_digit: BOOLEAN
do
--| pos is index in stream of escape character ('%')
pos := a_position.item
c := v.code (pos + 1)
if c = 85 or c = 117 then -- 117 'u' 85 'U'
-- NOTE: this is not a standard, but it can occur, so use this for decoding only
-- An escaped Unicode (ucs2) value, from ECMA scripts
-- has the form: %u<n> where <n> is the UCS value
-- of the character (two byte integer, one to 4 chars
-- after escape sequence).
-- See: http://en.wikipedia.org/wiki/Percent-encoding#Non-standard_implementations
-- UTF-8 result can be 1 to 4 characters.
from
i := pos + 2
n := v.count
until
(i > n) or not_a_digit
loop
c := v.code (i)
c_is_digit := (48 <= c and c <= 57) -- DIGIT: 0 .. 9
if
c_is_digit
or (97 <= c and c <= 102) -- ALPHA: a..f
or (65 <= c and c <= 70) -- ALPHA: A..F
then
ival := ival * 16
if c_is_digit then
ival := ival + (c - 48) -- 48 '0'
else
if c > 70 then -- a..f
ival := ival + (c - 97) + 10 -- 97 'a'
else -- A..F
ival := ival + (c - 65) + 10 -- 65 'A'
end
end
i := i + 1
else
not_a_digit := True
i := i - 1
end
end
a_position.replace (i)
Result := ival
else
-- ASCII char?
ascii_pos := hexadecimal_string_to_natural_32 (v.substring (pos + 1, pos + 2))
Result := ascii_pos
a_position.replace (pos + 2)
end
end
next_percent_decoded_unicode_character_code (v: READABLE_STRING_GENERAL; a_position: CELL [INTEGER]): NATURAL_32
-- Next decoded character from `v' at position `a_position.item'
-- note: it also updates `a_position' to indicate the new index position.
require
valid_start: a_position.item <= v.count
is_percent_char: v.code (a_position.item) = 37 -- 37 '%%'
local
n, j: INTEGER
c: NATURAL_32
c1, c2, c3, c4: NATURAL_32
pr: CELL [INTEGER]
do
create pr.put (a_position.item)
c1 := next_percent_decoded_character_code (v, pr)
j := pr.item
n := v.count
Result := c1
a_position.replace (j)
if c1 <= 0x7F then
-- 0xxxxxxx
Result := c1
elseif c1 <= 0xDF then
-- 110xxxxx 10xxxxxx
if j + 2 <= n then
c := v.code (j + 1)
if c = 37 then -- 37 '%%'
pr.replace (j + 1)
c2 := next_percent_decoded_character_code (v, pr)
j := pr.item
Result := (
((c1 & 0x1F) |<< 6) |
( c2 & 0x3F )
)
a_position.replace (j)
else
-- Do not try to decode
end
end
elseif c1 <= 0xEF then
-- 1110xxxx 10xxxxxx 10xxxxxx
if j + 2 <= n then
c := v.code (j + 1)
if c = 37 then -- 37 '%%'
pr.replace (j + 1)
c2 := next_percent_decoded_character_code (v, pr)
j := pr.item
if j + 2 <= n then
c := v.code (j + 1)
if c = 37 then -- 37 '%%'
pr.replace (j + 1)
c3 := next_percent_decoded_character_code (v, pr)
j := pr.item
Result := (
((c1 & 0xF) |<< 12) |
((c2 & 0x3F) |<< 6) |
( c3 & 0x3F )
)
a_position.replace (j)
else
-- Do not try to decode
end
end
else
-- Do not try to decode
end
end
elseif c1 <= 0xF7 then
-- 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
if j + 2 <= n then
c := v.code (j + 1)
if c = 37 then -- 37 '%%'
pr.replace (j + 1)
c2 := next_percent_decoded_character_code (v, pr)
j := pr.item
if j + 2 <= n then
c := v.code (j + 1)
if c = 37 then -- 37 '%%'
pr.replace (j + 1)
c3 := next_percent_decoded_character_code (v, pr)
j := pr.item
if j + 2 <= n then
c := v.code (j + 1)
if c = 37 then -- 37 '%%'
pr.replace (j + 1)
c4 := next_percent_decoded_character_code (v, pr)
j := pr.item
a_position.replace (j)
Result := (
((c1 & 0x7) |<< 18 ) |
((c2 & 0x3F) |<< 12) |
((c3 & 0x3F) |<< 6) |
( c4 & 0x3F )
)
else
-- Do not try to decode
end
end
else
-- Do not try to decode
end
end
else
-- Do not try to decode
end
end
else
Result := c1
end
end
feature -- RFC and characters
is_hexa_decimal_character (c: CHARACTER_32): BOOLEAN
-- Is hexadecimal character ?
do
Result := ('a' <= c and c <= 'f') or ('A' <= c and c <= 'F') -- HEXA
or ('0' <= c and c <= '9') -- DIGIT
end
is_alpha_or_digit_character (c: CHARACTER_32): BOOLEAN
-- Is ALPHA or DIGIT character ?
do
Result := ('a' <= c and c <= 'z') or ('A' <= c and c <= 'Z') -- ALPHA
or ('0' <= c and c <= '9') -- DIGIT
end
is_alpha_character (c: CHARACTER_32): BOOLEAN
-- Is ALPHA character ?
do
Result := ('a' <= c and c <= 'z') or ('A' <= c and c <= 'Z')
end
is_digit_character (c: CHARACTER_32): BOOLEAN
-- Is DIGIT character ?
do
Result := ('0' <= c and c <= '9')
end
is_unreserved_character (c: CHARACTER_32): BOOLEAN
-- unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
do
if
('a' <= c and c <= 'z') -- ALPHA
or ('A' <= c and c <= 'Z') -- ALPHA
or ('0' <= c and c <= '9') -- DIGIT
then
Result := True
else
inspect c
when '-', '_', '.', '~' then -- unreserved
Result := True
else
end
end
end
is_reserved_character (c: CHARACTER_32): BOOLEAN
-- reserved = gen-delims / sub-delims
do
Result := is_gen_delims_character (c) or is_sub_delims_character (c)
end
is_gen_delims_character (c: CHARACTER_32): BOOLEAN
-- gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
do
inspect c
when ':' , '/', '?' , '#' , '[' , ']' , '@' then
Result := True
else
end
end
is_sub_delims_character (c: CHARACTER_32): BOOLEAN
-- sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
-- / "*" / "+" / "," / ";" / "="
do
inspect c
when '!' , '$' , '&' , '%'' , '(' , ')' , '*' , '+' , ',' , ';' , '=' then -- sub-delims
Result := True
else
end
end
feature {NONE} -- Implementation
hex_digit: SPECIAL [NATURAL_32]
-- Hexadecimal digits.
once
create Result.make_filled (0, 16)
Result [0] := {NATURAL_32} 48 -- 48 '0'
Result [1] := {NATURAL_32} 49 -- 49 '1'
Result [2] := {NATURAL_32} 50 -- 50 '2'
Result [3] := {NATURAL_32} 51 -- 51 '3'
Result [4] := {NATURAL_32} 52 -- 52 '4'
Result [5] := {NATURAL_32} 53 -- 53 '5'
Result [6] := {NATURAL_32} 54 -- 54 '6'
Result [7] := {NATURAL_32} 55 -- 55 '7'
Result [8] := {NATURAL_32} 56 -- 56 '8'
Result [9] := {NATURAL_32} 57 -- 57 '9'
Result [10] := {NATURAL_32} 65 -- 65 'A'
Result [11] := {NATURAL_32} 66 -- 66 'B'
Result [12] := {NATURAL_32} 67 -- 67 'C'
Result [13] := {NATURAL_32} 68 -- 68 'D'
Result [14] := {NATURAL_32} 69 -- 69 'E'
Result [15] := {NATURAL_32} 70 -- 70 'F'
end
is_hexa_decimal (a_string: READABLE_STRING_GENERAL): BOOLEAN
-- Is `a_string' a valid hexadecimal sequence?
local
l_convertor: like ctoi_convertor
do
l_convertor := ctoi_convertor
l_convertor.parse_string_with_type (a_string, {NUMERIC_INFORMATION}.type_natural_32)
Result := l_convertor.is_integral_integer
end
hexadecimal_string_to_natural_32 (a_hex_string: READABLE_STRING_GENERAL): NATURAL_32
-- Convert hexadecimal value `a_hex_string' to its corresponding NATURAL_32 value.
require
is_hexa: is_hexa_decimal (a_hex_string)
local
l_convertor: like ctoi_convertor
do
l_convertor := ctoi_convertor
l_convertor.parse_string_with_type (a_hex_string, {NUMERIC_INFORMATION}.type_no_limitation)
Result := l_convertor.parsed_natural_32
end
ctoi_convertor: HEXADECIMAL_STRING_TO_INTEGER_CONVERTER
-- Converter used to convert string to integer or natural.
once
create Result.make
Result.set_leading_separators_acceptable (False)
Result.set_trailing_separators_acceptable (False)
ensure
ctoi_convertor_not_void: Result /= Void
end
note
copyright: "2011-2017, Jocelyn Fiat, Javier Velilla, Eiffel Software and others"
license: "Eiffel Forum License v2 (see http://www.eiffel.com/licensing/forum.txt)"
source: "[
Eiffel Software
5949 Hollister Ave., Goleta, CA 93117 USA
Telephone 805-685-1006, Fax 805-685-6869
Website http://www.eiffel.com
Customer support http://support.eiffel.com
]"
end