Improved query and form data encoding (based on a very early version of the general URI percent-encoding rules).

- Now correctly encodes space as '%20' in path segments, and as '+' in query parameters. Unified and fixed query parameter handling for the libcurl and net implementations. Fixed file uploading (various issues in the libcurl and net implementations). Fixed multipart form encoding by using the boundary correctly. Updated autotest cases. Code cleaning.
2017-05-17 12:16:35 +02:00
parent 485a3812d9
commit 69b5ce637e
15 changed files with 932 additions and 122 deletions
--- a/library/network/http_client/src/implementation/uri_percent_encoder.e
+++ b/library/network/http_client/src/implementation/uri_percent_encoder.e
@@ -0,0 +1,628 @@
+note
+	description: "[
+				Component to handle percent encoding
+
+				WARNING: THIS IS A COPY FROM $ISE_LIBRARY/library/text/uri library.
+				In the future, http_client will use directly the `uri` library.
+			]"
+	date: "$Date$"
+	revision: "$Revision$"
+	EIS: "name=Percent-encoding", "protocol=URI", "src=http://en.wikipedia.org/wiki/Percent-encoding"
+
+class
+	URI_PERCENT_ENCODER
+
+feature -- Percent encoding
+
+	append_percent_encoded_string_to (s: READABLE_STRING_GENERAL; a_result: STRING_GENERAL)
+			-- Append to `a_result' the percent-encoded form of every character code of `s'.
+		local
+			l_pos, l_count: INTEGER
+		do
+			from
+				l_count := s.count
+				l_pos := 1
+			until
+				l_pos > l_count
+			loop
+				append_encoded_character_code_to (s.code (l_pos), a_result)
+				l_pos := l_pos + 1
+			end
+		end
+
+	append_query_name_encoded_string_to (s: READABLE_STRING_GENERAL; a_result: STRING_GENERAL)
+			-- Append to `a_result' the characters of `s', each one encoded as part of a URI query name.
+		local
+			l_pos, l_count: INTEGER
+		do
+			from
+				l_count := s.count
+				l_pos := 1
+			until
+				l_pos > l_count
+			loop
+				append_query_name_encoded_character_code_to (s.code (l_pos), a_result)
+				l_pos := l_pos + 1
+			end
+		end
+
+	append_query_value_encoded_string_to (s: READABLE_STRING_GENERAL; a_result: STRING_GENERAL)
+			-- Append to `a_result' the characters of `s', each one encoded as part of a URI query value.
+		local
+			l_pos, l_count: INTEGER
+		do
+			from
+				l_count := s.count
+				l_pos := 1
+			until
+				l_pos > l_count
+			loop
+				append_query_value_encoded_character_code_to (s.code (l_pos), a_result)
+				l_pos := l_pos + 1
+			end
+		end
+
+	append_path_segment_encoded_string_to (s: READABLE_STRING_GENERAL; a_result: STRING_GENERAL)
+			-- Append to `a_result' the characters of `s', each one encoded as part of a URI path segment.
+		local
+			l_pos, l_count: INTEGER
+		do
+			from
+				l_count := s.count
+				l_pos := 1
+			until
+				l_pos > l_count
+			loop
+				append_path_segment_encoded_character_code_to (s.code (l_pos), a_result)
+				l_pos := l_pos + 1
+			end
+		end
+
+	append_www_form_url_encoded_string_to (s: READABLE_STRING_GENERAL; a_result: STRING_GENERAL)
+			-- Append `s' as www-form-urlencoded value to `a_result': like `append_percent_encoded_string_to`, but a space becomes '+'.
+		local
+			l_pos, l_count: INTEGER
+			l_code: NATURAL_32
+		do
+			from
+				l_count := s.count
+				l_pos := 1
+			until
+				l_pos > l_count
+			loop
+				l_code := s.code (l_pos)
+				if l_code = 32 then -- space: 32 ' '
+					a_result.append_code (43) -- 43 '+'
+				else
+					append_encoded_character_code_to (l_code, a_result)
+				end
+				l_pos := l_pos + 1
+			end
+		end
+
+feature {NONE} -- URI building helpers		
+
+	append_encoded_character_code_to (c: NATURAL_32; a_result: STRING_GENERAL)
+			-- Append character code `c' to `a_result'.
+			-- Unreserved characters (ALPHA / DIGIT / - . _ ~) are appended as is;
+			-- every other code is percent-encoded.
+		do
+			inspect c
+			when
+					--| unreserved ALPHA / DIGIT
+				48 .. 57, -- DIGIT: 0 .. 9
+				65 .. 90, -- ALPHA: A .. Z
+				97 .. 122, -- ALPHA: a .. z
+					--| unreserved marks
+				45, 46, -- - .
+				95, 126 -- _ ~
+			then
+					-- Unreserved characters are never escaped.
+				a_result.append_code (c)
+			else
+					-- Everything else is escaped. This covers in particular
+					--   58, 64: reserved gen-delims : @
+					--   33, 36, 38, 39, 40, 41, 42: reserved sub-delims ! $ & ' ( ) *
+					--   43, 44, 59, 61: reserved sub-delims + , ; =
+					--   37: the percent sign % itself
+					-- as well as any code not listed above (space, / ? #, non-ASCII, ...).
+				append_percent_encoded_character_code_to (c, a_result)
+			end
+		end
+
+
+	append_query_name_encoded_character_code_to (c: NATURAL_32; a_result: STRING_GENERAL)
+			-- Append character code `c' as query name encoded content into `a_result'.
+		do
+			if c = 61 then
+					-- equal sign '=' is always escaped inside a name; a query value keeps it literal.
+				append_percent_encoded_character_code_to (c, a_result)
+			else
+				append_query_value_encoded_character_code_to (c, a_result)
+			end
+		end
+
+	append_query_value_encoded_character_code_to (c: NATURAL_32; a_result: STRING_GENERAL)
+			-- Append character code `c' as query value encoded content into `a_result'.
+			-- Space becomes '+'; a fixed set of delimiters is kept literal.
+		do
+			if c = 32 then
+					-- Space is written as '+' (43).
+				a_result.append_code (43)
+			else
+				inspect c
+				when
+					33, 36, 39, 40, 41, 42, 44, 59, 61, -- sub-delims kept literal: ! $ ' ( ) * , ; =
+					58, 64, -- gen-delims kept literal: : @
+					47, 63 -- slash / and question mark ?
+				then
+					a_result.append_code (c)
+				else
+						-- Anything else, including & (38), + (43), % (37) and # (35),
+						-- goes through the generic rule: unreserved as is, the rest escaped.
+					append_encoded_character_code_to (c, a_result)
+				end
+			end
+		end
+
+	append_path_segment_encoded_character_code_to (c: NATURAL_32; a_result: STRING_GENERAL)
+			-- Append character code `c' as path segment encoded content into `a_result'.
+		do
+			append_encoded_character_code_to (c, a_result) -- Same rule as the generic encoding; a space therefore becomes %20.
+		end
+
+feature -- Percent encoding: character		
+
+	append_percent_encoded_character_code_to (a_code: NATURAL_32; a_result: STRING_GENERAL)
+			-- Append character code `a_code' as percent-encoded content into `a_result'.
+			-- Codes above 0x7F are converted to UTF-8 first, and each resulting byte
+			-- is then escaped.
+		do
+			if a_code <= 0x7F then
+					-- ASCII
+					-- A single escaped byte.
+				append_percent_encoded_ascii_character_code_to (a_code, a_result)
+			else
+					-- Extended ASCII (0x80 .. 0xFF) and Unicode (> 0xFF) share the same path:
+					-- percent-encoding on the UTF-8 converted character.
+				append_percent_encoded_unicode_character_code_to (a_code, a_result)
+			end
+		ensure
+			appended: a_result.count > old a_result.count
+		end
+
+feature {NONE} -- Implementation: character encoding
+
+	append_percent_encoded_ascii_character_code_to (a_code: NATURAL_32; a_result: STRING_GENERAL)
+			-- Append the single byte `a_code' (extended ascii range) to `a_result' as '%%' followed by two uppercase hexadecimal digits.
+			-- Note: it does not UTF-8 convert this extended ASCII.
+		require
+			is_extended_ascii: a_code <= 0xFF
+		local
+			l_byte: INTEGER
+		do
+			if a_code <= 0xFF then
+					-- Extended ASCII: one escaped byte.
+				l_byte := a_code.to_integer_32
+				a_result.append_code (37) -- 37 '%%'
+				a_result.append_code (hex_digit [l_byte |>> 4]) -- high nibble
+				a_result.append_code (hex_digit [l_byte & 0xF]) -- low nibble
+			else
+					-- Unicode: reachable only when the precondition is not monitored.
+				append_percent_encoded_unicode_character_code_to (a_code, a_result)
+			end
+		ensure
+			appended: a_result.count > old a_result.count
+		end
+
+	append_percent_encoded_unicode_character_code_to (a_code: NATURAL_32; a_result: STRING_GENERAL)
+			-- Append Unicode character code `a_code' to `a_result' as its UTF-8 byte sequence, each byte being percent-encoded.
+			-- Note: it does include UTF-8 conversion of extended ASCII (0x80 .. 0xFF) and Unicode.
+		do
+			if a_code <= 0x7F then
+					-- 1 byte: 0xxxxxxx
+				append_percent_encoded_ascii_character_code_to (a_code, a_result)
+			elseif a_code <= 0x7FF then
+					-- 2 bytes: 110xxxxx 10xxxxxx
+				append_percent_encoded_ascii_character_code_to ((a_code |>> 6) | 0xC0, a_result)
+				append_percent_encoded_ascii_character_code_to ((a_code & 0x3F) | 0x80, a_result)
+			elseif a_code <= 0xFFFF then
+					-- 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
+				append_percent_encoded_ascii_character_code_to ((a_code |>> 12) | 0xE0, a_result)
+				append_percent_encoded_ascii_character_code_to (((a_code |>> 6) & 0x3F) | 0x80, a_result)
+				append_percent_encoded_ascii_character_code_to ((a_code & 0x3F) | 0x80, a_result)
+			else
+					-- Assumes `a_code' <= 0x1FFFFF (there are no higher code points); NOTE(review): this bound is not checked here.
+					-- 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+				append_percent_encoded_ascii_character_code_to ((a_code |>> 18) | 0xF0, a_result)
+				append_percent_encoded_ascii_character_code_to (((a_code |>> 12) & 0x3F) | 0x80, a_result)
+				append_percent_encoded_ascii_character_code_to (((a_code |>> 6) & 0x3F) | 0x80, a_result)
+				append_percent_encoded_ascii_character_code_to ((a_code & 0x3F) | 0x80, a_result)
+			end
+		ensure
+			appended: a_result.count > old a_result.count
+		end
+
+feature -- Percent decoding
+
+	append_percent_decoded_string_to (v: READABLE_STRING_GENERAL; a_result: STRING_GENERAL)
+			-- Append to `a_result' a string equivalent to the percent-encoded string `v'; '+' is decoded as a space.
+			--| Note that if `a_result' is a STRING_8, any Unicode character will be kept as UTF-8
+		local
+			i,n: INTEGER
+			c: NATURAL_32
+			pr: CELL [INTEGER]
+			a_result_is_string_32: BOOLEAN
+		do
+			a_result_is_string_32 := attached {STRING_32} a_result
+			from
+				i := 1
+				create pr.put (i)
+				n := v.count
+			until
+				i > n
+			loop
+				c := v.code (i)
+				inspect c
+				when 43 then -- 43 '+'
+						-- Some implementations replace spaces with "+" instead of "%20"
+					a_result.append_code (32) -- 32 ' '
+				when 37 then -- 37 '%%'
+						-- An escaped character ? Any escape (%XX or %u<n>) needs at least two characters after '%%'.
+					if i + 2 > n then -- Truncated escape: keep '%%' as is, what follows is copied as ordinary text.
+						a_result.append_code (c)
+					else
+						if a_result_is_string_32 then
+								-- Convert UTF-8 to UTF-32
+							pr.replace (i)
+							c := next_percent_decoded_unicode_character_code (v, pr)
+							a_result.append_code (c)
+							i := pr.item
+						else
+								-- Keep UTF-8: each escape is decoded on its own, as one code.
+							pr.replace (i)
+							c := next_percent_decoded_character_code (v, pr)
+							a_result.append_code (c)
+							i := pr.item
+						end
+					end
+				else
+					if c <= 0x7F then
+						a_result.append_code (c)
+					else
+						if a_result_is_string_32 then
+							a_result.append_code (c)
+						else
+								-- Keep the percent encoded char for non string 32.
+							append_percent_encoded_character_code_to (c, a_result)
+						end
+					end
+				end
+				i := i + 1
+			end
+		end
+
+feature {NONE} -- Implementation: decoding
+
+	next_percent_decoded_character_code (v: READABLE_STRING_GENERAL; a_position: CELL [INTEGER]): NATURAL_32
+			-- Character code decoded from the single escape sequence (%XX or %u<n>) of `v' starting at index `a_position.item'
+			-- note: it also updates `a_position.item' to indicate the new index position (the last character consumed).
+		require
+			valid_start: a_position.item <= v.count
+			is_percent_char: v.code (a_position.item) = 37 -- 37 '%%'
+		local
+			c: NATURAL_32
+			i, n: INTEGER
+			not_a_digit: BOOLEAN
+			ascii_pos: NATURAL_32
+			ival: NATURAL_32
+			pos: INTEGER
+			c_is_digit: BOOLEAN
+		do
+				--| pos is index in stream of escape character ('%')
+			pos := a_position.item
+			c := v.code (pos + 1)
+			if c = 85 or c = 117 then -- 117 'u'  85 'U'
+					-- NOTE: this is not a standard, but it can occur, so use this for decoding only
+					-- An escaped Unicode (ucs2) value, from ECMA scripts
+					--   has the form: %u<n> where <n> is the UCS value
+					--   of the character (two byte integer, one to 4 hexadecimal
+					--   digits after the escape sequence; a 5th digit is ordinary text).
+					-- See: http://en.wikipedia.org/wiki/Percent-encoding#Non-standard_implementations
+					-- UTF-8 result can be 1 to 4 characters.
+				from
+					i := pos + 2
+					n := v.count
+				until
+					(i > n) or (i > pos + 5) or not_a_digit
+				loop
+					c := v.code (i)
+					c_is_digit := (48 <= c and c <= 57) -- DIGIT: 0 .. 9
+					if
+						   c_is_digit
+						or (97 <= c and c <= 102)	-- ALPHA: a..f
+						or (65 <= c and c <= 70)	-- ALPHA: A..F
+					then
+						ival := ival * 16
+						if c_is_digit then
+							ival := ival + (c - 48) -- 48 '0'
+						else
+							if c > 70 then -- a..f
+								ival := ival + (c - 97) + 10 -- 97 'a'
+							else -- A..F
+								ival := ival + (c - 65) + 10 -- 65 'A'
+							end
+						end
+						i := i + 1
+					else
+						not_a_digit := True
+						i := i - 1
+					end
+				end
+				a_position.replace (i.min (pos + 5)) -- Never beyond the 4th hexadecimal digit.
+				Result := ival
+			else
+					-- ASCII char? Exactly two hexadecimal digits are expected after '%%'.
+				ascii_pos := hexadecimal_string_to_natural_32 (v.substring (pos + 1, pos + 2))
+				Result := ascii_pos
+				a_position.replace (pos + 2)
+			end
+		end
+
+	next_percent_decoded_unicode_character_code (v: READABLE_STRING_GENERAL; a_position: CELL [INTEGER]): NATURAL_32
+			-- Next decoded character from `v' at position `a_position.item'; consecutive %XX escapes forming one UTF-8 sequence are merged into a single code.
+			-- note: it also updates `a_position' to indicate the new index position. If the sequence is incomplete, only the first escape is consumed.
+		require
+			valid_start: a_position.item <= v.count
+			is_percent_char: v.code (a_position.item) = 37 -- 37 '%%'
+		local
+			n, j: INTEGER
+			c: NATURAL_32
+			c1, c2, c3, c4: NATURAL_32
+			pr: CELL [INTEGER]
+		do
+			create pr.put (a_position.item)
+			c := v.code (a_position.item + 1) -- 'u' or 'U' here means a %u<n> escape, whose value is already a code point.
+			c1 := next_percent_decoded_character_code (v, pr)
+			j := pr.item
+			n := v.count
+
+			Result := c1
+			a_position.replace (j)
+
+			if c1 <= 0x7F or c = 85 or c = 117 then -- 85 'U'  117 'u'
+					-- 0xxxxxxx, or a %u<n> value: not a UTF-8 lead byte, nothing to merge.
+				Result := c1
+			elseif c1 <= 0xDF then
+					-- 110xxxxx 10xxxxxx
+				if j + 3 <= n then -- Room for a complete "%XX" after index `j'?
+					c := v.code (j + 1)
+					if c = 37 then -- 37 '%%'
+						pr.replace (j + 1)
+						c2 := next_percent_decoded_character_code (v, pr)
+						j := pr.item
+						Result := (
+									((c1 & 0x1F) |<< 6) |
+									( c2 & 0x3F       )
+								)
+						a_position.replace (j)
+					else
+						-- Do not try to decode
+					end
+				end
+			elseif c1 <= 0xEF then
+					-- 1110xxxx 10xxxxxx 10xxxxxx
+				if j + 3 <= n then
+					c := v.code (j + 1)
+					if c = 37 then -- 37 '%%'
+						pr.replace (j + 1)
+						c2 := next_percent_decoded_character_code (v, pr)
+						j := pr.item
+						if j + 3 <= n then
+							c := v.code (j + 1)
+							if c = 37 then -- 37 '%%'
+								pr.replace (j + 1)
+								c3 := next_percent_decoded_character_code (v, pr)
+								j := pr.item
+
+								Result := (
+										((c1 & 0xF)  |<< 12) |
+										((c2 & 0x3F) |<<  6) |
+										( c3 & 0x3F        )
+									)
+								a_position.replace (j)
+							else
+								-- Do not try to decode
+							end
+						end
+					else
+						-- Do not try to decode
+					end
+				end
+			elseif c1 <= 0xF7 then
+					-- 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+
+				if j + 3 <= n then
+					c := v.code (j + 1)
+					if c = 37 then -- 37 '%%'
+						pr.replace (j + 1)
+						c2 := next_percent_decoded_character_code (v, pr)
+						j := pr.item
+						if j + 3 <= n then
+							c := v.code (j + 1)
+							if c = 37 then -- 37 '%%'
+								pr.replace (j + 1)
+								c3 := next_percent_decoded_character_code (v, pr)
+								j := pr.item
+								if j + 3 <= n then
+									c := v.code (j + 1)
+									if c = 37 then -- 37 '%%'
+										pr.replace (j + 1)
+										c4 := next_percent_decoded_character_code (v, pr)
+										j := pr.item
+
+										a_position.replace (j)
+
+										Result := (
+												((c1 & 0x7)  |<< 18 ) |
+												((c2 & 0x3F) |<< 12) |
+												((c3 & 0x3F) |<<  6) |
+												( c4 & 0x3F        )
+											)
+									else
+										-- Do not try to decode
+									end
+								end
+							else
+								-- Do not try to decode
+							end
+						end
+					else
+						-- Do not try to decode
+					end
+				end
+			else
+				Result := c1
+			end
+		end
+
+feature -- RFC and characters
+
+	is_hexa_decimal_character (c: CHARACTER_32): BOOLEAN
+			-- Is `c' a hexadecimal digit, i.e. one of 0 .. 9, a .. f or A .. F?
+		do
+			Result :=  (c >= '0' and c <= '9')								-- DIGIT
+					or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F')	-- HEXA
+		end
+
+	is_alpha_or_digit_character (c: CHARACTER_32): BOOLEAN
+			-- Is `c' an ALPHA (a .. z, A .. Z) or a DIGIT (0 .. 9) character ?
+			-- Combines `is_alpha_character' and `is_digit_character'.
+		do
+			Result := is_alpha_character (c) or is_digit_character (c)
+		end
+
+	is_alpha_character (c: CHARACTER_32): BOOLEAN
+			-- Is `c' an ALPHA character, i.e. in A .. Z or a .. z ?
+		do
+			Result := (c >= 'A' and c <= 'Z') or (c >= 'a' and c <= 'z')
+		end
+
+	is_digit_character (c: CHARACTER_32): BOOLEAN
+			-- Is `c' a DIGIT character, i.e. in 0 .. 9 ?
+		do
+			Result := c >= '0' and c <= '9'
+		end
+
+	is_unreserved_character (c: CHARACTER_32): BOOLEAN
+			-- Is `c' in the "unreserved" set?
+			-- unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
+		do
+			inspect c
+			when
+				'a' .. 'z', -- ALPHA
+				'A' .. 'Z', -- ALPHA
+				'0' .. '9', -- DIGIT
+				'-', '_', '.', '~' -- unreserved marks
+			then
+				Result := True
+			else
+					-- Any other character is not unreserved.
+				Result := False
+			end
+		end
+
+	is_reserved_character (c: CHARACTER_32): BOOLEAN
+			-- Is `c' a reserved character? (reserved = gen-delims / sub-delims)
+		do
+			Result := is_sub_delims_character (c) or is_gen_delims_character (c)
+		end
+
+	is_gen_delims_character (c: CHARACTER_32): BOOLEAN
+			-- Is `c' a "gen-delims" character?
+			-- gen-delims  = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+		do
+			Result :=  c = ':' or c = '/'
+					or c = '?' or c = '#'
+					or c = '[' or c = ']'
+					or c = '@'
+		end
+
+	is_sub_delims_character (c: CHARACTER_32): BOOLEAN
+			-- Is `c' a "sub-delims" character?
+			-- sub-delims  = "!" / "$" / "&" / "'" / "(" / ")"
+			--             / "*" / "+" / "," / ";" / "="
+		do
+			Result :=  c = '!' or c = '$' or c = '&'
+					or c = '%'' or c = '(' or c = ')'
+					or c = '*' or c = '+' or c = ','
+					or c = ';' or c = '='
+		end
+
+feature {NONE} -- Implementation
+
+ 	hex_digit: SPECIAL [NATURAL_32]
+ 			-- Character codes of the uppercase hexadecimal digits,
+ 			-- indexed by digit value 0 .. 15.
+ 		local
+ 			l_value: INTEGER
+ 		once
+ 			create Result.make_filled (0, 16)
+ 			from
+ 				l_value := 0
+ 			until
+ 				l_value > 15
+ 			loop
+ 				if l_value <= 9 then
+ 					Result [l_value] := (48 + l_value).to_natural_32 -- 48 '0' .. 57 '9'
+ 				else
+ 						-- 55 + 10 = 65
+ 					Result [l_value] := (55 + l_value).to_natural_32 -- 65 'A' .. 70 'F'
+ 				end
+ 				l_value := l_value + 1
+ 			end
+ 		end
+
+	is_hexa_decimal (a_string: READABLE_STRING_GENERAL): BOOLEAN
+			-- Is `a_string' a valid hexadecimal sequence?
+			-- True when `ctoi_convertor' reports an integral integer after parsing
+			-- `a_string' as a NATURAL_32.
+		do
+				-- `ctoi_convertor' is a once function: both calls below reach the same object.
+			ctoi_convertor.parse_string_with_type (a_string, {NUMERIC_INFORMATION}.type_natural_32)
+			Result := ctoi_convertor.is_integral_integer
+		end
+
+	hexadecimal_string_to_natural_32 (a_hex_string: READABLE_STRING_GENERAL): NATURAL_32
+			-- NATURAL_32 value of the hexadecimal digits in `a_hex_string'.
+			-- Parsing is done by `ctoi_convertor', with no type limitation.
+		require
+			is_hexa: is_hexa_decimal (a_hex_string)
+		do
+				-- `ctoi_convertor' is a once function: both calls below reach the same object.
+			ctoi_convertor.parse_string_with_type (a_hex_string, {NUMERIC_INFORMATION}.type_no_limitation)
+				-- The value is read back from the converter.
+			Result := ctoi_convertor.parsed_natural_32
+		end
+
+	ctoi_convertor: HEXADECIMAL_STRING_TO_INTEGER_CONVERTER
+			-- Shared converter (created once) used to convert a hexadecimal string to integer or natural.
+		once
+			create Result.make
+			Result.set_leading_separators_acceptable (False) -- No separator accepted before the digits.
+			Result.set_trailing_separators_acceptable (False) -- No separator accepted after the digits.
+		ensure
+			ctoi_convertor_not_void: Result /= Void
+		end
+
+note
+	copyright: "2011-2017, Jocelyn Fiat, Javier Velilla, Eiffel Software and others"
+	license: "Eiffel Forum License v2 (see http://www.eiffel.com/licensing/forum.txt)"
+	source: "[
+			Eiffel Software
+			5949 Hollister Ave., Goleta, CA 93117 USA
+			Telephone 805-685-1006, Fax 805-685-6869
+			Website http://www.eiffel.com
+			Customer support http://support.eiffel.com
+		]"
+end