Better support for unicode path and values.

Added WSF_REQUEST.percent_encoded_path_info: READABLE_STRING_8
    to keep the URL-encoded path info, as it is useful for specific components

The router is now using WSF_REQUEST.percent_encoded_path_info
    since URI_TEMPLATE handles URIs (and not IRIs);
    this fixes an issue with unicode path parameters.

This should not break existing code, and it fixes various Unicode-related issues affecting
   PATH parameters and path info,
   as well as any component using file names.

(requires EiffelStudio >= 7.2)
This commit is contained in:
2013-06-12 18:00:55 +02:00
parent a982286dd4
commit cc4ef1a575
28 changed files with 1056 additions and 449 deletions

View File

@@ -11,22 +11,10 @@
<assertions precondition="true"/>
</option>
<library name="base" location="$ISE_LIBRARY\library\base\base-safe.ecf"/>
<library name="encoding" location="$ISE_LIBRARY\library\encoding\encoding-safe.ecf"/>
<cluster name="src" location="src\" recursive="true">
<file_rule>
<exclude>/tests$</exclude>
<exclude>/spec$</exclude>
</file_rule>
<cluster name="src_before_70" location="$|spec\before_70\" recursive="true">
<condition>
<version type="compiler" max="7.0.8.7585"/>
</condition>
</cluster>
<cluster name="src_70" location="$|spec\70\" recursive="true">
<condition>
<version type="compiler" min="7.0.8.7586"/>
</condition>
</cluster>
</cluster>
</target>
</system>

View File

@@ -11,22 +11,10 @@
<assertions precondition="true"/>
</option>
<library name="base" location="$ISE_LIBRARY\library\base\base.ecf"/>
<library name="encoding" location="$ISE_LIBRARY\library\encoding\encoding.ecf"/>
<cluster name="src" location="src\" recursive="true">
<file_rule>
<exclude>/tests$</exclude>
<exclude>/spec$</exclude>
</file_rule>
<cluster name="src_before_70" location="$\spec\before_70" recursive="true">
<condition>
<version type="compiler" max="7.0.8.7585"/>
</condition>
</cluster>
<cluster name="src_70" location="$\spec\70" recursive="true">
<condition>
<version type="compiler" min="7.0.8.7586"/>
</condition>
</cluster>
</cluster>
</target>
</system>

View File

@@ -1,26 +1,21 @@
note
description : "Objects that ..."
author : "$Author$"
date : "$Date$"
revision : "$Revision$"
description: "Objects to access the shared once UTF8_URL_ENCODER ..."
date: "$Date$"
revision: "$Revision$"
deferred class
UTF8_ENCODER_HELPER
class
SHARED_UTF8_URL_ENCODER
inherit
ANY
feature -- Encoder
UNICODE_CONVERSION
export
{NONE} all
{ANY} is_valid_utf8
undefine
is_little_endian
url_encoder: UTF8_URL_ENCODER
-- Shared UTF8 URL encoder.
once
create Result
end
note
copyright: "2011-2011, Eiffel Software and others"
copyright: "2011-2012, Eiffel Software and others"
license: "Eiffel Forum License v2 (see http://www.eiffel.com/licensing/forum.txt)"
source: "[
Eiffel Software
@@ -29,4 +24,5 @@ note
Website http://www.eiffel.com
Customer support http://support.eiffel.com
]"
end

View File

@@ -1,73 +0,0 @@
-- Deferred helper class offering a lightweight UTF-8 well-formedness check
-- (`is_valid_utf8') to its descendants.
note
description : "Objects that ..."
author : "$Author$"
date : "$Date$"
revision : "$Revision$"
deferred class
UTF8_ENCODER_HELPER
inherit
ANY
-- Features inherited from UNICODE_CONVERSION are hidden from clients
-- (exported to {NONE}), and `is_little_endian' is undefined here.
-- NOTE(review): presumably the effective `is_little_endian' is meant to
-- come from another parent of the descendant (e.g. PLATFORM) -- confirm.
UNICODE_CONVERSION
export
{NONE} all
undefine
is_little_endian
end
feature -- Status report
is_valid_utf8 (a_string: STRING): BOOLEAN
-- Is `a_string' valid UTF-8 string?
--
-- Only the lead byte of each sequence is examined: the routine
-- classifies it and then jumps over the number of trailing bytes
-- that the lead byte announces.
-- NOTE(review): the trailing bytes themselves are never inspected
-- (no check for the 10xxxxxx form), and a sequence cut short at the
-- end of `a_string' still yields True because the index simply moves
-- past `a_string.count' and the loop exits.
require
a_string_not_void: a_string /= Void
local
l_nat8: NATURAL_8
-- NOTE(review): `l_code' is assigned in the two-byte branch but never read.
l_code: NATURAL_32
i, nb: INTEGER
do
from
i := 1
nb := a_string.count
Result := True
until
-- Stop at the end of the string or at the first rejected lead byte.
i > nb or not Result
loop
l_nat8 := a_string.code (i).to_natural_8
if l_nat8 <= 127 then
-- Form 0xxxxxxx.
-- Single-byte character: nothing to skip.
elseif (l_nat8 & 0xE0) = 0xC0 then
-- Form 110xxxxx 10xxxxxx.
-- Skip the one expected trailing byte.
l_code := (l_nat8 & 0x1F).to_natural_32 |<< 6
i := i + 1
elseif (l_nat8 & 0xF0) = 0xE0 then
-- Form 1110xxxx 10xxxxxx 10xxxxxx.
-- Skip the two expected trailing bytes.
i := i + 2
elseif (l_nat8 & 0xF8) = 0xF0 then
-- Form 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
-- Skip the three expected trailing bytes.
i := i + 3
elseif (l_nat8 & 0xFC) = 0xF8 then
-- Starts with 111110xx
-- Five-byte lead byte: rejected.
Result := False
else
-- Starts with 1111110x
-- This branch also receives every remaining value above 127:
-- a stray 10xxxxxx byte in lead position, 0xFE and 0xFF.
Result := False
end
-- Advance past the lead byte itself.
i := i + 1
end
end
note
copyright: "2011-2011, Eiffel Software and others"
license: "Eiffel Forum License v2 (see http://www.eiffel.com/licensing/forum.txt)"
source: "[
Eiffel Software
5949 Hollister Ave., Goleta, CA 93117 USA
Telephone 805-685-1006, Fax 805-685-6869
Website http://www.eiffel.com
Customer support http://support.eiffel.com
]"
end

View File

@@ -15,8 +15,6 @@ class
inherit
ENCODER [READABLE_STRING_32, READABLE_STRING_8]
UTF8_ENCODER_HELPER
PLATFORM
export
{NONE} all
@@ -37,9 +35,13 @@ feature -- Encoder
encoded_string (s: READABLE_STRING_32): STRING_8
-- UTF8-encoded value of `s'.
do
Result := general_encoded_string (s)
end
general_encoded_string (s: READABLE_STRING_GENERAL): STRING_8
do
Result := utf32_to_utf8 (s)
has_error := not last_conversion_successful
end
feature -- Decoder
@@ -48,11 +50,34 @@ feature -- Decoder
-- The UTF8-encoded equivalent of the given string
do
Result := utf8_to_utf32 (v)
has_error := not last_conversion_successful
has_error := not is_valid_utf8 (v)
end
feature {NONE} -- UTF implementation
utf32_to_utf8 (s: READABLE_STRING_GENERAL): STRING_8
local
utf: UTF_CONVERTER
do
Result := utf.utf_32_string_to_utf_8_string_8 (s)
end
utf8_to_utf32 (s: READABLE_STRING_8): STRING_32
local
utf: UTF_CONVERTER
do
Result := utf.utf_8_string_8_to_string_32 (s)
end
is_valid_utf8 (s: READABLE_STRING_8): BOOLEAN
local
utf: UTF_CONVERTER
do
Result := utf.is_valid_utf_8_string_8 (s)
end
note
copyright: "2011-2012, Eiffel Software and others"
copyright: "2011-2013, Eiffel Software and others"
license: "Eiffel Forum License v2 (see http://www.eiffel.com/licensing/forum.txt)"
source: "[
Eiffel Software

View File

@@ -15,23 +15,24 @@ class
inherit
URL_ENCODER
redefine
default_create,
name,
general_encoded_string,
encoded_string, partial_encoded_string,
decoded_string
select
encoded_string,
decoded_string,
has_error
end
UTF8_ENCODER_HELPER
UTF8_ENCODER
rename
general_encoded_string as utf8_general_encoded_string,
encoded_string as utf8_encoded_string,
decoded_string as utf8_decoded_string,
has_error as utf8_has_error
redefine
default_create
end
feature {NONE} -- Initialization
default_create
do
Precursor {UTF8_ENCODER_HELPER}
name
end
feature -- Access
@@ -46,27 +47,22 @@ feature -- Encoder
encoded_string (s: READABLE_STRING_32): STRING_8
-- URL-encoded value of `s'.
do
Result := utf32_to_utf8 (s)
Result := Precursor (Result)
Result := general_encoded_string (s)
end
general_encoded_string (s: READABLE_STRING_GENERAL): STRING_8
do
if attached {READABLE_STRING_32} s as s32 then
Result := utf32_to_utf8 (s32)
else
Result := s.as_string_8
end
Result := Precursor (Result)
Result := utf8_general_encoded_string (s)
Result := Precursor {URL_ENCODER} (Result)
has_error := has_error or utf8_has_error
end
partial_encoded_string (s: READABLE_STRING_GENERAL; a_ignore: ARRAY [CHARACTER]): STRING_8
-- URL-encoded value of `s'.
do
Result := Precursor (s, a_ignore)
if not has_error then
Result := utf32_to_utf8 (Result)
end
Result := utf8_general_encoded_string (s)
Result := Precursor {URL_ENCODER} (Result, a_ignore)
has_error := has_error or utf8_has_error
end
feature -- Decoder
@@ -74,17 +70,15 @@ feature -- Decoder
decoded_string (v: READABLE_STRING_8): STRING_32
-- The URL-encoded equivalent of the given string
do
Result := Precursor (v)
Result := Precursor {URL_ENCODER} (v)
if not has_error then
if is_valid_utf8 (Result) then
Result := utf8_to_utf32 (Result)
has_error := not last_conversion_successful
end
Result := utf8_decoded_string (Result)
has_error := utf8_has_error
end
end
note
copyright: "2011-2012, Eiffel Software and others"
copyright: "2011-2013, Eiffel Software and others"
license: "Eiffel Forum License v2 (see http://www.eiffel.com/licensing/forum.txt)"
source: "[
Eiffel Software