Files
EWF/library/text/encoder/src/url_encoder.e
Jocelyn Fiat fbec89f354 Better signature for encoders
Split library .ecf and the autotest .ecf
2012-02-14 18:03:42 +01:00

376 lines
8.3 KiB
Plaintext

note
description: "[
Encoder and decoder for URL-encoded (percent-encoded) text.
See: http://www.faqs.org/rfcs/rfc3986.html
]"
legal: "See notice at end of class."
status: "See notice at end of class."
date: "$Date$"
revision: "$Revision$"
class
URL_ENCODER
inherit
ENCODER [READABLE_STRING_32, READABLE_STRING_8]
PLATFORM
export
{NONE} all
end
feature -- Access
name: READABLE_STRING_8
		-- Name identifying this encoding.
	local
		l_name: IMMUTABLE_STRING_8
	do
		create l_name.make_from_string ("URL-encoded")
		Result := l_name
	end
feature -- Status report
has_error: BOOLEAN
		-- Did the last encoding or decoding call report an error?
		-- Reset to False at the start of `encoded_string',
		-- `partial_encoded_string' and `decoded_string'; set to True by
		-- `url_encoded_char' when asked to escape a character that does
		-- not fit in 8 bits.
feature -- Encoder
encoded_string (s: READABLE_STRING_32): STRING_8
		-- URL-encoded value of `s'.
		-- Letters, digits, '.', '-', '~' and '_' are copied unchanged,
		-- a space becomes '+', and every other character is delegated
		-- to `url_encoded_char'.
		-- `has_error' is reset on entry.
	local
		l_index, l_count: INTEGER
		l_wide: CHARACTER_32
		l_char: CHARACTER_8
	do
		has_error := False
		l_count := s.count
		create Result.make (l_count + l_count // 10)
		from
			l_index := 1
		until
			l_index > l_count
		loop
			l_wide := s.item (l_index)
			if not l_wide.is_character_8 then
					-- No 8-bit form: let `url_encoded_char' deal with it.
				Result.append (url_encoded_char (l_wide))
			else
				l_char := l_wide.to_character_8
				if l_char = ' ' then
					Result.extend ('+')
				elseif
					(l_char >= 'A' and l_char <= 'Z') or
					(l_char >= 'a' and l_char <= 'z') or
					(l_char >= '0' and l_char <= '9') or
					l_char = '.' or l_char = '-' or
					l_char = '~' or l_char = '_'
				then
						-- Unreserved character: copied as is.
					Result.extend (l_char)
				else
					Result.append (url_encoded_char (l_wide))
				end
			end
			l_index := l_index + 1
		end
	end
partial_encoded_string (s: READABLE_STRING_32; a_ignore: ARRAY [CHARACTER]): READABLE_STRING_8
		-- URL-encoded value of `s', except that characters listed in
		-- `a_ignore' are copied unchanged instead of being escaped.
		-- Letters, digits, '.', '-', '~' and '_' are always copied
		-- unchanged and a space always becomes '+', whether or not
		-- they appear in `a_ignore'.
		-- `has_error' is reset on entry.
	local
		i, n: INTEGER
		uc: CHARACTER_32
		c: CHARACTER_8
		s8: STRING_8
	do
		has_error := False
		create s8.make (s.count + s.count // 10)
		Result := s8
		n := s.count
		from
			i := 1
		until
			i > n
		loop
			uc := s.item (i)
			if uc.is_character_8 then
				c := uc.to_character_8
				inspect c
				when
					'A' .. 'Z',
					'a' .. 'z', '0' .. '9',
					'.', '-', '~', '_'
				then
					s8.extend (c)
				when ' ' then
					s8.extend ('+')
				else
					if a_ignore.has (c) then
						s8.extend (c)
					else
						s8.append (url_encoded_char (uc))
					end
				end
			else
					-- `uc' has no 8-bit form, so it cannot be an element of
					-- `a_ignore'. Do not consult `c' here: it still holds the
					-- value from an earlier iteration (or its default), and
					-- testing it would emit the wrong character.
				s8.append (url_encoded_char (uc))
			end
			i := i + 1
		end
	end
feature {NONE} -- encoder character
url_encoded_char (uc: CHARACTER_32): STRING_8
		-- Percent-escape for `uc': '%' followed by exactly two
		-- hexadecimal digits when `uc' fits in 8 bits.
		-- Otherwise the result is empty and `has_error' is set.
	do
		create Result.make (3)
		if uc.is_character_8 then
			Result.extend ('%%')
			Result.append (uc.code.to_hex_string)
				-- The hexadecimal form is zero-padded on the left; drop the
				-- surplus zeros but always keep two digits, so that codes
				-- below 0x10 yield e.g. "%0A" rather than "%A".
			from
			until
				Result.count <= 3 or else Result [2] /= '0'
			loop
				Result.remove (2)
			end
		else
			has_error := True --| Non-ascii escape not currently supported
		end
	ensure
		exists: Result /= Void
		two_hex_digits: uc.is_character_8 implies Result.count = 3
	end
feature -- Decoder
decoded_string (v: READABLE_STRING_8): STRING_32
		-- Decoded value of the URL-encoded string `v'.
		-- '+' becomes a space, sequences introduced by '%' are decoded
		-- by `url_decoded_char', and every other character is copied
		-- unchanged. A '%' that is not followed by enough characters to
		-- form an escape is copied literally.
		-- `has_error' is reset on entry.
	local
		i, n: INTEGER
		c: CHARACTER
		pr: CELL [INTEGER]
	do
		has_error := False
		n := v.count
		create Result.make (n)
		from
			i := 1
		until
			i > n
		loop
			c := v.item (i)
			inspect c
			when '+' then
				Result.append_character ({CHARACTER_32}' ')
			when '%%' then
					-- An escaped character ?
				if i = n or else (v.item (i + 1) /= 'u' and i + 2 > n) then
						-- Truncated escape: a "%XX" form needs two characters
						-- after the '%'. Keep the '%' as ordinary text instead
						-- of decoding an incomplete sequence.
					Result.append_character (c.to_character_32)
				else
						-- `pr' carries the position in and out of the decoder;
						-- on return it is on the last character of the escape.
					create pr.put (i)
					Result.append (url_decoded_char (v, pr))
					i := pr.item
				end
			else
				Result.append_character (c.to_character_32)
			end
			i := i + 1
		end
	end
feature {NONE} -- decoded character
url_decoded_char (buf: STRING_8; posr: CELL [INTEGER]): STRING_32
		-- Character(s) resulting from decoding the escape sequence of
		-- `buf' whose '%' is at index `posr.item'.
		-- Two forms are handled: "%u<n>" with one to four hexadecimal
		-- digits, and "%XX" with two hexadecimal digits.
		-- On exit `posr.item' is the index of the last character that
		-- belongs to the sequence, so the caller moves forward by one
		-- to reach the next unread character.
	require
		stream_exists: buf /= Void
		posr_exists: posr /= Void
		valid_start: posr.item <= buf.count
	local
		c: CHARACTER
		i, n, last_digit: INTEGER
		not_a_digit: BOOLEAN
		ascii_pos, ival: INTEGER
		pos: INTEGER
	do
			--| pos is index in stream of escape character ('%')
		pos := posr.item
		create Result.make (4)
		if buf.item (pos + 1) = 'u' then
				-- An escaped Unicode (ucs2) value, from ECMA scripts
				-- Has the form: %u<n> where <n> is the UCS value
				-- of the character (two byte integer, one to 4 chars
				-- after escape sequence).
			n := buf.count
				-- Digits can only occupy indexes pos + 2 .. pos + 5; without
				-- this bound, hexadecimal-looking text that follows the
				-- escape would be swallowed into the value.
			last_digit := pos + 5
			from
				i := pos + 2
			until
				i > n or i > last_digit or not_a_digit
			loop
				c := buf.item (i)
				if c.is_hexa_digit then
					ival := ival * 16
					if c.is_digit then
						ival := ival + (c |-| '0')
					else
						ival := ival + (c.upper |-| 'A') + 10
					end
					i := i + 1
				else
					not_a_digit := True
				end
			end
				-- `i' is one past the last consumed character; report the
				-- last consumed one, as the "%XX" branch does, so the
				-- caller does not skip the character that follows.
			posr.replace (i - 1)
			Result.append_code (ival.as_natural_32)
		else
				-- Two hexadecimal digits giving the character code.
			ascii_pos := hex_to_integer_32 (buf.substring (pos + 1, pos + 2))
			Result.append_code (ascii_pos.as_natural_32)
			posr.replace (pos + 2)
		end
	ensure
		exists: Result /= Void
	end
feature {NONE} -- UTF8
utf8_bytes_in_sequence (s: STRING_8; spos: INTEGER): INTEGER
		-- Number of bytes in the UTF-8 sequence whose first byte is
		-- the character of `s' at index `spos', as computed by
		-- `bytes_in_utf8_char'.
		-- Zero means that character is not a UTF-8 first byte.
	require
		exists: s /= Void
		long_enough: s.count >= spos
	local
		l_lead: CHARACTER_8
	do
		l_lead := s.item (spos)
		Result := bytes_in_utf8_char (l_lead)
	end
bytes_in_utf8_char (v: CHARACTER_8): INTEGER
		-- Length, in bytes, of the UTF-8 sequence announced by first
		-- byte `v'; zero when `v' cannot start a sequence.
		--
		-- The high bits of the first byte encode the length, so each
		-- bit pattern corresponds to a contiguous range of codes.
	local
		l_code: INTEGER
	do
		l_code := v.code
		if l_code < 0x80 then
				-- 0xxxxxxx: 7 bit ASCII
			Result := 1
		elseif l_code < 0xC0 then
				-- 10xxxxxx: not a first byte
			Result := 0
		elseif l_code < 0xE0 then
				-- 110xxxxx: two-byte character
			Result := 2
		elseif l_code < 0xF0 then
				-- 1110xxxx: three-byte character
			Result := 3
		elseif l_code < 0xF8 then
				-- 11110xxx: four-byte character
			Result := 4
		elseif l_code < 0xFC then
				-- 111110xx: five-byte character
			Result := 5
		elseif l_code < 0xFE then
				-- 1111110x: six-byte character
			Result := 6
		else
				-- 0xFE and 0xFF match no pattern
			Result := 0
		end
	end
feature {NONE} -- Hexadecimal and strings
hex_to_integer_32 (s: STRING): INTEGER_32
		-- INTEGER_32 value of the hexadecimal digits of `s', most
		-- significant digit first.
		-- When the second character of `s' is 'x', the first two
		-- characters are skipped.
		-- Characters are not validated: anything outside '0' .. '9'
		-- is handled as a letter digit.
	require
		s_not_void: s /= Void
	local
		l_pos, l_last: INTEGER
		l_digit: INTEGER
		l_char: CHARACTER
	do
		l_last := s.count
		l_pos := 1
		if l_last >= 2 and then s.item (2) = 'x' then
				-- Skip the two-character prefix.
			l_pos := 3
		end
		from
		until
			l_pos > l_last
		loop
			l_char := s.item (l_pos)
			if l_char >= '0' and then l_char <= '9' then
				l_digit := l_char |-| '0'
			else
				l_digit := (l_char.lower |-| 'a') + 10
			end
			Result := Result * 16 + l_digit
			l_pos := l_pos + 1
		end
	end
hex_to_integer_64 (s: STRING): INTEGER_64
		-- INTEGER_64 value of the hexadecimal digits of `s', most
		-- significant digit first.
		-- When the second character of `s' is 'x', the first two
		-- characters are skipped.
		-- Characters are not validated: anything outside '0' .. '9'
		-- is handled as a letter digit.
	require
		s_not_void: s /= Void
	local
		l_pos, l_last: INTEGER
		l_digit: INTEGER
		l_char: CHARACTER
	do
		l_last := s.count
		l_pos := 1
		if l_last >= 2 and then s.item (2) = 'x' then
				-- Skip the two-character prefix.
			l_pos := 3
		end
		from
		until
			l_pos > l_last
		loop
			l_char := s.item (l_pos)
			if l_char >= '0' and then l_char <= '9' then
				l_digit := l_char |-| '0'
			else
				l_digit := (l_char.lower |-| 'a') + 10
			end
			Result := Result * 16 + l_digit
			l_pos := l_pos + 1
		end
	end
hex_to_pointer (s: STRING): POINTER
		-- POINTER built from the integer value of hexadecimal string `s'.
		-- The string is converted with `hex_to_integer_64' when a
		-- pointer is as wide as an INTEGER_64, with `hex_to_integer_32'
		-- otherwise; `Pointer_bytes' bytes of that integer are then
		-- copied into the result.
	require
		s_not_void: s /= Void
	local
		l_narrow: INTEGER_32
		l_wide: INTEGER_64
	do
		if Pointer_bytes /= Integer_64_bytes then
			l_narrow := hex_to_integer_32 (s)
			($Result).memory_copy ($l_narrow, Pointer_bytes)
		else
			l_wide := hex_to_integer_64 (s)
			($Result).memory_copy ($l_wide, Pointer_bytes)
		end
	end
note
copyright: "2011-2012, Eiffel Software and others"
license: "Eiffel Forum License v2 (see http://www.eiffel.com/licensing/forum.txt)"
source: "[
Eiffel Software
5949 Hollister Ave., Goleta, CA 93117 USA
Telephone 805-685-1006, Fax 805-685-6869
Website http://www.eiffel.com
Customer support http://support.eiffel.com
]"
end