Handling UTF-8 unencoding for WSF_VALUE ... Added WSF_TABLE_VALUE to handle list[]=a&list[]=b ... Library encoder: added UTF8 facilities
272 lines
5.4 KiB
Plaintext
272 lines
5.4 KiB
Plaintext
note
|
|
description: "[
|
|
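Encoder and decoder of HTML character entities: `encoded_string' escapes markup-significant and non 8-bit characters, `decoded_string' resolves entity references back to characters.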
|
|
|
|
see: http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references
|
|
]"
|
|
legal: "See notice at end of class."
|
|
status: "See notice at end of class."
|
|
date: "$Date$"
|
|
revision: "$Revision$"
|
|
|
|
class
|
|
HTML_ENCODER
|
|
|
|
inherit
|
|
ENCODER [STRING_32, STRING_8]
|
|
|
|
PLATFORM
|
|
export
|
|
{NONE} all
|
|
end
|
|
|
|
feature -- Access
|
|
|
|
name: READABLE_STRING_8
		-- Label identifying the encoding handled by this encoder.
	do
		Result := create {IMMUTABLE_STRING_8}.make_from_string ("HTML-encoded")
	end
|
|
|
|
feature -- Status report
|
|
|
|
has_error: BOOLEAN
		-- Did the last encoding or decoding operation report an error?
		--| Reset to False at the start of `encoded_string' and `decoded_string';
		--| set to True by `next_entity' when it meets an invalid entity syntax.
|
|
|
|
feature -- Encoder
|
|
|
|
encoded_string (s: STRING_32): STRING_8
		-- HTML-encoded value of `s'.
		--| The five markup-significant characters are replaced by their
		--| named entities (&quot; &amp; &apos; &lt; &gt;), which are exactly
		--| the names `resolve_entity' knows how to decode.
		--| Any character that does not fit in a CHARACTER_8 is written as
		--| a decimal character reference `&#NNN;'.
		--| Every other character is copied unchanged.
		--| Resets `has_error' to False.
	local
		i, n: INTEGER
		uc: CHARACTER_32
		c: CHARACTER_8
	do
		has_error := False
		n := s.count
			-- Leave ~10% of room for the expansion caused by the entities.
		create Result.make (n + n // 10)
		from i := 1 until i > n loop
			uc := s.item (i)
			if uc.is_character_8 then
				c := uc.to_character_8
				inspect c
				when '%"' then Result.append_string ("&quot;")
				when '&' then Result.append_string ("&amp;")
				when '%'' then Result.append_string ("&apos;")
				when '<' then Result.append_string ("&lt;")
				when '>' then Result.append_string ("&gt;")
				else
					Result.extend (c)
				end
			else
					-- Not representable in the 8-bit result: use a decimal reference.
				Result.append ("&#")
				Result.append (uc.code.out)
				Result.extend (';')
			end
			i := i + 1
		end
	end
|
|
|
|
feature -- Decoder
|
|
|
|
decoded_string (v: STRING_8): STRING_32
		-- Decoded value of the HTML-encoded string `v'.
		--| Characters other than '&' are copied as they are.
		--| Each '&' is handed to `next_entity', which returns the text to
		--| append and moves the reading position past what it consumed.
		--| Resets `has_error' to False before decoding.
	local
		l_pos, l_count: INTEGER
		l_char: CHARACTER
		l_cursor: CELL [INTEGER]
	do
		has_error := False
		l_count := v.count
		create l_cursor.put (0)
		create Result.make (l_count)
		from
			l_pos := 1
		until
			l_pos > l_count
		loop
			l_char := v.item (l_pos)
			if l_char /= '&' then
				Result.append_character (l_char.to_character_32)
				l_pos := l_pos + 1
			else
					-- Share the position with `next_entity' through the cell.
				l_cursor.replace (l_pos)
				Result.append_string (next_entity (v, l_cursor))
				l_pos := l_cursor.item
			end
		end
	end
|
|
|
|
feature {NONE} -- Implementation: decoder
|
|
|
|
next_entity (v: STRING_8; cl_i: CELL [INTEGER]): STRING_32
		-- Text denoted by the character reference starting with the '&'
		-- located at index `cl_i.item' of `v'.
		-- `cl_i' is moved to the index of the first character of `v'
		-- that has not been consumed.
		--| Supported forms:
		--|   decimal reference      &#NNN;
		--|   hexadecimal reference  &#xHHH; or &#XHHH;
		--|   named reference        &name;  (resolved by `resolve_entity')
		--| When no non-empty reference terminated by ';' starts here,
		--| `has_error' is set, only the '&' is returned and consumed, and
		--| the following characters are left to the caller (so they are
		--| never emitted twice).
		--| A numeric reference above the Unicode maximum 0x10FFFF also sets
		--| `has_error' and is returned verbatim.
		--| `v' is never read beyond `v.count'.
	local
		i, j, n, l_start: INTEGER
		l_base, l_code, l_digit: INTEGER
		c: CHARACTER
		is_char, is_hexa: BOOLEAN
		l_stop, l_valid: BOOLEAN
		s: STRING_32
	do
		l_start := cl_i.item
		n := v.count
		create s.make_empty

			-- Detect the optional "#" / "#x" prefix of a numeric reference.
		i := l_start + 1
		if i <= n and then v [i] = '#' then
			is_char := True
			i := i + 1
			if i <= n and then v [i].as_lower = 'x' then
				is_hexa := True
				i := i + 1
			end
		end

			-- Collect the body of the reference (digits or name) into `s'.
		from
			l_stop := False
		until
			l_stop or else i > n
		loop
			c := v [i]
			if is_hexa then
				l_stop := not c.is_hexa_digit
			elseif is_char then
				l_stop := not c.is_digit
			else
				l_stop := not valid_entity_character (c)
			end
			if not l_stop then
				s.append_character (c)
				i := i + 1
			end
		end

		if not s.is_empty and then i <= n and then v [i] = ';' then
			if is_char then
					-- Compute the code point; the range check after each digit
					-- also guarantees that `l_code' cannot overflow.
				if is_hexa then
					l_base := 16
				else
					l_base := 10
				end
				from
					j := 1
					l_code := 0
					l_valid := True
				until
					j > s.count or else not l_valid
				loop
					l_digit := s [j].code
					if l_digit >= ('a').code then
						l_digit := l_digit - ('a').code + 10
					elseif l_digit >= ('A').code then
						l_digit := l_digit - ('A').code + 10
					else
						l_digit := l_digit - ('0').code
					end
					l_code := l_code * l_base + l_digit
					l_valid := l_code <= 0x10FFFF
					j := j + 1
				end
				if l_valid then
					create Result.make (1)
					Result.append_code (l_code.to_natural_32)
				else
						-- Not a Unicode code point: keep the reference verbatim.
					has_error := True
					Result := v.substring (l_start, i).as_string_32
				end
			else
				resolve_entity (s)
				Result := s
			end
				-- Resume after the terminating ';'.
			cl_i.replace (i + 1)
		else
				-- Bare or unterminated '&': emit it literally and resume
				-- right after it.
			has_error := True -- ("Invalid entity syntax")
			create Result.make (1)
			Result.append_character ('&')
			cl_i.replace (l_start + 1)
		end
	end
|
|
|
|
resolve_entity (s: STRING_32)
		-- Replace, in place, the entity name `s' (given without its
		-- leading '&' and trailing ';') by the character it denotes.
		--| Only the five XML predefined entities are known:
		--| amp, apos, quot, lt and gt, compared case-insensitively.
		--| Any other name, including the empty one, is turned back into
		--| its reference form, i.e. '&' + name + ';'.
	require
		s_attached: s /= Void
	local
		l_resolved: BOOLEAN
	do
			-- `s [1]' is only valid on a non-empty string.
		if not s.is_empty then
			inspect s [1].as_lower
			when 'a' then
				if
					s.count = 3 and then
					s [2].as_lower = 'm' and then
					s [3].as_lower = 'p'
				then -- amp
					l_resolved := True
					s.wipe_out
					s.append_character ('&')
				elseif
					s.count = 4 and then
					s [2].as_lower = 'p' and then
					s [3].as_lower = 'o' and then
					s [4].as_lower = 's'
				then -- apos
					l_resolved := True
					s.wipe_out
					s.append_character ('%'')
				end
			when 'q' then
				if
					s.count = 4 and then
					s [2].as_lower = 'u' and then
					s [3].as_lower = 'o' and then
					s [4].as_lower = 't'
				then -- quot
					l_resolved := True
					s.wipe_out
					s.append_character ('%"')
				end
			when 'l' then
				if
					s.count = 2 and then
					s [2].as_lower = 't'
				then -- lt
					l_resolved := True
					s.wipe_out
					s.append_character ('<')
				end
			when 'g' then
				if
					s.count = 2 and then
					s [2].as_lower = 't'
				then -- gt
					l_resolved := True
					s.wipe_out
					s.append_character ('>')
				end
			else
					-- Unknown first letter: left unresolved.
			end
		end
		if not l_resolved then
				-- Unknown entity: restore the original reference syntax.
			s.prepend_character ('&')
			s.append_character (';')
		end
	end
|
|
|
|
valid_entity_character (c: CHARACTER): BOOLEAN
		-- May `c' be part of the name of an HTML entity?
		--| True only for the ASCII letters 'a'..'z', 'A'..'Z' and the
		--| digits '0'..'9', as found in names such as "amp".
	do
		Result := ('a' <= c and c <= 'z')
			or ('A' <= c and c <= 'Z')
			or ('0' <= c and c <= '9')
	end
|
|
|
|
note
|
|
copyright: "2011-2011, Eiffel Software and others"
|
|
license: "Eiffel Forum License v2 (see http://www.eiffel.com/licensing/forum.txt)"
|
|
source: "[
|
|
Eiffel Software
|
|
5949 Hollister Ave., Goleta, CA 93117 USA
|
|
Telephone 805-685-1006, Fax 805-685-6869
|
|
Website http://www.eiffel.com
|
|
Customer support http://support.eiffel.com
|
|
]"
|
|
end
|