note description: "[ Summary description for {URL_ENCODER}. See: http://www.faqs.org/rfcs/rfc3986.html ]" legal: "See notice at end of class." status: "See notice at end of class." date: "$Date$" revision: "$Revision$" class URL_ENCODER inherit ENCODER [READABLE_STRING_32, READABLE_STRING_8] PLATFORM export {NONE} all end feature -- Access name: READABLE_STRING_8 do create {IMMUTABLE_STRING_8} Result.make_from_string ("URL-encoded") end feature -- Status report has_error: BOOLEAN feature -- Encoder encoded_string (s: READABLE_STRING_32): STRING_8 -- URL-encoded value of `s'. local i, n: INTEGER uc: CHARACTER_32 c: CHARACTER_8 do has_error := False create Result.make (s.count + s.count // 10) n := s.count from i := 1 until i > n loop uc := s.item (i) if uc.is_character_8 then c := uc.to_character_8 inspect c when 'A' .. 'Z', 'a' .. 'z', '0' .. '9', '.', '-', '~', '_' then Result.extend (c) else Result.append (url_encoded_char (uc)) end else Result.append (url_encoded_char (uc)) end i := i + 1 end end partial_encoded_string (s: READABLE_STRING_32; a_ignore: ARRAY [CHARACTER]): READABLE_STRING_8 -- URL-encoded value of `s'. local i, n: INTEGER uc: CHARACTER_32 c: CHARACTER_8 s8: STRING_8 do has_error := False create s8.make (s.count + s.count // 10) Result := s8 n := s.count from i := 1 until i > n loop uc := s.item (i) if uc.is_character_8 then c := uc.to_character_8 inspect c when 'A' .. 'Z', 'a' .. 'z', '0' .. '9', '.', '-', '~', '_' then s8.extend (c) else if a_ignore.has (c) then s8.extend (c) else s8.append (url_encoded_char (uc)) end end else if a_ignore.has (c) then s8.extend (c) else s8.append (url_encoded_char (uc)) end end i := i + 1 end end feature {NONE} -- encoder character url_encoded_char (uc: CHARACTER_32): STRING_8 do create Result.make (3) if uc.is_character_8 then Result.extend ('%%') Result.append (uc.code.to_hex_string) from until Result.count < 2 or else Result[2] /= '0' loop Result.remove (2) end else has_error := True --| Non-ascii escape not currently supported end ensure exists: Result /= Void end feature -- Decoder decoded_string (v: READABLE_STRING_8): STRING_32 -- The URL-encoded equivalent of the given string local i, n: INTEGER c: CHARACTER pr: CELL [INTEGER] changed: BOOLEAN do has_error := False n := v.count create Result.make (n) from i := 1 until i > n loop c := v.item (i) inspect c when '+' then changed := True Result.append_character ({CHARACTER_32}' ') when '%%' then -- An escaped character ? if i = n then Result.append_character (c.to_character_32) else changed := True create pr.put (i) Result.append (url_decoded_char (v, pr)) i := pr.item end else Result.append_character (c.to_character_32) end i := i + 1 end end feature {NONE} -- decoded character url_decoded_char (buf: STRING_8; posr: CELL [INTEGER]): STRING_32 -- Character(s) resulting from decoding the URL-encoded string require stream_exists: buf /= Void posr_exists: posr /= Void valid_start: posr.item <= buf.count local c: CHARACTER i, n, nb: INTEGER not_a_digit: BOOLEAN ascii_pos, ival: INTEGER pos: INTEGER do --| pos is index in stream of escape character ('%') pos := posr.item create Result.make (4) if buf.item (pos + 1) = 'u' then -- An escaped Unicode (ucs2) value, from ECMA scripts -- Has the form: %u where is the UCS value -- of the character (two byte integer, one to 4 chars -- after escape sequence). -- UTF-8 result can be 1 to 4 characters n := buf.count from i := pos + 2 until (i > n) or not_a_digit loop c := buf.item (i) if c.is_hexa_digit then ival := ival * 16 if c.is_digit then ival := ival + (c |-| '0') else ival := ival + (c.upper |-| 'A') + 10 end i := i + 1 else not_a_digit := True end end posr.replace (i) -- ival is now UCS2 value; needs conversion to UTF8 Result.append_code (ival.as_natural_32) nb := utf8_bytes_in_sequence (buf, pos) else -- ASCII char? ascii_pos := hex_to_integer_32 (buf.substring (pos+1, pos+2)) if ascii_pos >= 0x80 and ascii_pos <= 0xff then -- Might be improperly escaped Result.append_code (ascii_pos.as_natural_32) posr.replace (pos + 2) else Result.append_code (ascii_pos.as_natural_32) posr.replace (pos + 2) end end ensure exists: Result /= Void end feature {NONE} -- UTF8 utf8_bytes_in_sequence (s: STRING_8; spos: INTEGER): INTEGER -- If the given character is a legal first byte element in a -- utf8 byte sequence (aka character), then return the number -- of bytes in that sequence -- Result of zero means it's not a utf8 first byte require exists: s /= Void long_enough: s.count >= spos do Result := bytes_in_utf8_char (s.item (spos)) end bytes_in_utf8_char (v: CHARACTER_8): INTEGER -- If the given byte a legal first byte element in a utf8 sequence, -- then the number of bytes in that character -- Zero denotes an error, i.e. not a legal UTF8 char -- -- The first byte of a UTF8 encodes the length local c: NATURAL_8 do c := v.code.to_natural_8 Result := 1 -- 7 bit ASCII if (c & 0x80) /= 0 then -- Hi bit means not ASCII Result := 0 if (c & 0xe0) = 0xc0 then -- If we see a first byte as b110xxxxx -- then we expect a two-byte character Result := 2 elseif (c & 0xf0) = 0xe0 then -- If we see a first byte as b1110xxxx -- then we expect a three-byte character Result := 3 elseif (c & 0xf8) = 0xf0 then -- If we see a first byte as b11110xxx -- then we expect a four-byte character Result := 4 elseif (c & 0xfc) = 0xf8 then -- If we see a first byte as b111110xx -- then we expect a five-byte character Result := 5 elseif (c & 0xfe) = 0xfc then -- If we see a first byte as b1111110x -- then we expect a six-byte character Result := 6 end end end feature {NONE} -- Hexadecimal and strings hex_to_integer_32 (s: STRING): INTEGER_32 -- Hexadecimal string `s' converted to INTEGER_32 value require s_not_void: s /= Void local i, nb: INTEGER; char: CHARACTER do nb := s.count if nb >= 2 and then s.item (2) = 'x' then i := 3 else i := 1 end from until i > nb loop Result := Result * 16 char := s.item (i) if char >= '0' and then char <= '9' then Result := Result + (char |-| '0') else Result := Result + (char.lower |-| 'a' + 10) end i := i + 1 end end hex_to_integer_64 (s: STRING): INTEGER_64 -- Hexadecimal string `s' converted to INTEGER_64 value require s_not_void: s /= Void local i, nb: INTEGER; char: CHARACTER do nb := s.count if nb >= 2 and then s.item (2) = 'x' then i := 3 else i := 1 end from until i > nb loop Result := Result * 16 char := s.item (i) if char >= '0' and then char <= '9' then Result := Result + (char |-| '0') else Result := Result + (char.lower |-| 'a' + 10) end i := i + 1 end end hex_to_pointer (s: STRING): POINTER -- Hexadecimal string `s' converted to POINTER value require s_not_void: s /= Void local val_32: INTEGER_32 val_64: INTEGER_64 do if Pointer_bytes = Integer_64_bytes then val_64 := hex_to_integer_64 (s) ($Result).memory_copy ($val_64, Pointer_bytes) else val_32 := hex_to_integer_32 (s) ($Result).memory_copy ($val_32, Pointer_bytes) end end note copyright: "2011-2012, Eiffel Software and others" license: "Eiffel Forum License v2 (see http://www.eiffel.com/licensing/forum.txt)" source: "[ Eiffel Software 5949 Hollister Ave., Goleta, CA 93117 USA Telephone 805-685-1006, Fax 805-685-6869 Website http://www.eiffel.com Customer support http://support.eiffel.com ]" end