From 39887c8bdb40d74b59c54cc3eb22c2b9c11b9ae8 Mon Sep 17 00:00:00 2001 From: Jocelyn Fiat Date: Mon, 7 Sep 2015 19:22:50 +0200 Subject: [PATCH] Added initial ATOM and RSS feed parser and generator. (work in progress) --- library/text/parser/feed/feed-safe.ecf | 13 ++ library/text/parser/feed/feed.ecf | 8 + .../parser/feed/src/atom/atom_feed_parser.e | 104 +++++++++ .../parser/feed/src/atom/atom_generator.e | 193 +++++++++++++++++ .../parser/feed/src/feed_default_parsers.e | 65 ++++++ library/text/parser/feed/src/feed_parser.e | 62 ++++++ library/text/parser/feed/src/kernel/feed.e | 88 ++++++++ .../text/parser/feed/src/kernel/feed_author.e | 46 ++++ .../text/parser/feed/src/kernel/feed_entry.e | 175 ++++++++++++++++ .../text/parser/feed/src/kernel/feed_link.e | 56 +++++ .../parser/feed/src/rss/rss_2_feed_parser.e | 125 +++++++++++ .../parser/feed/src/rss/rss_2_generator.e | 133 ++++++++++++ .../parser/feed/src/support/feed_generator.e | 105 ++++++++++ .../parser/feed/src/support/feed_helpers.e | 87 ++++++++ .../feed/src/support/feed_parser_utilities.e | 84 ++++++++ .../src/support/feed_to_string_32_visitor.e | 197 ++++++++++++++++++ .../parser/feed/src/support/feed_visitor.e | 28 +++ library/text/parser/feed/tests/application.e | 91 ++++++++ .../text/parser/feed/tests/atom_test_set.e | 74 +++++++ library/text/parser/feed/tests/rss_test_set.e | 60 ++++++ library/text/parser/feed/tests/tests-safe.ecf | 14 ++ library/text/parser/feed/tests/tests.ecf | 12 ++ 22 files changed, 1820 insertions(+) create mode 100644 library/text/parser/feed/feed-safe.ecf create mode 100644 library/text/parser/feed/feed.ecf create mode 100644 library/text/parser/feed/src/atom/atom_feed_parser.e create mode 100644 library/text/parser/feed/src/atom/atom_generator.e create mode 100644 library/text/parser/feed/src/feed_default_parsers.e create mode 100644 library/text/parser/feed/src/feed_parser.e create mode 100644 library/text/parser/feed/src/kernel/feed.e create mode 100644 
library/text/parser/feed/src/kernel/feed_author.e create mode 100644 library/text/parser/feed/src/kernel/feed_entry.e create mode 100644 library/text/parser/feed/src/kernel/feed_link.e create mode 100644 library/text/parser/feed/src/rss/rss_2_feed_parser.e create mode 100644 library/text/parser/feed/src/rss/rss_2_generator.e create mode 100644 library/text/parser/feed/src/support/feed_generator.e create mode 100644 library/text/parser/feed/src/support/feed_helpers.e create mode 100644 library/text/parser/feed/src/support/feed_parser_utilities.e create mode 100644 library/text/parser/feed/src/support/feed_to_string_32_visitor.e create mode 100644 library/text/parser/feed/src/support/feed_visitor.e create mode 100644 library/text/parser/feed/tests/application.e create mode 100644 library/text/parser/feed/tests/atom_test_set.e create mode 100644 library/text/parser/feed/tests/rss_test_set.e create mode 100644 library/text/parser/feed/tests/tests-safe.ecf create mode 100644 library/text/parser/feed/tests/tests.ecf diff --git a/library/text/parser/feed/feed-safe.ecf b/library/text/parser/feed/feed-safe.ecf new file mode 100644 index 00000000..fce34157 --- /dev/null +++ b/library/text/parser/feed/feed-safe.ecf @@ -0,0 +1,13 @@ + + + + + + + + + + + + + diff --git a/library/text/parser/feed/feed.ecf b/library/text/parser/feed/feed.ecf new file mode 100644 index 00000000..a8902bfa --- /dev/null +++ b/library/text/parser/feed/feed.ecf @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/library/text/parser/feed/src/atom/atom_feed_parser.e b/library/text/parser/feed/src/atom/atom_feed_parser.e new file mode 100644 index 00000000..75c89691 --- /dev/null +++ b/library/text/parser/feed/src/atom/atom_feed_parser.e @@ -0,0 +1,104 @@ +note + description: "[ + ATOM Parser. + + Warning: the implementation may not support the full ATOM specification. 
+ ]" + date: "$Date$" + revision: "$Revision$" + EIS: "name=ATOM at wikipedia", "protocol=URI", "src=https://en.wikipedia.org/wiki/Atom_(standard)" + EIS: "name=RSS at wikipedia", "protocol=URI", "src=https://en.wikipedia.org/wiki/RSS" + EIS: "name=ATOM 1.0 RFC4287", "protocol=URI", "src=https://tools.ietf.org/html/rfc4287" + +class + ATOM_FEED_PARSER + +inherit + FEED_PARSER + +feature -- Access + + name: STRING = "atom1" + -- Associated name. + + is_detected (xdoc: XML_DOCUMENT): BOOLEAN + -- Is `xdoc' an ATOM feed representation? + do + Result := attached {XML_ELEMENT} xdoc.element_by_name ("feed") as x_feed and then + ( + not attached xml_attribute_text (x_feed, "xmlns") as l_xmlns + or else l_xmlns.same_string ("http://www.w3.org/2005/Atom") + ) + end + + feed (xdoc: XML_DOCUMENT): detachable FEED + -- Feed from `xdoc' XML document. + local + l_title: READABLE_STRING_32 + x_entry, x_link: detachable XML_ELEMENT + e: FEED_ENTRY + l_author: FEED_AUTHOR + lnk: FEED_LINK + s: STRING_32 + do + if + attached xdoc.element_by_name ("feed") as x_feed and then +-- (not attached xml_attribute_text (x_feed, "xmlns") as l_xmlns or else l_xmlns.same_string ("http://www.w3.org/2005/Atom")) + attached xml_element_text (x_feed, "title") as t + then + l_title := t + create Result.make (l_title) + Result.set_description (xml_element_text (x_feed, "subtitle")) + Result.set_id (xml_element_text (x_feed, "id")) + Result.set_updated_date_with_text (xml_element_text (x_feed, "updated")) + if attached links_from_xml (x_feed, "link") as l_links then + across + l_links as link_ic + loop + lnk := link_ic.item + Result.links.force (lnk, lnk.relation) + end + end + if attached x_feed.elements_by_name ("entry") as x_entries then + across + x_entries as ic + loop + x_entry := ic.item + if attached xml_element_text (x_entry, "title") as e_title then + create e.make (e_title) + e.set_description (xml_element_text (x_entry, "summary")) + e.set_id (xml_element_text (x_entry, "id")) + 
e.set_updated_date_with_text (xml_element_text (x_entry, "updated")) + + if attached links_from_xml (x_entry, "link") as l_links then + across + l_links as link_ic + loop + lnk := link_ic.item + e.links.force (lnk, lnk.relation) + end + end + if attached x_entry.element_by_name ("content") as x_content then + e.set_content (xml_element_code (x_content), xml_attribute_text (x_content, "type")) + end + if attached x_entry.element_by_name ("author") as x_author then + if attached x_author.element_by_name ("name") as x_name and then + attached x_name.text as l_author_name + then + create l_author.make (l_author_name) + if attached x_author.element_by_name ("email") as x_email then + l_author.set_email (x_email.text) + end + e.set_author (l_author) + end + end + Result.add_entry (e) + end + end + end + end + end + + + +end diff --git a/library/text/parser/feed/src/atom/atom_generator.e b/library/text/parser/feed/src/atom/atom_generator.e new file mode 100644 index 00000000..c5d3aa01 --- /dev/null +++ b/library/text/parser/feed/src/atom/atom_generator.e @@ -0,0 +1,193 @@ +note + description: "Convert a FEED into an ATOM content." 
+	date: "$Date$"
+	revision: "$Revision$"
+
+class
+	ATOM_GENERATOR
+
+inherit
+	FEED_VISITOR
+
+	FEED_GENERATOR
+
+create
+	make
+
+feature {NONE} -- Initialization
+
+	make (a_buffer: STRING_8) -- Generate into `a_buffer', starting with an empty indentation.
+		do
+			buffer := a_buffer
+			create indentation.make_empty
+		end
+
+	buffer: STRING_8 -- Output string every visit_* feature appends to.
+
+feature -- Visitor
+
+	visit_feed (a_feed: FEED) -- Append `a_feed', then its links and its entries, to `buffer'.
+		do
+			buffer.append ("[
+
+
+	]")
+			buffer.append_character ('%N')
+			indent
+			append_content_tag_to ("title", Void, a_feed.title, buffer)
+			append_content_tag_to ("subtitle", Void, a_feed.description, buffer)
+			if attached a_feed.id as l_id then
+				append_content_tag_to ("id", Void, l_id, buffer)
+			else
+				append_content_tag_to ("id", Void, "urn:uuid:" + new_uuid, buffer) -- No id set: emit a freshly generated one.
+			end
+
+			across
+				a_feed.links as tb
+			loop
+				tb.item.accept (Current)
+			end
+			if attached a_feed.date as dt then
+				append_content_tag_to ("updated", Void, date_to_string (dt), buffer)
+			end
+			across
+				a_feed.entries as ic
+			loop
+				ic.item.accept (Current)
+			end
+
+			exdent
+			buffer.append ("")
+		end
+
+	visit_entry (a_entry: FEED_ENTRY) -- Append `a_entry' (title, links, id, date, summary, content, author) to `buffer'.
+		do
+			buffer.append (indentation)
+			buffer.append ("%N")
+			indent
+			append_content_tag_to ("title", Void, a_entry.title, buffer)
+			across
+				a_entry.links as tb
+			loop
+				tb.item.accept (Current)
+			end
+			if attached a_entry.id as l_id then
+				append_content_tag_to ("id", Void, l_id, buffer)
+			else
+				append_content_tag_to ("id", Void, "urn:uuid:" + new_uuid, buffer) -- No id set: emit a freshly generated one.
+			end
+			if attached a_entry.date as dt then
+				append_content_tag_to ("updated", Void, date_to_string (dt), buffer)
+			end
+
+			append_content_tag_to ("summary", Void, a_entry.description, buffer)
+			if attached a_entry.content as l_content then
+				if a_entry.content_type_or_default ("xhtml").is_case_insensitive_equal_general ("xhtml") then -- Plain boolean test: `attached' on a BOOLEAN is always True and made the else branch unreachable.
+--					if l_content.has_substring ("
") then + append_content_tag_to ("content", <<["type", "xhtml"]>>, l_content, buffer) +-- else +-- append_content_tag_to ("content", <<["type", "xhtml"]>>, {STRING_32} "
" + l_content + {STRING_32} "
", buffer) +-- end + else + append_content_tag_to ("content", <<["type", a_entry.content_type]>>, a_entry.content, buffer) + end + end + + if attached a_entry.author as u then + u.accept (Current) + end + exdent + buffer.append (indentation) + buffer.append ("%N") + end + + visit_link (a_link: FEED_LINK) + local + attr: detachable ARRAYED_LIST [TUPLE [name: READABLE_STRING_8; value: READABLE_STRING_32]] + tu: TUPLE [name: READABLE_STRING_8; value: READABLE_STRING_32] + do + create attr.make (2) + if attached a_link.relation as rel and then not rel.is_whitespace then + tu := ["rel", rel] + attr.force (tu) + end + if attached a_link.type as t and then not t.is_whitespace then + tu := ["type", t.as_string_32] + attr.force (tu) + end + tu := ["href", a_link.href.as_string_32] + attr.force (tu) + if attr.is_empty then + attr := Void + end + append_content_tag_to ("link", attr, Void, buffer) + end + + visit_author (a_author: FEED_AUTHOR) + do + buffer.append (indentation) + buffer.append ("%N") + indent + append_content_tag_to ("name", Void, a_author.name, buffer) + append_content_tag_to ("email", Void, a_author.email, buffer) + exdent + buffer.append (indentation) + buffer.append ("%N") + end + +feature {NONE} -- Helpers + + new_uuid: STRING + local + gen: UUID_GENERATOR + do + create gen + Result := gen.generate_uuid.out.as_lower + end + + date_to_string (dt: DATE_TIME): STRING + do + Result := date_to_rfc3339_string (dt) + end + + date_to_rfc3339_string (d: DATE_TIME): STRING + -- 2003-12-13T18:30:02Z + local + i: INTEGER + do + create Result.make_empty + Result.append_integer (d.year) + Result.append_character ('-') + i := d.month + if i < 10 then + Result.append_integer (0) + end + Result.append_integer (i) + Result.append_character ('-') + i := d.day + if i < 10 then + Result.append_integer (0) + end + Result.append_integer (i) + Result.append_character ('T') + i := d.hour + if i < 10 then + Result.append_integer (0) + end + Result.append_integer (i) + 
Result.append_character (':') + i := d.minute + if i < 10 then + Result.append_integer (0) + end + Result.append_integer (i) + Result.append_character (':') + i := d.second + if i < 10 then + Result.append_integer (0) + end + Result.append_integer (i) + Result.append_character ('Z') + end + +end diff --git a/library/text/parser/feed/src/feed_default_parsers.e b/library/text/parser/feed/src/feed_default_parsers.e new file mode 100644 index 00000000..a7711614 --- /dev/null +++ b/library/text/parser/feed/src/feed_default_parsers.e @@ -0,0 +1,65 @@ +note + description: "Collection of default feed parsers." + date: "$Date$" + revision: "$Revision$" + +class + FEED_DEFAULT_PARSERS + +inherit + ANY + redefine + default_create + end + +create + default_create + +feature {NONE} -- Initialization + + default_create + do + Precursor + create {ARRAYED_LIST [FEED_PARSER]} parsers.make (2) + parsers.force (create {RSS_2_FEED_PARSER}) + parsers.force (create {ATOM_FEED_PARSER}) + end + +feature -- Access + + parsers: LIST [FEED_PARSER] + -- Available Feed parsers. + +feature -- Access + + feed_from_string (a_atom_content: READABLE_STRING_8): detachable FEED + local + p: XML_STANDARD_PARSER + cb_tree: XML_CALLBACKS_FILTER_DOCUMENT + xdoc: XML_DOCUMENT + do + create p.make + create cb_tree.make_null + p.set_callbacks (cb_tree) + p.parse_from_string_8 (a_atom_content) + if p.is_correct then + xdoc := cb_tree.document + Result := feed (xdoc) + end + end + + feed (xdoc: XML_DOCUMENT): like feed_from_string + -- Feed from `xdoc' XML document. + do + across + parsers as ic + until + Result /= Void + loop + if ic.item.is_detected (xdoc) then + Result := ic.item.feed (xdoc) + end + end + end + +end diff --git a/library/text/parser/feed/src/feed_parser.e b/library/text/parser/feed/src/feed_parser.e new file mode 100644 index 00000000..ef047b6c --- /dev/null +++ b/library/text/parser/feed/src/feed_parser.e @@ -0,0 +1,62 @@ +note + description: "[ + Interface common to any FEED parser. 
+ + Usage: + create parser + if attached parser.feed_from_string (l_feed_content) as l_feed then + ... + + ]" + date: "$Date$" + revision: "$Revision$" + +deferred class + FEED_PARSER + +inherit + FEED_PARSER_UTILITIES + +feature -- Access + + name: STRING + -- Associated name. + deferred + ensure + not_blanc: not Result.is_whitespace + end + + is_detected (xdoc: XML_DOCUMENT): BOOLEAN + -- Is `xdoc' an feed representation or Current supported format? + deferred + end + + feed (xdoc: XML_DOCUMENT): detachable FEED + -- Feed from `xdoc' XML document. + require + is_detected: is_detected (xdoc) + deferred + end + +feature -- Basic operations + + feed_from_string (a_content: READABLE_STRING_8): like feed + -- Feed from `a_content' document. + local + p: XML_STANDARD_PARSER + cb_tree: XML_CALLBACKS_FILTER_DOCUMENT + xdoc: XML_DOCUMENT + do + create p.make + create cb_tree.make_null + p.set_callbacks (cb_tree) + p.parse_from_string_8 (a_content) + if p.is_correct then + xdoc := cb_tree.document + if is_detected (xdoc) then + Result := feed (xdoc) + end + end + end + +end diff --git a/library/text/parser/feed/src/kernel/feed.e b/library/text/parser/feed/src/kernel/feed.e new file mode 100644 index 00000000..fdb47957 --- /dev/null +++ b/library/text/parser/feed/src/kernel/feed.e @@ -0,0 +1,88 @@ +note + description: "Summary description for {FEED}." + author: "" + date: "$Date$" + revision: "$Revision$" + +class + FEED + +inherit + FEED_HELPERS + +create + make + +feature {NONE} -- Initialization + + make (a_title: READABLE_STRING_GENERAL) + do + create title.make_from_string_general (a_title) + create entries.make (1) + create links.make (1) + end + +feature -- Access + + title: IMMUTABLE_STRING_32 + -- Title of the feed/channel. + + description: detachable IMMUTABLE_STRING_32 + -- Associated description/subtitle. + + id: detachable IMMUTABLE_STRING_32 + -- Id associated with Current feed if any. + + date: detachable DATE_TIME + -- Build date. 
+ + links: STRING_TABLE [FEED_LINK] + -- Url indexed by relation + + entries: ARRAYED_LIST [FEED_ENTRY] + -- List of feed items. + +feature -- Element change + + set_description (a_description: detachable READABLE_STRING_GENERAL) + do + if a_description = Void then + description := Void + else + create description.make_from_string_general (a_description) + end + end + + set_id (a_id: detachable READABLE_STRING_GENERAL) + do + if a_id = Void then + id := Void + else + create id.make_from_string_general (a_id) + end + end + + set_updated_date_with_text (a_date_text: detachable READABLE_STRING_32) + do + if a_date_text = Void then + date := Void + else + date := date_time (a_date_text) + end + end + + add_entry (e: FEED_ENTRY) + do + entries.force (e) + end + +feature -- Visitor + + accept (vis: FEED_VISITOR) + do + vis.visit_feed (Current) + end + +invariant + +end diff --git a/library/text/parser/feed/src/kernel/feed_author.e b/library/text/parser/feed/src/kernel/feed_author.e new file mode 100644 index 00000000..45ae8c49 --- /dev/null +++ b/library/text/parser/feed/src/kernel/feed_author.e @@ -0,0 +1,46 @@ +note + description: "Summary description for {FEED_AUTHOR}." 
+ author: "" + date: "$Date$" + revision: "$Revision$" + +class + FEED_AUTHOR + +create + make + +feature {NONE} -- Initialization + + make (a_name: READABLE_STRING_GENERAL) + do + create name.make_from_string_general (a_name) + end + +feature -- Access + + name: IMMUTABLE_STRING_32 + + email: detachable READABLE_STRING_8 + +feature -- Element change + + set_email (a_email: detachable READABLE_STRING_GENERAL) + do + if a_email = Void then + email := Void + elseif a_email.is_valid_as_string_8 then + email := a_email.as_string_8 + else + email := Void + end + end + +feature -- Visitor + + accept (vis: FEED_VISITOR) + do + vis.visit_author (Current) + end + +end diff --git a/library/text/parser/feed/src/kernel/feed_entry.e b/library/text/parser/feed/src/kernel/feed_entry.e new file mode 100644 index 00000000..31b7efdc --- /dev/null +++ b/library/text/parser/feed/src/kernel/feed_entry.e @@ -0,0 +1,175 @@ +note + description: "Summary description for {FEED_ENTRY}." + author: "" + date: "$Date$" + revision: "$Revision$" + +class + FEED_ENTRY + +inherit + FEED_HELPERS + undefine + is_equal + end + + COMPARABLE + +create + make + +feature {NONE} -- Initialization + + make (a_title: READABLE_STRING_GENERAL) + do + create title.make_from_string_general (a_title) + create links.make (1) + end + +feature -- Access + + title: IMMUTABLE_STRING_32 + -- Title of associated feed item. + + description: detachable IMMUTABLE_STRING_32 + -- Optional description (or summary). + + content: detachable IMMUTABLE_STRING_32 + -- Content of Current feed item. + + content_type: detachable READABLE_STRING_8 + -- Optional content type for `content'. + -- By default, this should be text/html. 
+ + content_type_or_default (dft: READABLE_STRING_8): READABLE_STRING_8 + do + if attached content_type as l_type then + Result := l_type + else + Result := dft + end + end + + id: detachable IMMUTABLE_STRING_32 + -- Identifier of current feed item, if any/ + + date: detachable DATE_TIME + -- Publishing date. + + links: STRING_TABLE [FEED_LINK] + -- Url indexed by relation + + categories: detachable LIST [READABLE_STRING_32] + -- Categories + + author: detachable FEED_AUTHOR + -- Author information. + +feature -- Status report + + has_category (cat: READABLE_STRING_GENERAL): BOOLEAN + -- Has category `cat'? + --| note: case insensitive. + do + if attached categories as cats then + Result := across cats as ic some cat.is_case_insensitive_equal (ic.item) end + end + end + +feature -- Comparison + + is_less alias "<" (other: like Current): BOOLEAN + -- Is current object less than `other'? + local + d1,d2: like date + do + d1 := date + d2 := other.date + if d1 = Void and d2 = Void then + Result := title < other.title + elseif d1 = Void then + Result := True + elseif d2 = Void then + Result := False + else + if d1 ~ d2 then + Result := title < other.title + else + Result := d1 < d2 + end + end + end + +feature -- Element change + + set_id (a_id: detachable READABLE_STRING_GENERAL) + do + if a_id = Void then + id := Void + else + create id.make_from_string_general (a_id) + end + end + + set_description (a_description: detachable READABLE_STRING_GENERAL) + do + if a_description = Void then + description := Void + else + create description.make_from_string_general (a_description) + end + end + + set_content (a_content: detachable READABLE_STRING_GENERAL; a_type: detachable READABLE_STRING_GENERAL) + do + if a_content = Void then + content := Void + content_type := Void + else + create content.make_from_string_general (a_content) + if a_type = Void then + content_type := Void + else + content_type := a_type.as_string_8 + end + end + end + + set_updated_date_with_text 
(a_date_text: detachable READABLE_STRING_32)
+		do
+			if a_date_text = Void then
+				date := Void
+			else
+				date := date_time (a_date_text)
+			end
+		end
+
+	set_author (a_author: detachable FEED_AUTHOR)
+		do
+			author := a_author
+		end
+
+	set_category (cat: READABLE_STRING_GENERAL)
+		local
+			cats: like categories
+		do
+			cats := categories
+			if cats = Void then
+				create {ARRAYED_LIST [READABLE_STRING_32]} cats.make (1)
+				categories := cats
+			end
+			cats.force (cat.as_string_32)
+		ensure
+			cat_set: has_category (cat)
+		end
+
+feature -- Visitor
+
+	accept (vis: FEED_VISITOR)
+		do
+			vis.visit_entry (Current)
+		end
+
+invariant
+
+end
diff --git a/library/text/parser/feed/src/kernel/feed_link.e b/library/text/parser/feed/src/kernel/feed_link.e
new file mode 100644
index 00000000..afe81df1
--- /dev/null
+++ b/library/text/parser/feed/src/kernel/feed_link.e
@@ -0,0 +1,56 @@
+note
+	description: "Summary description for {FEED_LINK}."
+	author: ""
+	date: "$Date$"
+	revision: "$Revision$"
+
+class
+	FEED_LINK
+
+create
+	make
+
+feature {NONE} -- Initialization
+
+	make (a_href: READABLE_STRING_8) -- Create a link to `a_href' with an empty relation and no type.
+		do
+			href := a_href
+			set_relation (Void)
+		end
+
+feature -- Access
+
+	href: READABLE_STRING_8 -- Target of the link, as given to `make'.
+
+	relation: READABLE_STRING_32 -- Relation of the link; empty string when none was set.
+
+	type: detachable READABLE_STRING_8 -- Optional type of the link; Void when none was set.
+
+feature -- Element change
+
+	set_relation (rel: detachable READABLE_STRING_GENERAL) -- Set `relation' to `rel', or to the empty string when `rel' is Void.
+		do
+			if rel = Void then
+				relation := ""
+			else
+				relation := rel.as_string_32 -- `relation' is a 32-bit string: do not squeeze it through STRING_8.
+			end
+		end
+
+	set_type (a_type: detachable READABLE_STRING_GENERAL) -- Set `type' to the 8-bit form of `a_type', or to Void.
+		do
+			if a_type = Void then
+				type := Void
+			else
+				type := a_type.as_string_8
+			end
+		end
+
+feature -- Visitor
+
+	accept (vis: FEED_VISITOR) -- Double dispatch: let `vis' process Current as a link.
+		do
+			vis.visit_link (Current)
+		end
+
+end
diff --git a/library/text/parser/feed/src/rss/rss_2_feed_parser.e b/library/text/parser/feed/src/rss/rss_2_feed_parser.e
new file mode 100644
index 00000000..162ffb14
--- /dev/null
+++ b/library/text/parser/feed/src/rss/rss_2_feed_parser.e
@@ -0,0 +1,125 @@
+note
+	description: "[
+			RSS 2.0 Parser.
+ + Warning: the implementation may not support the full RSS 2.0 specification. + ]" + date: "$Date$" + revision: "$Revision$" + EIS: "name=RSS at wikipedia", "protocol=URI", "src=https://en.wikipedia.org/wiki/RSS" + EIS: "name=RDF Site Summary (RSS) 1.0", "protocol=URI", "src=http://purl.org/rss/1.0/spec" + +class + RSS_2_FEED_PARSER + +inherit + FEED_PARSER + +feature -- Access + + name: STRING = "rss2" + -- Associated name. + + is_detected (xdoc: XML_DOCUMENT): BOOLEAN + -- Is `xdoc' an ATOM feed representation? + do + if attached {XML_ELEMENT} xdoc.element_by_name ("rss") as x_rss then + if attached xml_attribute_text (x_rss, "version") as l_version and then + l_version.starts_with ("2.") + then + Result := True + else + -- Let's default to RSS 2.0 for now. + Result := True + end + end + end + + feed (xdoc: XML_DOCUMENT): detachable FEED + -- Feed from `xdoc' XML RSS 2.0 document. + local + lnk: FEED_LINK + x_item, x_content, x_author: detachable XML_ELEMENT + e: FEED_ENTRY + l_author: FEED_AUTHOR + do + if attached xdoc.element_by_name ("rss") as x_rss then + if + attached xml_attribute_text (x_rss, "version") as l_version and then + l_version.starts_with ("2.") + then + if attached x_rss.element_by_name ("channel") as x_channel then + if attached xml_element_text (x_channel, "title") as x_title then + create Result.make (x_title) + Result.set_description (xml_element_text (x_channel, "description")) + Result.set_updated_date_with_text (xml_element_text (x_channel, "lastBuildDate")) + if attached links_from_xml (x_channel, "link") as l_links then + across + l_links as link_ic + loop + lnk := link_ic.item + Result.links.force (lnk, lnk.relation) + end + end + if attached x_channel.elements_by_name ("item") as x_items then + across + x_items as ic + loop + x_item := ic.item + if attached xml_element_text (x_item, "title") as e_title then + create e.make (e_title) + e.set_description (xml_element_text (x_item, "description")) + e.set_updated_date_with_text 
(xml_element_text (x_item, "pubDate")) + + e.set_id (xml_element_text (x_item, "guid")) + + x_author := x_item.element_by_name ("creator") + if x_author = Void then + x_author := element_by_prefixed_name (x_item, "dc" , "creator") + end + + if + x_author /= Void and then + attached x_author.text as l_author_name + then + create l_author.make (l_author_name) + e.set_author (l_author) + end + + if attached links_from_xml (x_item, "link") as l_links then + across + l_links as link_ic + loop + lnk := link_ic.item + e.links.force (lnk, lnk.relation) + end + end + if attached x_item.elements_by_name ("category") as x_categories then + across + x_categories as cats + loop + if attached cats.item.text as cat then + e.set_category (cat) + end + end + end + x_content := x_item.element_by_name ("content") + if x_content = Void then + x_content := element_by_prefixed_name (x_item, "content" , "encoded") + if x_content /= Void then + e.set_content (x_content.text, Void) + end + else + e.set_content (xml_element_code (x_content), Void) + end + Result.add_entry (e) + end + end + end + end + end + end + end + end + +end diff --git a/library/text/parser/feed/src/rss/rss_2_generator.e b/library/text/parser/feed/src/rss/rss_2_generator.e new file mode 100644 index 00000000..dd506794 --- /dev/null +++ b/library/text/parser/feed/src/rss/rss_2_generator.e @@ -0,0 +1,133 @@ +note + description: "Convert a FEED into an RSS 2.0 content." 
+ date: "$Date$" + revision: "$Revision$" + +class + RSS_2_GENERATOR + +inherit + FEED_VISITOR + + FEED_GENERATOR + +create + make + +feature {NONE} -- Initialization + + make (a_buffer: STRING_8) + do + buffer := a_buffer + initialize + end + + buffer: STRING_8 + +feature -- Visitor + + visit_feed (a_feed: FEED) + do + buffer.append ("[ + + + + ]") + buffer.append_character ('%N') + indent + indent + append_content_tag_to ("title", Void, a_feed.title, buffer) + append_content_tag_to ("description", Void, a_feed.description, buffer) + across + a_feed.links as tb + loop + tb.item.accept (Current) + end + if attached a_feed.date as dt then + append_content_tag_to ("lastBuildDate", Void, date_to_string (dt), buffer) + end + across + a_feed.entries as ic + loop + ic.item.accept (Current) + end + exdent + exdent + buffer.append ("[ + + + ]") + end + + visit_entry (a_entry: FEED_ENTRY) + do + buffer.append (indentation) + buffer.append ("%N") + indent + append_content_tag_to ("title", Void, a_entry.title, buffer) + if attached a_entry.date as dt then + append_content_tag_to ("pubDate", Void, date_to_string (dt), buffer) + end + across + a_entry.links as tb + loop + tb.item.accept (Current) + end + if attached a_entry.author as u then + u.accept (Current) + end + if attached a_entry.categories as cats then + across + cats as ic + loop + append_content_tag_to ("category", Void, ic.item, buffer) + end + end + append_content_tag_to ("guid", Void, a_entry.id, buffer) + append_content_tag_to ("description", Void, a_entry.description, buffer) + append_cdata_content_tag_to ("content:encoded", Void, a_entry.content, buffer) + + exdent + buffer.append (indentation) + buffer.append ("%N") + end + + visit_link (a_link: FEED_LINK) + local + attr: detachable ARRAYED_LIST [TUPLE [name: READABLE_STRING_8; value: READABLE_STRING_32]] + tu: TUPLE [name: READABLE_STRING_8; value: READABLE_STRING_32] + do + create attr.make (2) + if attached a_link.relation as rel and then not 
rel.is_whitespace then
+				tu := ["rel", rel]
+				attr.force (tu)
+			end
+			if attached a_link.type as t and then not t.is_whitespace then
+				tu := ["type", t.as_string_32]
+				attr.force (tu)
+			end
+			if attr.is_empty then
+				attr := Void -- No attribute collected: pass Void rather than an empty list.
+			end
+			append_content_tag_to ("link", attr, a_link.href, buffer)
+		end
+
+	visit_author (a_author: FEED_AUTHOR) -- Append the author's name as a "dc:creator" tag.
+		do
+			append_content_tag_to ("dc:creator", Void, a_author.name, buffer)
+		end
+
+feature {NONE} -- Helpers
+
+	date_to_string (dt: DATE_TIME): STRING -- Text for `dt' in RSS date-time tags (RFC 822 style, 4-digit year).
+		local
+			htdate: HTTP_DATE
+		do
+			create htdate.make_from_date_time (dt)
+			Result := htdate.rfc1123_string -- Not `rfc850_string': RFC 850 dates (long day name, hyphens, 2-digit year) are not RFC 822 dates.
+		end
+
+end
diff --git a/library/text/parser/feed/src/support/feed_generator.e b/library/text/parser/feed/src/support/feed_generator.e
new file mode 100644
index 00000000..3396c135
--- /dev/null
+++ b/library/text/parser/feed/src/support/feed_generator.e
@@ -0,0 +1,105 @@
+note
+	description: "Summary description for {FEED_GENERATOR}."
+	author: ""
+	date: "$Date$"
+	revision: "$Revision$"
+
+deferred class
+	FEED_GENERATOR
+
+inherit
+	XML_UTILITIES
+
+feature {NONE} -- Helpers
+
+	initialize -- Start with an empty indentation.
+		do
+			create indentation.make_empty
+		end
+
+	indent -- Increase indentation by one tab.
+		do
+			indentation.append ("%T")
+		end
+
+	exdent -- Decrease indentation by one character.
+		require
+			has_indentation: indentation.count > 0
+		do
+			indentation.remove_tail (1)
+		end
+
+	indentation: STRING -- Prefix appended before each generated tag.
+
+	append_content_tag_to (a_tagname: READABLE_STRING_8; a_attr: detachable ITERABLE [TUPLE [name: READABLE_STRING_8; value: detachable READABLE_STRING_GENERAL]]; a_content: detachable READABLE_STRING_GENERAL; a_output: STRING)
+		do
+			if a_content /= Void or a_attr /= Void then
+				a_output.append (indentation)
+				a_output.append ("<")
+				a_output.append (a_tagname)
+				if a_attr /= Void then
+					across
+						a_attr as ic
+					loop
+						if attached ic.item.value as l_att_value then
+							a_output.append_character (' ')
+							a_output.append (ic.item.name)
+							a_output.append_character ('=')
+							a_output.append_character ('%"')
+							a_output.append (escaped_unicode_xml (l_att_value.as_string_32))
+
a_output.append_character ('%"') + end + end + end + if a_content = Void then + a_output.append ("/>") + else + a_output.append (">") + a_output.append (escaped_unicode_xml (a_content.as_string_32)) + a_output.append ("%N") + end + end + end + + append_cdata_content_tag_to (a_tagname: READABLE_STRING_8; a_attr: detachable ITERABLE [TUPLE [name: READABLE_STRING_8; value: READABLE_STRING_32]]; a_content: detachable READABLE_STRING_32; a_output: STRING) + do + if a_content /= Void then + a_output.append (indentation) + a_output.append ("<") + a_output.append (a_tagname) + if a_attr /= Void then + across + a_attr as ic + loop + a_output.append_character (' ') + a_output.append (ic.item.name) + a_output.append_character ('=') + a_output.append_character ('%"') + a_output.append (escaped_unicode_xml (ic.item.value)) + a_output.append_character ('%"') + end + end + a_output.append (">") + a_output.append (to_cdata_element (a_content)) + a_output.append ("%N") + end + end + + to_cdata_element (a_value: READABLE_STRING_GENERAL): STRING + local + cdata: XML_CHARACTER_DATA + xdoc: XML_DOCUMENT + pprinter: XML_NODE_PRINTER + l_output: XML_STRING_8_OUTPUT_STREAM + do + create xdoc.make + create cdata.make (xdoc.root_element, a_value.as_string_32) + create pprinter.make + create Result.make (cdata.content_count) + create l_output.make (Result) + pprinter.set_output (l_output) + pprinter.process_character_data (cdata) + end + + +end diff --git a/library/text/parser/feed/src/support/feed_helpers.e b/library/text/parser/feed/src/support/feed_helpers.e new file mode 100644 index 00000000..772fed81 --- /dev/null +++ b/library/text/parser/feed/src/support/feed_helpers.e @@ -0,0 +1,87 @@ +note + description: "Summary description for {FEED_HELPERS}." 
+	author: ""
+	date: "$Date$"
+	revision: "$Revision$"
+
+class
+	FEED_HELPERS
+
+feature -- Helpers
+
+	date_time (a_date_string: READABLE_STRING_32): DATE_TIME
+			-- Date and time parsed from `a_date_string': either digits first, such as "2015-08-14T10:34:13.493740Z",
+			-- or an HTTP-style date such as "Sat, 07 Sep 2002 00:00:01 GMT" (that form is handed over to HTTP_DATE).
+		local
+			i,j: INTEGER
+			s: READABLE_STRING_GENERAL
+			y,m,d,h,min: INTEGER
+			sec: REAL_64
+			htdate: HTTP_DATE
+			str: STRING_32
+		do
+			if a_date_string.count > 0 and then a_date_string.item (1).is_digit then
+				i := a_date_string.index_of ('-', 1)
+				if i > 0 then
+					s := a_date_string.substring (1, i - 1)
+					y := s.to_integer_32 -- Year
+					j := i + 1
+					i := a_date_string.index_of ('-', j)
+					if i > 0 then
+						s := a_date_string.substring (j, i - 1)
+						m := s.to_integer_32 -- Month
+						j := i + 1
+						i := a_date_string.index_of ('T', j) -- Day ends at 'T', else at a space, else at end of text.
+						if i = 0 then
+							i := a_date_string.index_of (' ', j)
+						end
+						if i = 0 then
+							i := a_date_string.count + 1
+						end
+						if i > 0 then
+							s := a_date_string.substring (j, i - 1)
+							if s.is_integer then
+								d := s.to_integer_32 -- Day
+								j := i + 1
+								i := a_date_string.index_of (':', j)
+								if i > 0 then
+									s := a_date_string.substring (j, i - 1)
+									h := s.to_integer -- Hour
+									j := i + 1
+									i := a_date_string.index_of (':', j)
+									if i > 0 then
+										s := a_date_string.substring (j, i - 1)
+										min := s.to_integer -- Minute
+										j := i + 1
+										i := a_date_string.index_of ('Z', j) -- Seconds run up to 'Z', or to end of text.
+										if i = 0 then
+											i := a_date_string.count + 1
+										end
+										s := a_date_string.substring (j, i - 1)
+										sec := s.to_double -- Seconds, possibly fractional
+									end
+								end
+							end
+						end
+					end
+				end
+				create Result.make (y, m, d, h, min, 0) -- Minute is `min'; the month `m' used to be passed here by mistake.
+				Result.fine_second_add (sec) -- Seconds are added afterwards so the fractional part is kept.
+			else
+				i := a_date_string.index_of ('+', 1)
+				if i > 0 then
+					str := a_date_string.substring (1, i - 1) -- Drop the "+hhmm" suffix and parse the rest as GMT.
+					str.append (" GMT")
+					create htdate.make_from_string (str)
+					Result := htdate.date_time
+					if a_date_string.substring (i + 1, a_date_string.count).is_case_insensitive_equal ("0000") then
+							-- Zero offset: nothing to adjust. Any other offset is currently not applied either.
+					end
+				else
+					create htdate.make_from_string (a_date_string)
+					Result := htdate.date_time
+				end
+			end
+		end
+
+end
diff --git
a/library/text/parser/feed/src/support/feed_parser_utilities.e b/library/text/parser/feed/src/support/feed_parser_utilities.e
new file mode 100644
index 00000000..c03952fa
--- /dev/null
+++ b/library/text/parser/feed/src/support/feed_parser_utilities.e
@@ -0,0 +1,84 @@
+note
+	description: "Summary description for {FEED_PARSER_UTILITIES}."
+	author: ""
+	date: "$Date$"
+	revision: "$Revision$"
+
+deferred class
+	FEED_PARSER_UTILITIES
+
+feature -- Access
+
+	xml_element_text (a_parent: XML_ELEMENT; a_name: READABLE_STRING_GENERAL): detachable READABLE_STRING_32
+			-- Text of the child of `a_parent' named `a_name', adjusted on both sides;
+			-- Void when there is no such child or when the child has no text.
+		do
+			if
+				attached a_parent.element_by_name (a_name) as l_child and then
+				attached l_child.text as l_text
+			then
+				l_text.left_adjust
+				l_text.right_adjust
+				Result := l_text
+			end
+		end
+
+	xml_attribute_text (a_elt: XML_ELEMENT; a_att_name: READABLE_STRING_GENERAL): detachable READABLE_STRING_32
+			-- Value of the attribute of `a_elt' named `a_att_name'; Void when absent.
+		do
+			if attached a_elt.attribute_by_name (a_att_name) as l_attribute then
+				Result := l_attribute.value
+			end
+		end
+
+	xml_element_code (elt: XML_ELEMENT): STRING_32
+			-- Text written by an XML_NODE_PRINTER processing `elt'.
+		local
+			l_printer: XML_NODE_PRINTER
+			l_stream: XML_STRING_32_OUTPUT_STREAM
+		do
+			create Result.make_empty
+				-- The printer writes into `Result' through the stream.
+			create l_stream.make (Result)
+			create l_printer.make
+			l_printer.set_output (l_stream)
+			l_printer.process_element (elt)
+		end
+
+	links_from_xml (elt: XML_ELEMENT; a_link_elt_name: READABLE_STRING_GENERAL): detachable ARRAYED_LIST [FEED_LINK]
+			-- Links built from the elements of `elt' named `a_link_elt_name'.
+			-- An element with an "href" attribute that is valid as STRING_8 gives a link
+			-- to that address, with its "rel" and "type" attributes as relation and type;
+			-- otherwise an element whose text is not blank gives a link to that text.
+			-- Elements matching neither case are ignored.
+		local
+			l_link: FEED_LINK
+		do
+			if attached elt.elements_by_name (a_link_elt_name) as l_children then
+				create Result.make (0)
+				across
+					l_children as c
+				loop
+					if
+						attached xml_attribute_text (c.item, "href") as l_href and then
+						l_href.is_valid_as_string_8
+					then
+							-- Atom style: address and metadata are carried by attributes.
+						create l_link.make (l_href.as_string_8)
+						l_link.set_relation (xml_attribute_text (c.item, "rel"))
+						l_link.set_type (xml_attribute_text (c.item, "type"))
+						Result.force (l_link)
+					elseif
+						attached c.item.text as l_url and then
+						not l_url.is_whitespace
+					then
+							-- Otherwise the address is the text of the element.
+						create l_link.make (l_url)
+						Result.force (l_link)
+					end
+				end
+			end
+		end
+
+	element_by_prefixed_name (elt: XML_ELEMENT;
a_ns_prefix: READABLE_STRING_GENERAL; a_name: READABLE_STRING_GENERAL): detachable XML_ELEMENT
+			-- First XML_ELEMENT item of `elt' whose namespace prefix is `a_ns_prefix'
+			-- and whose name is `a_name'; Void when there is none.
+		do
+			across
+				elt as ic
+			until
+				Result /= Void
+			loop
+				if attached {XML_ELEMENT} ic.item as x_item then
+					if
+						attached x_item.ns_prefix as l_ns_prefix and then a_ns_prefix.same_string (l_ns_prefix) and then
+						a_name.same_string (x_item.name)
+					then
+						Result := x_item
+					end
+				end
+			end
+		end
+
+end
diff --git a/library/text/parser/feed/src/support/feed_to_string_32_visitor.e b/library/text/parser/feed/src/support/feed_to_string_32_visitor.e
new file mode 100644
index 00000000..db795e07
--- /dev/null
+++ b/library/text/parser/feed/src/support/feed_to_string_32_visitor.e
@@ -0,0 +1,197 @@
+note
+	description: "Convert a FEED to STRING_32 representation. Mostly for debug output."
+	date: "$Date$"
+	revision: "$Revision$"
+
+class
+	FEED_TO_STRING_32_VISITOR
+
+inherit
+	FEED_VISITOR
+
+create
+	make
+
+feature {NONE} -- Initialization
+
+	make (a_buffer: STRING_32)
+			-- Set `buffer' to `a_buffer' and start with an empty `indentation'.
+		do
+			buffer := a_buffer
+			create indentation.make_empty
+		end
+
+	buffer: STRING_32
+			-- String receiving everything this visitor appends.
+
+feature -- Visitor
+
+	visit_feed (a_feed: FEED)
+			-- Append a text rendering of `a_feed' to `buffer': optional "#id" and
+			-- "date:" lines, the title, then, one level deeper, the description, the
+			-- links and the entries, each entry preceded by a line of 40 dashes written
+			-- at the level of the title.
+			-- `indentation' is the same on exit as on entry.
+		do
+			if attached a_feed.id as l_id then
+				append_text ("#")
+				append (l_id)
+				append_new_line
+			end
+			if attached a_feed.date as dt then
+				append_text ("date:")
+				append (dt.out)
+				append_new_line
+			end
+
+			append_text (a_feed.title)
+			append_new_line
+			indent
+			if attached a_feed.description as l_desc then
+				append_text (l_desc)
+				append_new_line
+			end
+
+			across
+				a_feed.links as ic
+			loop
+				ic.item.accept (Current)
+				append_new_line
+			end
+
+			append_new_line
+
+			across
+				a_feed.entries as ic
+			loop
+					-- Step back one level only for the separator line.
+				exdent
+				append_text (create {STRING_32}.make_filled ('-', 40))
+				append_new_line
+				indent
+				ic.item.accept (Current)
+				append_new_line
+			end
+				-- Undo the `indent' done after the title, so that the visitor can be
+				-- reused without accumulating indentation.
+			exdent
+		end
+
+	visit_entry (a_entry: FEED_ENTRY)
+			-- Append a text rendering of `a_entry' to `buffer'.
+		do
+			if attached a_entry.id as l_id then
+				append_text ("#")
+				append (l_id)
+				append_new_line
+			end
+			if attached a_entry.date as dt then
+				append_text ("date:")
+				append (dt.out)
+
append_new_line
+			end
+			append_text (a_entry.title)
+			append_new_line
+				-- Everything below the title is written one level deeper.
+			indent
+			if attached a_entry.author as l_author then
+				l_author.accept (Current)
+				append_new_line
+			end
+			if attached a_entry.categories as cats then
+				append_text ("Categories: ")
+				from
+					cats.start
+				until
+					cats.after
+				loop
+						-- Separator before every category except the first one.
+					if not cats.isfirst then
+						append (", ")
+					end
+					append (cats.item)
+					cats.forth
+				end
+				append_new_line
+			end
+			if attached a_entry.description as l_summary then
+				append_text (l_summary)
+				append_new_line
+			end
+
+			across
+				a_entry.links as ic
+			loop
+				ic.item.accept (Current)
+				append_new_line
+			end
+
+			if attached a_entry.content as l_content then
+				append_text (l_content)
+				append_new_line
+			end
+			exdent
+		end
+
+	visit_link (a_link: FEED_LINK)
+			-- Append "@" followed by the relation of `a_link', " -> " and its href.
+		local
+			s: STRING_32
+		do
+			create s.make_empty
+			s.append_string_general ("@")
+			s.append_string (a_link.relation)
+			s.append_string (" -> ")
+			s.append_string (a_link.href)
+			append_text (s)
+		end
+
+	visit_author (a_author: FEED_AUTHOR)
+			-- Append "by " followed by the name of `a_author' and, when available,
+			-- the email between parentheses.
+		local
+			s: STRING_32
+		do
+			create s.make_empty
+			s.append_string_general ("by ")
+			s.append_string (a_author.name)
+			if attached a_author.email as l_email then
+				s.append_character (' ')
+				s.append_character ('(')
+				s.append_string_general (l_email)
+				s.append_character (')')
+			end
+			append_text (s)
+		end
+
+feature -- Helper
+
+	indentation: STRING_32
+			-- Prefix written by `append_text' in front of each line.
+
+	indent
+			-- Go one level deeper: add two spaces to `indentation'.
+			-- The width matches what `exdent' removes, so that a pair of
+			-- `indent' / `exdent' calls restores the previous level.
+		do
+			indentation.append (create {STRING_32}.make_filled (' ', 2))
+		end
+
+	exdent
+			-- Go one level up: remove the last two characters of `indentation'.
+		do
+			indentation.remove_tail (2)
+		end
+
+	append_new_line
+			-- Append a new line character to `buffer'.
+		do
+			append ("%N")
+		end
+
+	append_text (s: READABLE_STRING_GENERAL)
+			-- Append `s' to `buffer'; when `indentation' is not empty, `s' is split
+			-- on '%N' and every line is prefixed with `indentation'.
+		local
+			lst: LIST [READABLE_STRING_GENERAL]
+		do
+			if indentation.is_empty then
+				append (s)
+			else
+				lst := s.split ('%N')
+				from
+					lst.start
+				until
+					lst.after
+				loop
+					append (indentation)
+					append (lst.item)
+						-- Put back the separators removed by `split'.
+					if not lst.islast then
+						append ("%N")
+					end
+					lst.forth
+				end
+			end
+		end
+
+	append (s: READABLE_STRING_GENERAL)
+			-- Append `s' to `buffer' as is.
+		do
+			buffer.append_string_general (s)
+		end
+
+end
diff --git
a/library/text/parser/feed/src/support/feed_visitor.e b/library/text/parser/feed/src/support/feed_visitor.e
new file mode 100644
index 00000000..a0f22d97
--- /dev/null
+++ b/library/text/parser/feed/src/support/feed_visitor.e
@@ -0,0 +1,28 @@
+note
+	description: "Summary description for {FEED_VISITOR}."
+	author: ""
+	date: "$Date$"
+	revision: "$Revision$"
+
+deferred class
+	FEED_VISITOR
+
+feature -- Visit
+
+	visit_feed (a_feed: FEED)
+			-- Process `a_feed'.
+		deferred
+		end
+
+	visit_link (a_link: FEED_LINK)
+			-- Process `a_link'.
+		deferred
+		end
+
+	visit_entry (a_entry: FEED_ENTRY)
+			-- Process `a_entry'.
+		deferred
+		end
+
+	visit_author (a_author: FEED_AUTHOR)
+			-- Process `a_author'.
+		deferred
+		end
+
+end
diff --git a/library/text/parser/feed/tests/application.e b/library/text/parser/feed/tests/application.e
new file mode 100644
index 00000000..4de78ed2
--- /dev/null
+++ b/library/text/parser/feed/tests/application.e
@@ -0,0 +1,91 @@
+note
+	description: "Summary description for {APPLICATION}."
+	author: ""
+	date: "$Date$"
+	revision: "$Revision$"
+
+class
+	APPLICATION
+
+create
+	make
+
+feature -- Initialization
+
+	make
+			-- Run `test_file' on a local RSS file, then `test_web' on a feed URL.
+		do
+			test_file ("data_rss_1_0.rss")
+			test_web ("https://bertrandmeyer.com/feed/")
+		end
+
+	test_file (fn: READABLE_STRING_GENERAL)
+			-- Read the whole content of the file named `fn' and pass it to `test_feed'.
+		local
+			t: STRING
+			f: PLAIN_TEXT_FILE
+		do
+			create f.make_with_name (fn)
+			f.open_read
+			create t.make_empty
+				-- Read by chunks of 1024 characters; a shorter chunk is the last one.
+			from
+				f.read_stream_thread_aware (1_024)
+			until
+				f.last_string.count < 1024
+			loop
+				t.append (f.last_string)
+				f.read_stream_thread_aware (1_024)
+			end
+				-- Keep the final, shorter chunk as well.
+			t.append (f.last_string)
+			f.close
+			test_feed (t)
+		end
+
+	test_feed (t: READABLE_STRING_8)
+			-- Parse `t' with FEED_DEFAULT_PARSERS and, when a feed is obtained,
+			-- print what the visitors and generators below produce for it.
+		local
+			feed_parser: FEED_DEFAULT_PARSERS
+			vis: FEED_TO_STRING_32_VISITOR
+			gen: RSS_2_GENERATOR
+			atom_gen: ATOM_GENERATOR
+			s: STRING_32
+			s8: STRING_8
+			pp: XML_PRETTY_PRINT_FILTER
+					-- `pp' is not used by this routine.
+		do
+			create feed_parser
+			if attached feed_parser.feed_from_string (t) as l_feed then
+					-- Plain text rendering.
+				create s.make_empty
+				create vis.make (s)
+				l_feed.accept (vis)
+				print (s)
+
+					-- RSS 2 rendering.
+				create s8.make_empty
+				create gen.make (s8)
+
l_feed.accept (gen)
+				print (s8)
+
+					-- ATOM rendering of the same feed.
+				create s8.make_empty
+				create atom_gen.make (s8)
+				l_feed.accept (atom_gen)
+				print (s8)
+
+			end
+		end
+
+	test_web (a_url: READABLE_STRING_8)
+			-- Get `a_url' with a libcurl HTTP client and, when no error occurred and
+			-- the response has a body, pass that body to `test_feed'.
+		local
+			cl: LIBCURL_HTTP_CLIENT
+			sess: HTTP_CLIENT_SESSION
+		do
+			create cl.make
+			sess := cl.new_session (a_url)
+				-- NOTE(review): presumably disables certificate verification -- confirm
+				-- against HTTP_CLIENT_SESSION before reusing outside of tests.
+			sess.set_is_insecure (True)
+			if attached sess.get ("", Void) as resp then
+				if
+					not resp.error_occurred and then
+					attached resp.body as l_feed
+				then
+					test_feed (l_feed)
+				end
+			end
+		end
+
+end
diff --git a/library/text/parser/feed/tests/atom_test_set.e b/library/text/parser/feed/tests/atom_test_set.e
new file mode 100644
index 00000000..72c73ecc
--- /dev/null
+++ b/library/text/parser/feed/tests/atom_test_set.e
@@ -0,0 +1,74 @@
+note
+	description: "[
+		Eiffel tests that can be executed by testing tool.
+	]"
+	author: "EiffelStudio test wizard"
+	date: "$Date$"
+	revision: "$Revision$"
+	testing: "type/manual"
+
+class
+	ATOM_TEST_SET
+
+inherit
+	EQA_TEST_SET
+
+feature -- Test routines
+
+	test_atom
+			-- Parse `atom_string_1' and print its text rendering.
+			-- NOTE: both assertions below are unconditional failures tagged
+			-- "not_implemented", so this test fails whether or not the feed is parsed.
+		local
+			feed_parser: FEED_DEFAULT_PARSERS
+			vis: FEED_TO_STRING_32_VISITOR
+			s: STRING_32
+		do
+			create feed_parser
+			if attached feed_parser.feed_from_string (atom_string_1) as l_feed then
+				create s.make_empty
+				create vis.make (s)
+				l_feed.accept (vis)
+				print (s)
+				assert ("not_implemented", False)
+			end
+			assert ("not_implemented", False)
+		end
+
+feature {NONE} -- Data
+
+	atom_string_1: STRING = "[
+ + + + + Example Feed + A subtitle. + + + urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6 + 2003-12-13T18:30:02Z + + + + Atom-Powered Robots Run Amok + + + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z + Some text. +
+

This is the entry content.

+
+
+ + John Doe + johndoe@example.com + +
+ +
+ ]"
+end
+
+
diff --git a/library/text/parser/feed/tests/rss_test_set.e b/library/text/parser/feed/tests/rss_test_set.e
new file mode 100644
index 00000000..860823c0
--- /dev/null
+++ b/library/text/parser/feed/tests/rss_test_set.e
@@ -0,0 +1,60 @@
+note
+	description: "Summary description for {RSS_TEST_SET}."
+	author: ""
+	date: "$Date$"
+	revision: "$Revision$"
+
+class
+	RSS_TEST_SET
+
+inherit
+	EQA_TEST_SET
+
+feature -- Test routines
+
+	test_rss_2
+			-- Parse `rss_2_string_1' and print its text rendering.
+			-- NOTE: both assertions below are unconditional failures tagged
+			-- "not_implemented", so this test fails whether or not the feed is parsed.
+		local
+			feed_parser: FEED_DEFAULT_PARSERS
+			vis: FEED_TO_STRING_32_VISITOR
+			s: STRING_32
+		do
+			create feed_parser
+			if attached feed_parser.feed_from_string (rss_2_string_1) as l_feed then
+				create s.make_empty
+				create vis.make (s)
+				l_feed.accept (vis)
+				print (s)
+				assert ("not_implemented", False)
+			end
+			assert ("not_implemented", False)
+		end
+
+feature {NONE} -- Data
+
+	rss_2_string_1: STRING = "[
+ + + + Mon site + Ceci est un exemple de flux RSS 2.0 + Sat, 07 Sep 2002 00:00:01 GMT + http://www.example.org + + Post N1 + This is my first post + Sat, 07 Sep 2002 00:00:01 GMT + http://www.example.org/actu1 + + + Post N2 + This is my second post + Sat, 07 Sep 2002 00:00:01 GMT + http://www.example.org/actu2 + + + + ]"
+end
+
+
diff --git a/library/text/parser/feed/tests/tests-safe.ecf b/library/text/parser/feed/tests/tests-safe.ecf
new file mode 100644
index 00000000..6620d472
--- /dev/null
+++ b/library/text/parser/feed/tests/tests-safe.ecf
@@ -0,0 +1,14 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/library/text/parser/feed/tests/tests.ecf b/library/text/parser/feed/tests/tests.ecf
new file mode 100644
index 00000000..564fc66c
--- /dev/null
+++ b/library/text/parser/feed/tests/tests.ecf
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
+
+
+