diff --git a/documentation/current/solutions/text-processing/eiffellex/eiffellex-sample/eiffel-scanner/eiffel-scan-console-input.wiki b/documentation/current/solutions/text-processing/eiffellex/eiffellex-sample/eiffel-scanner/eiffel-scan-console-input.wiki index ec971ad2..9818c141 100644 --- a/documentation/current/solutions/text-processing/eiffellex/eiffellex-sample/eiffel-scanner/eiffel-scan-console-input.wiki +++ b/documentation/current/solutions/text-processing/eiffellex/eiffellex-sample/eiffel-scanner/eiffel-scan-console-input.wiki @@ -11,41 +11,41 @@ class EIFFEL_SCAN inherit - SCANNING - rename - make as scanning_make - end; + SCANNING + rename + make as scanning_make + end; - ARGUMENTS - undefine - copy, consistent, is_equal, setup - end + ARGUMENTS + undefine + copy, consistent, is_equal, setup + end create - make + make feature - make is - -- Create a lexical analyser for Eiffel if none, - -- then use it to analyze the file of name - -- `file_name'. - local - file_name: STRING; - do - if argument_count < 1 then - io.error.putstring ("Usage: eiffel_scan eiffel_class_file.e%N") - else - file_name := argument (1); - scanning_make; - build ("eiffel_lex", "eiffel_regular"); - io.putstring ("Scanning file `"); - io.putstring (file_name); - io.putstring ("'.%N"); - analyze (file_name) - end - end -- make + make + -- Create a lexical analyser for Eiffel if none, + -- then use it to analyze the file of name + -- `file_name'. 
+ local + file_name: STRING; + do + if argument_count < 1 then + io.error.putstring ("Usage: eiffel_scan eiffel_class_file.e%N") + else + file_name := argument (1); + scanning_make; + build ("eiffel_lex", "eiffel_regular"); + io.putstring ("Scanning file `"); + io.putstring (file_name); + io.putstring ("'.%N"); + analyze (file_name) + end + end -- make end -- class EIFFEL_SCAN diff --git a/documentation/current/solutions/text-processing/eiffellex/eiffellex-sample/eiffel-scanner/eiffel-scan-text.wiki b/documentation/current/solutions/text-processing/eiffellex/eiffellex-sample/eiffel-scanner/eiffel-scan-text.wiki index e94abcc7..2bb58404 100644 --- a/documentation/current/solutions/text-processing/eiffellex/eiffellex-sample/eiffel-scanner/eiffel-scan-text.wiki +++ b/documentation/current/solutions/text-processing/eiffellex/eiffellex-sample/eiffel-scanner/eiffel-scan-text.wiki @@ -3,51 +3,51 @@ [[Property:uuid|092bd183-2fc4-ae65-02b9-d66933492a50]] - class - EIFFEL_SCAN +class + EIFFEL_SCAN - inherit - SCANNING - rename - make as scanning_make - end +inherit + SCANNING + rename + make as scanning_make + end - ARGUMENTS - undefine - copy, - consistent, - is_equal, - setup - end + ARGUMENTS + undefine + copy, + consistent, + is_equal, + setup + end - create - make +create + make - feature +feature - make is - -- Create a lexical analyser for Eiffel if none, - -- then use it to analyze the file of name - -- file_name. - local - file_name: STRING - do - if argument_count < 1 then - io.error.putstring ("Usage: eiffel_scan eiffel_class_file.e%N") - else - file_name := argument (1) - scanning_make - build ("eiffel_lex", "eiffel_regular") - io.putstring ("Scanning file `") - io.putstring (file_name) - io.putstring ("'.%N") - analyze (file_name) - end - end + make + -- Create a lexical analyser for Eiffel if none, + -- then use it to analyze the file of name + -- file_name. 
+ local + file_name: STRING + do + if argument_count < 1 then + io.error.putstring ("Usage: eiffel_scan eiffel_class_file.e%N") + else + file_name := argument (1) + scanning_make + build ("eiffel_lex", "eiffel_regular") + io.putstring ("Scanning file `") + io.putstring (file_name) + io.putstring ("'.%N") + analyze (file_name) + end + end - end -- class EIFFEL_SCAN +end -- class EIFFEL_SCAN - + diff --git a/documentation/current/solutions/text-processing/eiffellex/eiffellex-tutorial.wiki b/documentation/current/solutions/text-processing/eiffellex/eiffellex-tutorial.wiki index fd307e87..544a9ea9 100644 --- a/documentation/current/solutions/text-processing/eiffellex/eiffellex-tutorial.wiki +++ b/documentation/current/solutions/text-processing/eiffellex/eiffellex-tutorial.wiki @@ -96,14 +96,15 @@ Class [[ref:/libraries/lex/reference/scanning_chart|SCANNING]] may be used as a ===The build procedure=== To obtain a lexical analyzer in a descendant of class [[ref:/libraries/lex/reference/scanning_chart|SCANNING]] , use the procedure - build (store_file_name, grammar_file_name: STRING) + + build (store_file_name, grammar_file_name: STRING) -If no file of name store_file_name exists, then build reads the lexical grammar from the file of name grammar_file_name , builds the corresponding lexical analyzer, and stores it into store_file_name . +If no file of name store_file_name exists, then build reads the lexical grammar from the file of name grammar_file_name, builds the corresponding lexical analyzer, and stores it into store_file_name. -If there already exists a file of name grammar_file_name , build uses it to recreate an analyzer without using the grammar_file_name . +If there already exists a file of name store_file_name, build uses it to recreate an analyzer without using the grammar_file_name.
===Lexical grammar files=== -A lexical grammar file (to be used as second argument to build, corresponding to grammar_file_name ) should conform to a simple structure, of which the file ''eiffel_regular'' in the examples directory provides a good illustration. +A lexical grammar file (to be used as second argument to build, corresponding to grammar_file_name) should conform to a simple structure, of which the file ''eiffel_regular'' in the examples directory provides a good illustration. Here is the general form: @@ -141,7 +142,7 @@ Once build has given you an analyzer, you may use it to analyze analyze (input_file_name: STRING) -This will read in and process successive input tokens. Procedure analyze will apply to each of these tokens the action of procedure do_a_token. As defined in SCANNING, this procedure prints out information on the token: its string value, its type, whether it is a keyword and if so its code. You may redefine it in any descendant class so as to perform specific actions on each token. +This will read in and process successive input tokens. Procedure analyze will apply to each of these tokens the action of procedure do_a_token. As defined in SCANNING, this procedure prints out information on the token: its string value, its type, whether it is a keyword and if so its code. You may redefine it in any descendant class so as to perform specific actions on each token. The initial action begin_analysis, which by default prints a header, and the terminal action end_analysis, which by default does nothing, may also be redefined. @@ -153,9 +154,9 @@ Let us look more precisely at how we can use a lexical analyzer to analyze an in ===Class LEXICAL=== -Procedure analyze takes care of the most common needs of lexical analysis. 
But if you need more advanced lexical analysis facilities you will need an instance of class [[ref:/libraries/lex/reference/lexical_chart|LEXICAL]] (a direct instance of [[ref:/libraries/lex/reference/lexical_chart|LEXICAL]] itself or of one of its proper descendants). If you are using class [[ref:/libraries/lex/reference/scanning_chart|SCANNING]] as described above, you will have access to such an instance through the attribute analyzer. +Procedure analyze takes care of the most common needs of lexical analysis. But if you need more advanced lexical analysis facilities you will need an instance of class [[ref:/libraries/lex/reference/lexical_chart|LEXICAL]] (a direct instance of [[ref:/libraries/lex/reference/lexical_chart|LEXICAL]] itself or of one of its proper descendants). If you are using class [[ref:/libraries/lex/reference/scanning_chart|SCANNING]] as described above, you will have access to such an instance through the attribute analyzer. -This discussion will indeed assume that you have an entity attached to an instance of [[ref:/libraries/lex/reference/lexical_chart|LEXICAL]] . The name of that entity is assumed to be analyzer, although it does not need to be the attribute from [[ref:/libraries/lex/reference/scanning_chart|SCANNING]] . You can apply to that analyzer the various exported features features of class [[ref:/libraries/lex/reference/lexical_chart|LEXICAL]] , explained below. All the calls described below should use analyzer as their target, as in +This discussion will indeed assume that you have an entity attached to an instance of [[ref:/libraries/lex/reference/lexical_chart|LEXICAL]] . The name of that entity is assumed to be analyzer, although it does not need to be the attribute from [[ref:/libraries/lex/reference/scanning_chart|SCANNING]] . You can apply to that analyzer the various exported features of class [[ref:/libraries/lex/reference/lexical_chart|LEXICAL]] , explained below.
All the calls described below should use analyzer as their target, as in analyzer.set_file ("my_file_name") @@ -172,7 +173,7 @@ You may also retrieve an analyzer from a previous session. [[ref:/libraries/lex/ analyzer ?= retrieved -If you do not want to make the class a descendant of [[ref:/libraries/base/reference/storable_chart|STORABLE]] , use the creation procedure make of [[ref:libraries/lex/reference/lexical_chart|LEXICAL]] , not to be confused with make_new above: +If you do not want to make the class a descendant of [[ref:/libraries/base/reference/storable_chart|STORABLE]] , use the creation procedure make of [[ref:/libraries/lex/reference/lexical_chart|LEXICAL]] , not to be confused with make_new above: create analyzer.make analyzer ?= analyzer.retrieved @@ -182,20 +183,20 @@ If you do not want to make the class a descendant of [[ref:/libraries/base/refer To analyze a text, call set_file or set_string to specify the document to be parsed. With the first call, the analysis will be applied to a file; with the second, to a string. -{{note|if you use procedure analyze of [[ref:/libraries/lex/reference/scanning_chart|SCANNING]] , you do not need any such call, since analyze calls set_file on the file name passed as argument. }} +{{note|If you use procedure analyze of [[ref:/libraries/lex/reference/scanning_chart|SCANNING]] , you do not need any such call, since analyze calls set_file on the file name passed as argument.
}} ===Obtaining the tokens=== -The basic procedure for analyzing successive tokens in the text is get_token, which reads in one token and sets up various attributes of the analyzer to record properties of that token: +The basic procedure for analyzing successive tokens in the text is get_token, which reads in one token and sets up various attributes of the analyzer to record properties of that token: * last_token, a function of type [[ref:/libraries/lex/reference/token_chart|TOKEN]] , which provides all necessary information on the last token read. * token_line_number and token_column_number, to know where the token is in the text. These queries return results of type INTEGER. -* token_type, giving the regular expression type, identified by its integer number (which is the value No_token if no correct token was recognized). -* other_possible_tokens, an array giving all the other possible token types of the last token. (If token_type is No_token the array is empty.) -* end_of_text, a boolean attribute used to record whether the end of text has been reached. If so, subsequent calls to get_token will have no effect. +* token_type, giving the regular expression type, identified by its integer number (which is the value No_token if no correct token was recognized). +* other_possible_tokens, an array giving all the other possible token types of the last token. (If token_type is No_token the array is empty.) +* end_of_text, a boolean attribute used to record whether the end of text has been reached. If so, subsequent calls to get_token will have no effect. -Procedure get_token recognizes the longest possible token. So if <, = and <= are all regular expressions in the grammar, the analyzer recognizes <= as one token, rather than < followed by =. You can use other_possible_tokens to know what shorter tokens were recognized but not retained. +Procedure get_token recognizes the longest possible token. 
So if <, = and <= are all regular expressions in the grammar, the analyzer recognizes <= as one token, rather than < followed by =. You can use other_possible_tokens to know what shorter tokens were recognized but not retained. -If it fails to recognize a regular expression, get_token sets token_type to No_token and advances the input cursor by one character. +If it fails to recognize a regular expression, get_token sets token_type to No_token and advances the input cursor by one character. ===The basic scheme=== @@ -217,7 +218,7 @@ Here is the most common way of using the preceding facilities: end_analysis -This scheme is used by procedure analyze of class [[ref:/libraries/lex/reference/scanning_chart|SCANNING]] , so that in standard cases you may simply inherit from that class and redefine procedures begin_analysis, do_a_token and end_analysis. If you are not inheriting from [[ref:libraries/lex/reference/scanning_chart|SCANNING]] , these names simply denote procedures that you must provide. +This scheme is used by procedure analyze of class [[ref:/libraries/lex/reference/scanning_chart|SCANNING]] , so that in standard cases you may simply inherit from that class and redefine procedures begin_analysis, do_a_token, and end_analysis. If you are not inheriting from [[ref:/libraries/lex/reference/scanning_chart|SCANNING]] , these names simply denote procedures that you must provide. ==REGULAR EXPRESSIONS== @@ -234,7 +235,7 @@ denotes a set of ten tokens, each consisting of a single digit. ===Basic expressions=== -A character expression, written '''character''' where ''character'' is a single character, describes a set of tokens with just one element: the one-character token character. For example, '''0''' describes the set containing the single-digit single token ''0''. +A character expression, written '' 'character' '' where ''character'' is a single character, describes a set of tokens with just one element: the one-character token character.
For example, '' '0' '' describes the set containing the single-digit single token ''0''. Cases in which character is not a printable character use the following conventions: {| border="1" @@ -270,13 +271,13 @@ Cases in which character is not a printable character use the following conventi ===Intervals=== -An interval, written ''lower..upper'' where ''lower'' and ''upper'' are character expressions, describes a set of one-character tokens: all the characters whose ASCII code is between the codes for the characters in ''lower'' and ''upper''. For example, '''0'..'9''' contains all tokens made of a single decimal digit. +An interval, written ''lower..upper'' where ''lower'' and ''upper'' are character expressions, describes a set of one-character tokens: all the characters whose ASCII code is between the codes for the characters in ''lower'' and ''upper''. For example, '' '0'..'9' '' contains all tokens made of a single decimal digit. ===Basic operator expressions=== -A parenthesized expression, written ( ''exp'') where ''exp'' is a regular expression, describes the same set of tokens as ''exp''. This serves to remove ambiguities in complex regular expressions. For example, the parenthesized expression ( '''0'..'9''') also describes all single-decimal-digit tokens. +A parenthesized expression, written (''exp'') where ''exp'' is a regular expression, describes the same set of tokens as ''exp''. This serves to remove ambiguities in complex regular expressions. For example, the parenthesized expression ('' '0'..'9' '') also describes all single-decimal-digit tokens. -A difference, written ''interval - char'', where ''interval'' is an interval expression and ''char'' is a character expression, describes the set of tokens which are in ''exp'' but not in ''char''. For example, the difference '''0'..'9' - '4''' describes all single-decimal-digit tokens except those made of the digit 4. 
+A difference, written ''interval - char'', where ''interval'' is an interval expression and ''char'' is a character expression, describes the set of tokens which are in ''interval'' but not in ''char''. For example, the difference '' '0'..'9' - '4' '' describes all single-decimal-digit tokens except those made of the digit 4. {{caution|A difference may only apply to an interval and a single character. }} @@ -287,18 +288,18 @@ An unbounded iteration, written ''*exp'' or ''+exp'' where ''exp'' is a regular A fixed iteration, written ''n exp'' where ''n'' is a natural integer constant and ''exp'' is a regular expression, describes the set of tokens made of sequences of exactly ''n'' specimens of ''exp''. For example, ''3 ('A'..'Z')'' describes the set of all three-letter upper-case tokens. ===Other operator expressions=== -A concatenation, writtenexp 1 exp 2 ... exp n, describes the set of tokens made of a specimen of exp 1 followed by a specimen of exp 2 etc. For example, the concatenation '''1'..'9' * ('0'..'9')'' describes the set of tokens made of one or more decimal digits, not beginning with a zero - in other words, integer constants in the usual notation. +A concatenation, written exp 1 exp 2 ... exp n, describes the set of tokens made of a specimen of exp 1 followed by a specimen of exp 2 etc. For example, the concatenation '' '1'..'9' * ('0'..'9')'' describes the set of tokens made of one or more decimal digits, not beginning with a zero - in other words, integer constants in the usual notation. An optional component, written ''[exp]'' where ''exp'' is a regular expression, describes the set of tokens that includes the empty token and all specimens of ''exp''. Optional components usually appear in concatenations. -Concatenations may be inconvenient when the concatenated elements are simply characters, as in '''A' ' ' 'T' 'e' 'x' 't'''. In this case you may use a '''string''' in double quotes, as in
- "A Text" +Concatenations may be inconvenient when the concatenated elements are simply characters, as in '' 'A' ' ' 'T' 'e' 'x' 't' ''. In this case you may use a '''string''' in double quotes, as in
+ + "A Text" -More generally, a string is written"a 1 a 2 ... a n"for ''n >= 0'', where thea i are characters, and is an abbreviation for the concatenation 'a 1' 'a 2' ... 'a n' -, representing a set containing a single token. In a string, the double quote character " is written \" and the backslash character \ is written \\. No other special characters are permitted; if you need special characters, use explicit concatenation. As a special case, "" represents the set containing a single empty token. +More generally, a string is written "a 1 a 2 ... a n" for ''n >= 0'', where the "a i" are characters, and is an abbreviation for the concatenation 'a 1' 'a 2' ... 'a n', representing a set containing a single token. In a string, the double quote character " is written \" and the backslash character \ is written \\. No other special characters are permitted; if you need special characters, use explicit concatenation. As a special case, "" represents the set containing a single empty token. -A union, writtenexp 1 | exp 2 | ... | exp n, describes the set of tokens which are specimens ofexp 1, or ofexp 2 etc. For example, the union ''('a'..'z') | ('A'..'Z')'' describes the set of single-letter tokens (lower-case or upper-case). +A union, written exp 1 | exp 2 | ... | exp n, describes the set of tokens which are specimens of exp 1, or of exp 2, etc. For example, the union ''('a'..'z') | ('A'..'Z')'' describes the set of single-letter tokens (lower-case or upper-case). ===Predefined expressions=== @@ -402,7 +403,7 @@ BOOLEAN ===Case sensitivity=== -By default, letter case is not significant for regular expressions and keywords. +By default, letter case is not significant for regular expressions and keywords.
So if ''yes'' matches a token type defined by a regular expression, or is a keyword, the input values ''Yes'', ''yEs'' and ''yES'' will all yield the same token or keyword. This also means that '' 'a'..'z' '' and '' 'a'..'z' | 'A'..'Z' '' describe the same set of tokens. The regular expression syntax introduced above offers a special notation to specify that a particular expression is case-sensitive: ''~exp'', where ''exp'' is a regular expression. For example, ''~('A'..'Z')'' only covers single-upper-case-letter tokens. But for all other kinds of expression letter case is not taken into account.