# # $Id: xmlparser.icn,v 1.4 2004/10/03 15:14:32 rparlett Exp $ # # This file is in the public domain. # # Author: Robert Parlett (parlett@dial.pipex.com) # package xml import util link scan global xml_space, xml_char, xml_letter, xml_name_start, xml_name_char, xml_pubid_char # # Holds detail of a parse position in a file # class Diversion(id, subject, pos) end class XmlParser : Parser : Error(doc, resolver, error_handler, current_divert_id, divert_stack, in_ext_subset, dtd_insert_shown_error, do_namespaces_flag, preserve_insignificant_whitespace_flag, preserve_comments_flag, validate_flag ) # # Set the resolver # method set_resolver(r) return self.resolver := r end # # Get the resolver being used # method get_resolver() return self.resolver end # # Configure the parser to do namespace post-processing (the default). # method set_do_namespaces() do_namespaces_flag := 1 end # # Configure the parser to not do namespace post-processing. # method clear_do_namespaces() do_namespaces_flag := &null end # # Configure the parser to validate (the default). # method set_validate() validate_flag := 1 end # # Configure the parser to not validate. # method clear_validate() validate_flag := &null end # # Configure the parser to preserve comments (the default). # method set_preserve_comments() preserve_comments_flag := 1 end # # Configure the parser to not preserve comments. # method clear_preserve_comments() preserve_comments_flag := &null end # # Configure the parser to always keep insignificant whitespace # in the output document. Generally only useful for testing purposes; # XML canonical form needs this flag to be set because it is a byte # for byte output which needs to include insignificate whitespace. # method set_preserve_insignificant_whitespace() preserve_insignificant_whitespace_flag := 1 end # # Configure the parser to not keep insignificant whitespace (where it # can be detected via the DTD), in the output document. This is the default. # method clear_preserve_insignificant_whitespace() preserve_insignificant_whitespace_flag := &null end # # Parse the string s returning an XmlDocument object. # # [1] document ::= prolog element Misc* # method parse(s) doc := XmlDocument() dtd_insert_shown_error := &null current_divert_id := "input" divert_stack := [] normalize_eol(s) ? { parse_prolog() | fail doc.add_child(parse_element()) | fail parse_misc_star() | fail pos(0) | return err("Invalid content at end of input") } # Catch-all if \reason then fail validate_docname() validate_idrefs() if \self.do_namespaces_flag then { do_namespace_processing() | fail } return doc end method validate_docname() local docname every e := !doc.children do { if not(string(e)) & (e.get_type() == "doctype") then { docname := e.get_name() break } } if \docname ~== doc.get_root_element().get_name() then { invalid("The document name in the DOCTYPE declaration must match the name of the root element") } end # # [3] S ::= (#x20 | #x9 | #xD | #xA)+ # # @p method spaces() any(xml_space) | return err("space expected") return tab(many(xml_space)) end method dtd_spaces() dtd_any(xml_space) | return err("space expected") return tab(dtd_many(xml_space)) end # # [5] Name ::= (Letter | '_' | ':') (NameChar)* # # If quiet is non-null, just fail silently on error. # # @p method parse_name(quiet) if any(xml_name_start) then return move(1) || tab(many_star(xml_name_char)) else return /quiet & err(xml_name_start || " expected") end # # Same as parse_name, but within a DTD # # @p method dtd_parse_name() dtd_any(xml_name_start) | return err(xml_name_start || " expected") return move(1) || tab(dtd_many_star(xml_name_char)) end # # [6] Names ::= Name (S Name)* # # If quiet is non-null, just fail silently on error. # # @p method parse_names(quiet) local l l := [] repeat { put(l, parse_name(quiet)) | fail looking_at_space_then_cset(xml_name_start) | break spaces() } return l end # # [7] Nmtoken ::= (NameChar)+ # # If quiet is non-null, just fail silently on error. # # @p method parse_nmtoken(quiet) if any(xml_name_char) then return tab(many(xml_name_char)) return /quiet & err(xml_name_char || " expected") end # # Same as parse_nmtoken, but within a DTD # # @p method dtd_parse_nmtoken() dtd_any(xml_name_char) | return err(xml_name_char || " expected") return tab(dtd_many(xml_name_char)) end # # [8] Nmtokens ::= Nmtoken (S Nmtoken)* # # If quiet is non-null, just fail silently on error. # # @p method parse_nmtokens(quiet) local l l := [] repeat { put(l, parse_nmtoken(quiet)) | fail looking_at_space_then_cset(xml_name_char) | break spaces() } return l end # # [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | "'" ([^%&'] | PEReference | Reference)* "'" # # @p method parse_entity_value() local res, c any('\'\"') | return err("\' or \" expected") c := move(1) res := "" repeat { if any('%') then { # Can't reference a pe in the internal subset; see ibm test ibm29n04.xml if /in_ext_subset then return err("cannnot reference a parameter entity in the internal subset") # In an entity value, perefs (%..;) are always expanded res ||:= resolve_entity(lookup_pe_reference(parse_pe_reference())) | fail } else if match("&#") then # Char refs are expanded res ||:= parse_char_ref(1) | fail else if any('&') then # General refs aren't res ||:= parse_entity_ref() | fail else if =c then return res else res ||:= move(1) | return err("Unexpected eof") } end # # [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" # # @p method parse_attvalue() local res, c any('\'\"') | return err("\' or \" expected") c := move(1) res := "" repeat { if any('&') then res ||:= parse_reference() | fail else if any('<') then return err("'<' not allowed here") else if =c then return res else res ||:= move(1) | return err("Unexpected eof") } end # # see s3.3.3 - attribute value normalization. # attlist is the attributelist for this element, or &null if unknown. # att is the name of the attribute. # # @p method normalize_attvalue(s, attdef) local res, cdata # # Need to divert because the normalize_attvalue1 uses string scanning, and # returns errors. # start_divert("attvalue") res := normalize_attvalue1(normalize_eol(s)) | fail end_divert() # # Decide if it's a CDATA element. # if (\attdef).def_type == "CDATA" then return res # # Not a CDATA, so do extra processing on it. # res ? { tab(many(' ')) res := "" repeat { res ||:= tab(upto(' ')) if any(' ') then { tab(many(' ')) # No trailing spaces. if not(pos(0)) then res ||:= " " } else { res ||:= tab(0) break } } } return res end method normalize_attvalue1(s, circle) local res, t res := "" s ? { repeat { res ||:= tab(upto('&' ++ xml_space)) if any(xml_space) then { res ||:= " " move(1) } else if any('&') then { # Char refs aren't recursively treated; entity refs are. if match("&#") then res ||:= parse_char_ref(1) | fail else { ref := parse_entity_ref() | fail if member(\circle, ref) then return err("circular entity replacement") c2 := copy(\circle) | set() insert(c2, ref) o := lookup_entity_ref(ref) | fail # # If standalone "yes", then can't refernce the entity if it was defined in the external subset # if \doc.standalone & \o.in_ext_subset then return err("cannot reference an entity declared externally, in a standalone document") # # Can't have an external reference here. See s3.1 # \o.str | return err("reference to external entity in attribute value") # # Can't have a < char; s3.1 # if upto('<', o.str) then return err("the replacement text in an attribute value cannot contain a '<' char") # # Recursively normalize the entity ref's value # res ||:= normalize_attvalue1(o.str, c2) } } else { res ||:= tab(0) break } } } return res end # # Normalize end-of-line characters. # \r\n combinations are replaced with \n # \rx combinations are replaced with \nx where x is any character other than \n # # @p method normalize_eol(s) local res res := "" s ? repeat { res ||:= tab(upto('\r')) if any('\r') then { move(1) if any('\n') then move(1) res ||:= "\n" } else return res || tab(0) } end # # [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") # # @p method parse_system_literal() local c, res any('\'\"') | return err("\' or \" expected") c := move(1) res := tab(upto(c)) | return err("couldn't find " || c) move(1) return res end # # [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" # # @p method parse_pubid_literal() local c, res, t if any('\'') then t := xml_pubid_char -- '\'' else if any('\"') then t := xml_pubid_char else return err("\' or \" expected") c := move(1) res := tab(many(t)) | "" if not(=c) then return err(c || " expected") return res end # # [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' # # @p method parse_comment() local res, c, s, t res := Comment() c := xml_char -- '-' ="<!--" | return err("'<!--' expected") s := "" repeat { t := tab(upto('-')) | return err("- expected") check_is_char_data(t, "comment") | fail s ||:= t # A double-hypen cannot be within the comment and must signify the end. if match("--") then break s ||:= move(1) } ="-->" | return err("'-->' expected") res.comment := s return res end # # [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' # # @p method parse_pi() local res res := ProcessingInstruction() ="<?" | return err("'<?' expected") res.target := parse_pi_target() | fail if any(xml_space) then { spaces() res.content := tab(find("?>")) | return err("missing '?>'") check_is_char_data(res.content, "PI") | fail } ="?>" | return err("'?>' expected") return res end # # [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) # # @p method parse_pi_target() local s s := parse_name() | fail if map(s) == "xml" then return err("can't have 'xml' here") return s end # # [18] CDSect ::= CDStart CData CDEnd # [19] CDStart ::= '<![CDATA[' # [20] CData ::= (Char* - (Char* ']]>' Char*)) # [21] CDEnd ::= ']]>' # # @p method parse_cdsect() local res res := CData() ="<![CDATA[" | return err("'<![CDATA[' expected") res.content := tab(find("]]>")) | return err("no corresponding ']]>' in CDATA section") check_is_char_data(res.content, "PI") | fail ="]]>" return res end # # [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? # # @p method parse_prolog() if match("<?xml") then doc.add_child(parse_xml_decl()) | fail parse_misc_star() | fail if match("<!DOCTYPE") then { doc.add_child(parse_doctype_decl()) | fail parse_misc_star() | fail } return end # # [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' # # @p method parse_xml_decl() local res res := XmlDecl() ="<?xml" | return err("'<?xml' expected") res.version := parse_version_info() | fail if looking_at_space_then("encoding") then res.encoding := parse_encoding_decl() | fail if looking_at_space_then("standalone") then res.standalone := parse_sdecl() | fail opt_spaces() ="?>" | return err("'?>' expected") return res end # # [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') # # @p method parse_version_info() spaces() | fail ="version" | return err("'version' expected") parse_eq() | fail any('\'\"') | return err("\' or \" expected") c := move(1) res := parse_version_num() | fail if not(=c) then return err(c || " expected") return res end # # [25] Eq ::= S? '=' S? # # @p method parse_eq() opt_spaces() ="=" | return err("'=' expected") opt_spaces() return end # # [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+ # # @p method parse_version_num() local c c := &lcase ++ &ucase ++ &digits ++ '_.:-' any(c) | return err("expected " || image(c)) return tab(many(c)) end # # Misc* # [27] Misc ::= Comment | PI | S # # @p method parse_misc_star() repeat { if match("<!--") then parse_and_maybe_add_comment(doc) | fail else if match("<?") then doc.add_child(parse_pi()) | fail else if any(xml_space) then spaces() else break } return end # # Parse and add a comment to the parent node, but only if the # preserve_comments_flag is set. # # @p method parse_and_maybe_add_comment(parent) local c c := parse_comment() | fail if \preserve_comments_flag then parent.add_child(c) return c end # # [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' (markupdecl | DeclSep)* ']' S?)? '>' # # @p method parse_doctype_decl() res := DocType() ="<!DOCTYPE" | return err("'<!DOCTYPE' expected") spaces() | fail res.name := parse_name() | fail if looking_at_space_then("SYSTEM" | "PUBLIC") then { spaces(); res.external_id := parse_external_id(res) | fail } opt_spaces() if ="[" then { parse_markup_decls(res) | fail ="]" | return err("']' expected") opt_spaces() } =">" | return err("'>' expected") if \res.external_id then { # Resolve and parse the external subset s := resolve(res.external_id) | fail start_divert(res.external_id.to_string()) in_ext_subset := 1 s ? { parse_ext_subset() | fail pos(0) | return err("extraneous content") } in_ext_subset := &null end_divert() } return res end # # Parse (markupdecl | DeclSep) * # # [28a] DeclSep ::= PEReference | S # [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment # # @p method parse_markup_decls() repeat { if match("<!ELEMENT") then parse_element_decl() | fail else if match("<!ATTLIST") then parse_attlist_decl() | fail else if match("<!ENTITY") then parse_entity_decl() | fail else if match("<!NOTATION") then parse_notation_decl(doctype) | fail else if match("<?") then doc.add_child(parse_pi()) | fail else if match("<!--") then parse_and_maybe_add_comment(doc) | fail else if any('%') then { # Recursively parse the value of the pe. ref := parse_pe_reference() | fail o := lookup_pe_reference(ref) | fail if \o.str then { start_divert("macro") normalize_eol(o.str) ? { parse_markup_decls() | fail pos(0) | return err("extraneous content") } end_divert() } else { if \o.notation then return err("unparsed entity not allowed") # Resolve the external entity, parse it appropriately. s := resolve(o.external_id) | fail start_divert(o.external_id.to_string()) normalize_eol(s) ? { parse_markup_decls() | fail pos(0) | return err("extraneous content") } end_divert() } } else if any(xml_space) then spaces() else break } return end # # [30] extSubset ::= TextDecl? extSubsetDecl # # @p method parse_ext_subset() if match("<?xml") then parse_text_decl() | fail return parse_ext_subset_decl() end # # [31] extSubsetDecl ::= ( markupdecl | conditionalSect | DeclSep)* # # @p method parse_ext_subset_decl() repeat { if match("<![") then parse_conditional_sect() | fail else if match("<!ELEMENT") then parse_element_decl() | fail else if match("<!ATTLIST") then parse_attlist_decl() | fail else if match("<!ENTITY") then parse_entity_decl() | fail else if match("<!NOTATION") then parse_notation_decl(doctype) | fail else if match("<?") then doc.add_child(parse_pi()) | fail else if match("<!--") then parse_and_maybe_add_comment(doc) | fail else if any('%') then { # Recursively parse the value of the pe. ref := parse_pe_reference() | fail o := lookup_pe_reference(ref) | fail if \o.str then { start_divert("macro") normalize_eol(o.str) ? { parse_ext_subset_decl() | fail pos(0) | return err("extraneous content") } end_divert() } else { if \o.notation then return err("unparsed entity not allowed") # Resolve the external entity, parse it appropriately. s := resolve(o.external_id) | fail start_divert(o.external_id.to_string()) normalize_eol(s) ? { parse_ext_subset_decl() | fail pos(0) | return err("extraneous content") } end_divert() } } else if any(xml_space) then spaces() else break } return end # # [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) # # @p method parse_sdecl() spaces() | fail ="standalone" | return err("'standalone' expected") parse_eq() | fail if any('\'\"') then c := move(1) else return err("\' or \" expected") res := ="yes" | ="no" | return err("'yes' or 'no' expected") if res == "yes" then doc.standalone := 1 if not(=c) then return err(c || " expected") return res end # # [39] element ::= EmptyElemTag | STag content ETag # [40] STag ::= '<' Name (S Attribute)* S? '>' # [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' # # @p method parse_element(parent_el) local res, attlist res := XmlElement() ="<" | return err("'<' expected") res.name := parse_name() | fail # # Get a reference to the attlist for this element. This is convenient here # to avoid doing it many times during parse_attribute()'s call to # normalize_attvalue. # attlist := doc.attribute_lists[res.name] while looking_at_space_then_cset(xml_name_start) do { spaces() parse_attribute(res, attlist) | fail } opt_spaces() # # Validate the attributes # if \validate_flag then validate_attributes(res) # # Process the xml:space attribute. # if member(res.attributes, "xml:space") then { xs := res.attributes["xml:space"] if xs == "preserve" then res.xml_space_preserve := 1 else if xs ~== "default" then return err("xml:space has invalid value") } else { # # Not present, so the value is inherited from the parent # res.xml_space_preserve := (\parent_el).xml_space_preserve } if =">" then { parse_content(res) | fail parse_etag(res) | fail } else { ="/>" | return err("'>' or '/>' expected") } # # By default the whitespace_children is the same as the children, but validation # may change this by removing "insignificant" whitespace. # res.whitespace_children := res.children if \validate_flag then validate_children(res, parent_el) return res end # # [41] Attribute ::= Name Eq AttValue # # @p method parse_attribute(el, attlist) local att, val, attdef att := parse_name() | fail parse_eq() | fail # # Try to get the AttributeDef for this attribute. # attdef := (\attlist).attribute_defs[att] val := normalize_attvalue(parse_attvalue(), attdef) | fail if member(el.attributes, att) then return err("duplicated attribute declaration: " || att) insert(el.attributes, att, val) return end # # [42] ETag ::= '</' Name S? '>' # # @p method parse_etag(el) local t ="</" | return err("'</' expected") t := parse_name() | fail t == el.name | return err("mismatched tags: " || el.name || " expected") opt_spaces() =">" | return err("'>' expected") return end # # [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* # [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) # # @p method parse_content(parent, circle) local s, o, c2 repeat { if pos(0) then break if any('&') then { if match("&#") then parent.add_string(parse_char_ref(1)) | fail else { ref := parse_entity_ref() | fail if member(\circle, ref) then return err("circular entity replacement") c2 := copy(\circle) | set() insert(c2, ref) o := lookup_entity_ref(ref) | fail # # If standalone "yes", then can't refernce the entity if it was defined in the external subset # if \doc.standalone & \o.in_ext_subset then return err("cannot reference an entity declared externally, in a standalone document") if \o.str then { # Simple expansion, parsed as content. start_divert("macro") normalize_eol(o.str) ? { parse_content(parent, c2) | fail pos(0) | return err("extraneous content") } end_divert() } else { if \o.notation then return err("unparsed entity not allowed") # Resolve the external entity, parse it appropriately. s := resolve(o.external_id) | fail start_divert(o.external_id.to_string()) normalize_eol(s) ? { parse_external_parsed_entity(parent, c2) | fail pos(0) | return err("extraneous content") } end_divert() } } } else if match("<!--") then parse_and_maybe_add_comment(parent) | fail else if match("<?") then parent.add_child(parse_pi()) | fail else if match("<![CDATA[") then parent.add_child(parse_cdsect()) | fail else if any('<') then { if match("</") then break parent.add_child(parse_element(parent)) | fail } else { # # Must have more chars and initially not a < or an & # cd := tab(many(~'<&')) if find("]]>", cd) then return err("cannot have ']]>' in character content") check_is_char_data(cd, "content character data") | fail parent.add_string(cd) } } return parent end method check_is_char_data(s, msg) local j if j := upto(~xml_char, s) then return err("invalid char (" || image(s[j]) || ") in " || msg) return end # # [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>' # # @p method parse_element_decl() local name, content_spec, element_decl tab(dtd_match("<!ELEMENT")) | return err("'<!ELEMENT' expected") dtd_spaces() | fail name := dtd_parse_name() | fail dtd_spaces() | fail content_spec := parse_content_spec() | fail dtd_opt_spaces() | fail =">" | return err("'>' expected") element_decl := ElementDecl() element_decl.in_ext_subset := in_ext_subset element_decl.content_spec := content_spec if member(doc.element_declarations, name) then invalid("Multiple element declarations for element " || name) else insert(doc.element_declarations, name, element_decl) return end # # [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children # # @p method parse_content_spec() local a if tab(dtd_match("EMPTY")) then return ContentSpec("EMPTY") else if tab(dtd_match("ANY")) then { a := ContentSpec("ANY") a.is_mixed_flag := 1 return a } else if dtd_match("(") then { # Awkward lookahead. i := &pos if tab(dtd_match("(")) & tab(dtd_many_star(xml_space)) & dtd_match("#PCDATA") then { tab(i) return parse_mixed() } else { tab(i) return parse_children() } } else return err("'EMPTY', 'ANY' or '(' expected") end # # [47] children ::= (choice | seq) ('?' | '*' | '+')? # # @p method parse_children() local x, op x := parse_choiceseq() | fail if dtd_any('?*+') then { op := move(1) return ContentSpec(op, x) } else return x end # # [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? # # @p method parse_cp() local x, op if dtd_any('(') then x := parse_choiceseq() | fail else if dtd_any(xml_name_start) then x := ContentSpec("name", dtd_parse_name()) | fail else return err("Name or ( expected") if dtd_any('?*+') then { op := move(1) return ContentSpec(op, x) } else return x end # # [49] choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')' # [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' # # @p method parse_choiceseq() local res, op, a tab(dtd_match("(")) | return err("'(' expected") dtd_opt_spaces() a := parse_cp() | fail dtd_opt_spaces() if dtd_any('|,') then { op := move(1) res := t := ContentSpec(op, a) repeat { dtd_opt_spaces() a := parse_cp() | fail dtd_opt_spaces() tab(dtd_match(op)) | break t.arg2 := ContentSpec(op, a) t := t.arg2 } t.arg2 := a } else res := a tab(dtd_match(")")) | return err("')' expected") return res end # # [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')' # Converts to a string with spaces removed. # # @p method parse_mixed() local res, t, a, seen tab(dtd_match("(")) | return err("( expected") dtd_opt_spaces() a := ContentSpec(tab(dtd_match("#PCDATA"))) | return err("'#PCDATA' expected") dtd_opt_spaces() seen := set() if dtd_any('|') then { move(1) res := t := ContentSpec("|", a) repeat { dtd_opt_spaces() n := dtd_parse_name() | fail if member(seen, n) then invalid("name " || n || " appeared multiple times in mixed content spec") else insert(seen, n) a := ContentSpec("name", n) dtd_opt_spaces() tab(dtd_match("|")) | break t.arg2 := ContentSpec("|", a) t := t.arg2 } t.arg2 := a tab(dtd_match(")*")) | return err("')*' expected") a := ContentSpec("*", res) } else { tab(dtd_match(")")) | return err("')' expected") if tab(dtd_match("*")) then a := ContentSpec("*", a) } a.is_mixed_flag := 1 return a end # # [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>' # [53] AttDef ::= S Name S AttType S DefaultDecl # # @p method parse_attlist_decl() local element_name, t tab(dtd_match("<!ATTLIST")) | return err("'<!ATTLIST' expected") dtd_spaces() | fail element_name := dtd_parse_name() | fail # # See if we already have an AttList defined for this element. If so, # new AttDefs are merged into it. In any case, multiple defs generate # a warning. # if member(doc.attribute_lists, element_name) then { warn("multiple ATTLIST definitions for element " || element_name) t := doc.attribute_lists[element_name] } else { t := AttList() insert(doc.attribute_lists, element_name, t) } repeat { dtd_looking_at_space_then_cset(xml_name_start) | break dtd_spaces() | fail attr_name := dtd_parse_name() | fail dtd_spaces() | fail attribute_def := AttributeDef() attribute_def.in_ext_subset := in_ext_subset parse_att_type(attribute_def) | fail dtd_spaces() | fail parse_default_decl(attribute_def) | fail # Check for multiple IDs if attribute_def.def_type == "ID" & \t.has_id then invalid("Multiple ID declarations for this element type") # # Second and subsequent definitions are ignored, but this generates a warning. # if member(t.attribute_defs, attr_name) then warn("Multiple attribute defs for attribute " || attr_name || " in element " || element_name) else t.add_attribute_def(attr_name, attribute_def) } dtd_opt_spaces() tab(dtd_match(">")) | return err("'>' expected") return res end # # [54] AttType ::= StringType | TokenizedType | EnumeratedType # [55] StringType ::= 'CDATA' # [56] TokenizedType ::= 'ID' | 'IDREF'| 'IDREFS' | 'ENTITY' | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' # [57] EnumeratedType ::= NotationType | Enumeration # # @p method parse_att_type(attribute_def) local s if s := tab(dtd_match("CDATA" | "IDREFS" | "IDREF"| "ID" | "ENTITY" | "ENTITIES" | "NMTOKENS" | "NMTOKEN")) then attribute_def.def_type := s else if dtd_match("NOTATION") then { attribute_def.def_type := "NOTATION" attribute_def.def_set := parse_notation_type() | fail } else if dtd_any('(') then { attribute_def.def_type := "ENUMERATION" attribute_def.def_set := parse_enumeration() | fail } else return err("syntax error") return attribute_def end # # [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' # # @p method parse_notation_type() local res res := set() tab(dtd_match("NOTATION")) | return err("'NOTATION' expected") dtd_spaces() | fail tab(dtd_match("(")) | return err("'(' expected") repeat { dtd_opt_spaces() t := dtd_parse_name() | fail member(doc.notation_declarations, t) | invalid("Notation name " || t || " not declared") if member(res, t) then invalid("Duplicated name in notation enumeration declaration: " || t) insert(res, t) dtd_opt_spaces() tab(dtd_match("|")) | break } opt_spaces() tab(dtd_match(")")) | return err("')' expected") return res end # # [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' # # @p method parse_enumeration() local res, t res := set() tab(dtd_match("(")) | return err("'(' expected") repeat { dtd_opt_spaces() t := dtd_parse_nmtoken() | fail if member(res, t) then invalid("Duplicated token in enumeration declaration: " || t) insert(res, t) dtd_opt_spaces() tab(dtd_match("|")) | break } dtd_opt_spaces() tab(dtd_match(")")) | return err("')' expected") return res end # # [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) # # @p method parse_default_decl(attribute_def) local s, val if s := tab(dtd_match("#REQUIRED" | "#IMPLIED")) then attribute_def.default_decl := s else { if attribute_def.default_decl := tab(dtd_match("#FIXED")) then dtd_spaces() | fail val := attribute_def.default_value := normalize_attvalue(parse_attvalue(), attribute_def) | fail if attribute_def.def_type == "ID" then invalid("An attribute with type ID must have a default declaration of #IMPLIED or #REQUIRED") # # Validate default value # case attribute_def.def_type of { "CDATA" : { } "ID" : { if val ? not(parse_name(1) & pos(0)) then invalid("default attribute value " || val || " must be a valid name") } "IDREF" : { if val ? not(parse_name(1) & pos(0)) then invalid("default attribute value " || val || " must be a valid name") } "IDREFS" : { if val ? not(parse_names(1) & pos(0)) then invalid("default attribute value " || val || " must be comprised of valid names") } "ENTITY" : { if val ? not(parse_name(1) & pos(0)) then invalid("default attribute value " || val || " must be a valid name") } "ENTITIES" : { if val ? not(parse_names(1) & pos(0)) then invalid("defaul tattribute value " || val || " must be comprised of valid names") } "NMTOKEN" : { if val ? not(parse_nmtoken(1) & pos(0)) then invalid("default attribute value " || val || " must be a valid nmtoken") } "NMTOKENS" : { if val ? not(parse_nmtokens(1) & pos(0)) then invalid("default attribute value " || val || " must be comprised of valid nmtokens") } "ENUMERATION" : { if not(member(attribute_def.def_set, val)) then invalid("default attribute value " || val || " is not one of the declared enumerations") } "NOTATION" : { if not(member(attribute_def.def_set, val)) then invalid("default attribute value " || val || " is not one of the declared notations") } } } return attribute_def end # # [61] conditionalSect ::= includeSect | ignoreSect # [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' # [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>' # # @p method parse_conditional_sect() tab(dtd_match("<![")) | return err("'<![' expected") dtd_opt_spaces() if tab(dtd_match("INCLUDE")) then { dtd_opt_spaces() tab(dtd_match("[")) | return err("'[' expected") parse_ext_subset_decl() | fail } else if tab(dtd_match("IGNORE")) then { dtd_opt_spaces() tab(dtd_match("[")) | return err("'[' expected") parse_ignore_sect_contents() | fail } else return err("'INCLUDE' or 'IGNORE' expected") tab(dtd_match("]]>")) | return err("']]>' expected") return end # # [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)* # [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) # # @p method parse_ignore_sect_contents() repeat { tab(upto('<]')) | return err("ignore section not closed") if match("]]>") then return else if ="<![" then { parse_ignore_sect_contents() | fail ="]]>" | return err("']]>' expected") } else move(1) } end # # [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' # [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] # # @p method parse_char_ref(expand) local s, c, digs, i s := ="&#" | return err("'&#' expected") if any('x') then { s ||:= move(1) c := &digits ++ 'abcdefABCDEF' any(c) | return err(c || " expected") digs := tab(many(c)) i := format_string_to_int(digs, 16) | 63 } else { c := &digits any(c) | return err(c || " expected") digs := tab(many(c)) i := integer(digs) | 63 } s ||:= digs s ||:= =";" | return err("';' expected") if /expand then return s else { if i = (16r9 | 16rA | 16rD) | (16r20 <= i <= 16rFF) then return char(i) if (16rFF < i <= 16rD7FF) | (16rE000 <= i <= 16rFFFD) | (16r10000 <= i <= 16r10FFFF) then return "?" return err("invalid char number :" || digs) } end # # [67] Reference ::= EntityRef | CharRef # # @p method parse_reference() if match("&#") then return parse_char_ref() | fail if any('&') then return parse_entity_ref() | fail else return err("'&' expected") end # # [68] EntityRef ::= '&' Name ';' # # @p method parse_entity_ref() local res res := ="&" | return err("'&' expected") res ||:= parse_name() | fail res ||:= =";" | return err("';' expected") return res end # # [69] PEReference ::= '%' Name ';' # # @p method parse_pe_reference() local res res := ="%" | return err("'%' expected") res ||:= parse_name() | fail res ||:= =";" | return err("';' expected") return res end # # Lookup a pe in the table, returning its value. # # @p method lookup_pe_reference(s) local name name := s[2:-1] if member(doc.parameter_entities, name) then return doc.parameter_entities[name] else return err("No such parameter entity defined: " || name) end # # Lookup a general entity in the table, returning its value. # # @p method lookup_entity_ref(s) local name name := s[2:-1] if member(doc.general_entities, name) then return doc.general_entities[name] else return err("No such general entity defined: " || name) end # # Convenience method to take a value from one of the entity tables and return it # as a string, resolving it externally if necessary. # # @p method resolve_entity(o) if \o.str then return normalize_eol(o.str) if \o.notation then return err("unparsed entity not allowed") return normalize_eol(resolve(o.external_id)) end # # [70] EntityDecl ::= GEDecl | PEDecl # [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' # [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' # # @p method parse_entity_decl() local name, val tab(dtd_match("<!ENTITY")) | return err("'<!ENTITY' expected") # # This could be more sophisticated to try to cover for example # <!ENTITY%x; abc 'def > where %x -> ' %' # spaces() | fail if any('%') then { ="%" dtd_spaces() | fail name := dtd_parse_name() | fail dtd_spaces() | fail val := parse_pe_def() | fail # Second and subsequent definitions generate a warning, and are ignored. if member(doc.parameter_entities, name) then warn("multiple parameter entity definitions for " || name) else insert(doc.parameter_entities, name, val) } else { name := dtd_parse_name() | fail dtd_spaces() | fail val := parse_entity_def() | fail # Second and subsequent definitions generate a warning, and are ignored. if member(doc.general_entities, name) then warn("multiple parameter entity definitions for " || name) else insert(doc.general_entities, name, val) } dtd_opt_spaces() tab(dtd_match(">")) | return err("'>' expected") return end # # [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) # return a string or an ExternalID # # @p method parse_entity_def() local res res := EntityDef() res.in_ext_subset := in_ext_subset if dtd_match("SYSTEM" | "PUBLIC") then { res.external_id := parse_external_id() | fail if looking_at_space_then("NDATA") then { res.notation := parse_ndata_decl() | fail member(doc.notation_declarations, res.notation) | invalid("NDATA declaration " || res.notation || " not declared as a NOTATION") } } else res.str := parse_entity_value() | fail return res end # # [74] PEDef ::= EntityValue | ExternalID # return a string or an ExternalID # # @p method parse_pe_def() local res res := EntityDef() res.in_ext_subset := in_ext_subset if dtd_match("SYSTEM" | "PUBLIC") then res.external_id := parse_external_id() | fail else res.str := parse_entity_value() | fail return res end # # # [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral # # @p method parse_external_id() res := ExternalID() if tab(dtd_match("SYSTEM")) then { dtd_spaces() | fail res.system_id := parse_system_literal() | fail } else if tab(dtd_match("PUBLIC")) then { dtd_spaces() | fail res.public_id := parse_pubid_literal() | fail dtd_spaces() | fail res.system_id := parse_system_literal() | fail } else return err("SYSTEM or PUBLIC expected") return res end # # [76] NDataDecl ::= S 'NDATA' S Name # # @p method parse_ndata_decl() local n dtd_spaces() | fail tab(dtd_match("NDATA")) | return err("'NDATA' expected") dtd_spaces() n := dtd_parse_name() | fail return n end # # [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' # # @p method parse_text_decl() ="<?xml" | return err("'<?xml' expected") if looking_at_space_then("version") then parse_version_info() | fail parse_encoding_decl() | fail opt_spaces() ="?>" | return err("'?>' expected") return end # # [78] extParsedEnt ::= TextDecl? content # # @p method parse_external_parsed_entity(parent, circle) if match("<?xml") then parse_text_decl() | fail return parse_content(parent, circle) end # # [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) # # @p method parse_encoding_decl() spaces() | fail ="encoding" | return err("'encoding' expected") parse_eq() | fail any('\'\"') | return err("\' or \" expected") c := move(1) res := parse_enc_name() | fail if not(=c) then return err(c || " expected") return res end # # [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* # # @p method parse_enc_name() local c1, c2 c1 := &lcase ++ &ucase c2 := &lcase ++ &ucase ++ &digits ++ '._-' any(c1) | return err(c1 || " expected") return tab(many(c2)) end # # [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' # [83] PublicID ::= 'PUBLIC' S PubidLiteral # # @p method parse_notation_decl() local res, name res := NotationDecl() tab(dtd_match("<!NOTATION")) | return err("'<!NOTATION' expected") dtd_spaces() | fail name := dtd_parse_name() | fail dtd_spaces() | fail if tab(dtd_match("PUBLIC")) then { dtd_spaces() | fail pub := parse_pubid_literal() | fail if looking_at_space_then_cset('\'\"') then { dtd_spaces() | fail sys := parse_system_literal() | fail res.external_id := ExternalID(sys, pub) } else res.public_id := pub } else if dtd_match("SYSTEM") then res.external_id := parse_external_id() | fail else return err("SYSTEM or PUBLIC expected") dtd_opt_spaces() tab(dtd_match(">")) | return err("'>' expected") member(doc.notation_declarations, name) | insert(doc.notation_declarations, name, res) return res end # # S? # # @p method opt_spaces() if any(xml_space) then return spaces() else return "" end method dtd_opt_spaces() if dtd_any(xml_space) then return dtd_spaces() else return "" end # # Validate idrefs # # @p method validate_idrefs() local x every x := !sort(doc.id_attribute_refs) do member(doc.id_attribute_values, x) | invalid("an IDREF(S) attribute " || x || " was used, which does not match any IDs") end # # Validate the given element child content; this may fail on error or just emit a warning # method validate_children(el, parent_el) local element_decl, xs, tl # # Validate the element structure # if not member(doc.element_declarations, el.name) then { invalid(el.name || " is an undeclared element name") return } element_decl := doc.element_declarations[el.name] # # If the spec isn't a mixed spec (ie one which contains #PCDATA), # then we remove all whitespace-only string elements in the child list. # if element_decl.content_spec.isnt_mixed() then { # # Possibly change children to be a new list with the whitespace-only # element removed. # tl := el.get_children_no_whitespace() if *tl ~= *el.children then { el.children := tl # # A standalone document is not allowed to have w/s removed for an element # when that occurs as a result of a ELEMENT declaration in an external subset. # if \doc.standalone & \element_decl.in_ext_subset then invalid(el.name || " had insignificant whitespace removed in a standalone document") } } if not(element_decl.content_spec.pattern_match_element(el)) then invalid(el.name || " does not match the declared pattern : " || element_decl.content_spec.to_string()) # # If appropriate, put the children back to equal to the original whitespace_children; # the former may have changed above. # if \el.xml_space_preserve | \self.preserve_insignificant_whitespace_flag then { el.children := el.whitespace_children } end # # Validate the attributes of an element # method validate_attributes(el) local attlist, x if not(member(doc.attribute_lists, el.name)) then { if *el.attributes ~= 0 then invalid(el.name || " has attributes but none were declared") return } # # The AttList for this particular element type. # attlist := doc.attribute_lists[el.name] # # Check for any missing required attributes, and add any default attributes. # every x := !sort(attlist.attribute_defs) do { if member(el.attributes, x[1]) then { # A #FIXED attribute value must match the default. if x[2].default_decl === "#FIXED" then { if el.attributes[x[1]] ~=== x[2].default_value then invalid("attribute value " || el.attributes[x[1]] || " doesn't match the defined FIXED value " || x[2].default_value) } } else { # A #REQUIRED attribute must be present. if x[2].default_decl === "#REQUIRED" then invalid("the attribute " || x[1] || " is required, but not present") else { # Cannot insert a default from an attdef defined in the external subset, if standalone is "yes" if \doc.standalone & \x[2].in_ext_subset then invalid("cannot use a default from an attribute declared externally, in a standalone document") # Insert any default value. insert(el.attributes, x[1], \x[2].default_value) } } } # # Validate each of the attributes values. # every x := !sort(el.attributes) do { if member(attlist.attribute_defs, x[1]) then { validate_attribute_value(el.name, attlist.attribute_defs[x[1]], x[1], x[2]) } else { invalid(x[1] || " is not a defined attribute for element " || el.name) } } end method validate_attribute_value(element_name, attdef, name, val) case attdef.def_type of { "CDATA" : { } "ID" : { if val ? not(parse_name(1) & pos(0)) then invalid("attribute value " || val || " must be a valid name") # Store the value of the id if member(doc.id_attribute_values, val) then invalid("duplicated ID attribute value: " || val) else insert(doc.id_attribute_values, val) } "IDREF" : { if val ? not(parse_name(1) & pos(0)) then invalid("attribute value " || val || " must be a valid name") insert(doc.id_attribute_refs, val) } "IDREFS" : { if val ? (l := parse_names(1) & pos(0)) then every insert(doc.id_attribute_refs, !l) else invalid("attribute value " || val || " must be comprised of valid names") } "ENTITY" : { if val ? (parse_name(1) & pos(0)) then { if member(doc.general_entities, val) then { entity_def := doc.general_entities[val] if /entity_def.notation then invalid("attribute val " || val || " does not match an external entity") } else invalid("attribute val " || val || " does not match an external entity") } else invalid("attribute value " || val || " must be a valid name") } "ENTITIES" : { if val ? (l := parse_names(1) & pos(0)) then { every n := !l do { if member(doc.general_entities, n) then { entity_def := doc.general_entities[n] if /entity_def.notation then invalid("attribute val " || n || " does not match an external entity") } else invalid("attribute val " || n || " does not match an external entity") } } else invalid("attribute value " || val || " must be comprised of valid names") } "NMTOKEN" : { if val ? not(parse_nmtoken(1) & pos(0)) then invalid("attribute value " || val || " must be a valid nmtoken") } "NMTOKENS" : { if val ? not(parse_nmtokens(1) & pos(0)) then invalid("attribute value " || val || " must be comprised of valid nmtokens") } "ENUMERATION" : { if not(member(attdef.def_set, val)) then invalid("attribute value " || val || " is not one of the declared enumerations") } "NOTATION" : { if not(member(attdef.def_set, val)) then invalid("attribute value " || val || " is not one of the declared notations") } } end method start_divert(id) push(divert_stack, Diversion(current_divert_id, &subject, &pos)) current_divert_id := id end method end_divert() current_divert_id := pop(divert_stack).id end # # A warning # method warn(s) local stack # No warnings after a fatal error. if \reason then fail stack := copy(divert_stack) push(stack, Diversion(current_divert_id, &subject, &pos)) error_handler.warning(s, stack) doc.warnings +:= 1 fail end # # A validity error # method invalid(s) local stack # Ignore if not validating if /validate_flag then fail # None after a fatal error. if \reason then fail stack := copy(divert_stack) push(stack, Diversion(current_divert_id, &subject, &pos)) error_handler.validity_error(s, stack) doc.validity_errors +:= 1 fail end # # A fatal error # method err(s) local stack # Ensure only one fatal error is ever sent. if \reason then fail error(s) stack := copy(divert_stack) push(stack, Diversion(current_divert_id, &subject, &pos)) error_handler.fatal_error(s, stack) fail end # # Set the error handler # method set_error_handler(error_handler) return self.error_handler := error_handler end # # Get the error handler # method get_error_handler() return self.error_handler end # # Utility: succeed if we are looking at S s, fail otherwise. # Does not change &pos. # # @p method looking_at_space_then(s) local res any(xml_space) | fail i := &pos tab(many(xml_space)) res := match(s) tab(i) return \res end # # DTD version of above # # @p method dtd_looking_at_space_then(s) local res dtd_any(xml_space) | fail i := &pos tab(dtd_many(xml_space)) res := dtd_match(s) tab(i) return \res end # # As above, but looks for chars in the set. # # @p method looking_at_space_then_cset(c) local res any(xml_space) | fail i := &pos tab(many(xml_space)) res := any(c) tab(i) return \res end # # DTD version of above # # @p method dtd_looking_at_space_then_cset(c) local res dtd_any(xml_space) | fail i := &pos tab(dtd_many(xml_space)) res := dtd_any(c) tab(i) return \res end # # Equivalent to match(), but checks for pe expansions # # @p method dtd_match(s) i := &pos repeat { if x := match(s) then return x every c := !s do { =c | break } try_dtd_insert(i) | fail } end # # Equivalent to any(), but checks for pe expansions # # @p method dtd_any(c) i := &pos repeat { if x := any(c) then return x try_dtd_insert(i) | fail } end # # Equivalent to many(), but checks for pe expansions # # @p method dtd_many(c) i := &pos repeat { if x := many(c) then { tab(x) if not try_dtd_insert(i) then { &pos := i return x } } else try_dtd_insert(i) | fail } end # # Equivalent to many_star(), but checks for pe expansions # # @p method dtd_many_star(c) if dtd_any(c) then return dtd_many(c) else return &pos end # # If the current char is %, try and insert a pe value. If successful, return # otherwise fail. &pos is always set to i on exit. # # @p method try_dtd_insert(i) if any('%') then { if /in_ext_subset then { # Only show an error once; this prevents the same error message # being produced several times. if /dtd_insert_shown_error then invalid("Cannot reference a pe except in the external dtd") dtd_insert_shown_error := 1 fail } j := &pos if t := resolve_entity(lookup_pe_reference(parse_pe_reference())) then { # The entity is expanded by one leading and one trailing space (4.4.8) &subject := &subject[1:j] || " " || t || " " || &subject[&pos:0] &pos := i return } } &pos := i fail end # # Resolve the given external_id, using the current resolver # # @p method resolve(external_id) return resolver.resolve(external_id) | return err("Couldn't resolve: " || external_id.to_string() || ":" || resolver.get_reason()) end # # This method post-processes the parsed tree to fill in the # global names in the XmlElement structures. # # @p method do_namespace_processing() local t t := table() insert(t, "xml", "http://www.w3.org/XML/1998/namespace") return do_namespace_processing_element(t, doc.get_root_element()) end # # Do namespace processing for a single element. # @param resolve_table a table of mappings from namespace prefixes # to URI's to use for the conversion. # @param el the element to process # # @p method do_namespace_processing_element(resolve_table, el) # # Could be comment, cdata # if el.get_type() ~== "element" then return # # Make the map from namespace prefixes -> uri's # el.namespace_declarations := make_declarations_map(el) | fail if *el.namespace_declarations > 0 then { # # Create a new resolve_table for this level. It is a copy of the # parent, plus any defs at this level. # resolve_table := copy(resolve_table) every k := key(el.namespace_declarations) do { insert(resolve_table, k, el.namespace_declarations[k]) } } # # Convert the element to create the global name and attribute # table. # convert_element(resolve_table, el) | fail # # Process the children # every sub := !el.children do { if not(string(sub)) then { do_namespace_processing_element(resolve_table, sub) | fail } } return el end # # Convert a single element. This completes the global name field and # sets up the global attributes. # # @p method convert_element(resolve_table, el) el.global_name := convert_name(resolve_table, el.name) | fail every k := key(el.attributes) do { k ? { if ="xmlns" then { next } insert(el.attributes_global_name, convert_name(resolve_table, k), el.attributes[k]) | fail } } return end # # Given a name "eg SOAP-ENV:Envelope", return the global name, using the # given resolve table for the conversion. The returned value is a # GlobalName instance. # # @p method convert_name(resolve_table, name) local prefix, local_name, uri name ? { if prefix := tab(upto(':')) then { move(1) local_name := tab(0) member(resolve_table, prefix) | return err("Couldn't resolve namespace: " || prefix) uri := resolve_table[prefix] return GlobalName(local_name, uri) } if member(resolve_table, "") then { uri := resolve_table[""] if *uri > 0 then return GlobalName(name, uri) } return GlobalName(name) } end # # Make a map of namespace id's to uri's from the attributes in the # given element. # # @p method make_declarations_map(el) local k, uri, prefix t := table() every k := key(el.attributes) do { k ? { if ="xmlns" then { uri := el.attributes[k] if pos(0) then { insert(t, "", uri) } else if =":" then { prefix := tab(0) if *prefix = 0 then return err("Badly formed xmlns attribute: " || k) if match("xml", map(prefix)) then return err("xmlns namespace can't start with 'xml': " || k) insert(t, prefix, uri) } else return err("Badly formed xmlns attribute: " || k) } } } return t end initially() initial { init_xml_globals() } resolver := DefaultResolver() error_handler := DefaultErrorHandler() do_namespaces_flag := 1 validate_flag := 1 preserve_comments_flag := 1 end procedure init_xml_globals() if \xml_space then return xml_space := ' \t\n\r' # [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] xml_char := '\t\n\r' ++ &cset[33:0] # [84] Letter ::= BaseChar | Ideographic # [85] BaseChar ::= [#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x00FF] |..... xml_letter := &lcase ++ &ucase every c := (16rC0 to 16rD6) | (15rD8 to 16rF6) | (16rF8 to 16rFF) do xml_letter ++:= char(c) # [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender xml_name_char := xml_letter ++ &digits ++ '.-_:' xml_name_start := xml_letter ++ '_:' # [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] xml_pubid_char := ' \n\r' ++ &lcase ++ &ucase ++ &digits ++ '-\'()+,./:=?;!*#@$_%' end procedure comingup(i, j) /j := 0 return &subject[&pos+j +: i] end # # Like many, but succeeds with empty result if zero matching char. Many needs # one or more. # procedure many_star(c) if any(c) then return many(c) else return &pos end