A minimal XML Schema tutorial with Examples 

Joined:
04/09/2007
Posts:
753

April 26, 2011 20:12:01    Last update: April 28, 2011 15:28:12
An XML schema is a definition of XML files, in XML. It plays the same role as old-time DTDs.

  1. Overall, an XML schema file looks like this:
    <schema
      attributeFormDefault = (qualified | unqualified) : unqualified
      blockDefault = (#all | List of (extension | restriction | substitution))  : ''
      elementFormDefault = (qualified | unqualified) : unqualified
      finalDefault = (#all | List of (extension | restriction | list | union))  : ''
      id = ID
      targetNamespace = anyURI
      version = token
      xml:lang = language
      {any attributes with non-schema namespace . . .}>
      Content: (
        (include | import | redefine | annotation)*, 
        (
    	(
    	    (simpleType | complexType | group | attributeGroup) | 
    	    element | 
    	    attribute | 
    	    notation
    	),
    	annotation*
        )*
      )
    </schema>
    

    The attribute meanings:
    • targetNamespace: The namespace targeted by the current schema definition. It can be any URI.
    • id and version: Provided for user convenience; the W3C spec defines no semantics for them.
    • xml:lang: Natural language identifier as defined by RFC 3066.
    • attributeFormDefault and elementFormDefault: Set default values for the form attribute for attribute and element declarations.
    • blockDefault and finalDefault: Set default values for the block and final attributes of element declarations and type definitions (attribute declarations have no block or final attribute).


  2. The W3C defined some built-in datatypes. Examples of primitive datatypes are: string, boolean, decimal, float, double. Some examples of derived datatypes are: normalizedString, token, NMTOKEN, integer, long, int.

    For example, integer is derived from decimal by limiting the value to an integral value (no fraction digits) and eliminating the decimal point, long is derived from integer by limiting the minimum and maximum values, int is derived from long by narrowing that range further, etc.

  3. You can define your own simple types:
    <!-- define stringtype as alias to string -->
    <simpleType name="stringtype">
        <restriction base="string"/>
    </simpleType>
    
    <!-- SKU is string matching specified regex -->
    <simpleType name="SKU">
        <restriction base="string">
    	<pattern value="\d{3}-[A-Z]{2}"/>
        </restriction>
    </simpleType>
    
    <!-- String enumeration -->
    <simpleType name="dispatcherType">
        <restriction base="string">
    	<enumeration value="FORWARD"/>
    	<enumeration value="INCLUDE"/>
    	<enumeration value="REQUEST"/>
    	<enumeration value="ERROR"/>
        </restriction>
    </simpleType>
    


    Or complex types:
    <!-- A composite type -->
    <complexType name="us-address">
        <annotation>
    	<documentation>
    	Defines a US address
    	</documentation>
        </annotation>
    
        <sequence>
    	<element name="number" type="string"/>
    	<element name="street" type="string"/>
    	<element name="city" type="string"/>
    	<element name="state">
    	    <simpleType>
    		<restriction base="string">
    		    <pattern value="[A-Z]{2}"/>
    		</restriction>
    	    </simpleType>
    	</element>
    	<element name="zip">
    	    <simpleType>
    		<restriction base="string">
    		    <pattern value="\d{5}"/>
    		</restriction>
    	    </simpleType>
    	</element>
        </sequence>
    </complexType>
    
    <!-- A list of 0 or more addresses -->
    <!-- Assume the namespace prefix for the above defined type is mytype: -->
    <complexType name="addresses">
        <sequence>
    	<element name="address" type="mytype:us-address" minOccurs="0" maxOccurs="unbounded"/>
        </sequence>
    </complexType>
    

    Or attribute:
    <complexType name="AttributeTest">
        <!-- ID is a built-in derived type -->
        <attribute name="id" type="ID"/>
    
        <!-- provide a default value -->
        <attribute name="lang" type="string" default="EN"/>
    
        <!-- Make value of attribute unchangeable -->
        <attribute name="country" type="string" fixed="US"/>
    
        <!-- Mark attribute as required -->
        <attribute name="email" type="string" use="required"/>
    </complexType>
    


  4. A complete example:
    <?xml version="1.0" encoding="UTF-8"?>
    <schema xmlns="http://www.w3.org/2001/XMLSchema"
    	targetNamespace="http://example.com/ns/contact"
    	xmlns:cntct="http://example.com/ns/contact"
    	elementFormDefault="qualified"
    	version="1.0">
        <annotation>
    	<documentation>
    	    Contact Definition 1.0
    	</documentation>
        </annotation>
    
        <element name="contact" type="cntct:ContactType"/>
    
        <complexType name="ContactType">
    	<sequence>
    	    <element name="first-name" type="string"/>
    	    <element name="last-name" type="string"/>
    	    <element name="address" type="cntct:USAddress"/>
    	</sequence>
        </complexType>
    
        <complexType name="USAddress">
    	<sequence>
    	    <element name="number" type="string"/>
    	    <element name="street" type="string"/>
    	    <element name="city" type="string"/>
    	    <element name="state" type="string"/>
    	    <element name="zip" type="string"/>
    	</sequence>
        </complexType>
    </schema>
    

    Or equivalently (switching default namespace):
    <?xml version="1.0" encoding="UTF-8"?>
    <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
    	targetNamespace="http://example.com/ns/contact"
    	xmlns="http://example.com/ns/contact"
    	elementFormDefault="qualified"
    	version="1.0">
        <xsd:annotation>
    	<xsd:documentation>
    	    Contact Definition 1.0
    	</xsd:documentation>
        </xsd:annotation>
    
        <xsd:element name="contact" type="ContactType"/>
    
        <xsd:complexType name="ContactType">
    	<xsd:sequence>
    	    <xsd:element name="first-name" type="xsd:string"/>
    	    <xsd:element name="last-name" type="xsd:string"/>
    	    <xsd:element name="address" type="USAddress"/>
    	</xsd:sequence>
        </xsd:complexType>
    
        <xsd:complexType name="USAddress">
    	<xsd:sequence>
    	    <xsd:element name="number" type="xsd:string"/>
    	    <xsd:element name="street" type="xsd:string"/>
    	    <xsd:element name="city" type="xsd:string"/>
    	    <xsd:element name="state" type="xsd:string"/>
    	    <xsd:element name="zip" type="xsd:string"/>
    	</xsd:sequence>
        </xsd:complexType>
    </xsd:schema>
    


    Both schemas validate this XML instance:
    <?xml version="1.0" encoding="UTF-8"?>
    <contact xmlns="http://example.com/ns/contact"> 
        <first-name>Jack</first-name>
        <last-name>Smith</last-name>
        <address>
    	<number>1234</number>
    	<street>N. 14th</street>
    	<city>Chicago</city>
    	<state>IL</state>
    	<zip>60634</zip>
        </address>
    </contact>
    


    Or this:
    <?xml version="1.0" encoding="UTF-8"?>
    <cntct:contact xmlns:cntct="http://example.com/ns/contact"> 
        <cntct:first-name>Jack</cntct:first-name>
        <cntct:last-name>Smith</cntct:last-name>
        <cntct:address>
    	<cntct:number>1234</cntct:number>
    	<cntct:street>N. 14th</cntct:street>
    	<cntct:city>Chicago</cntct:city>
    	<cntct:state>IL</cntct:state>
    	<cntct:zip>60634</cntct:zip>
        </cntct:address>
    </cntct:contact>
    


  5. Are you wondering about what elementFormDefault="qualified" is doing in the schema element? Well, if you remove that from the schema file, the XML files above no longer validate. You have to change the XML file to:
    <?xml version="1.0" encoding="UTF-8"?>
    <c:contact xmlns:c="http://example.com/ns/contact"> 
        <first-name>Jack</first-name>
        <last-name>Smith</last-name>
        <address>
    	<number>1234</number>
    	<street>N. 14th</street>
    	<city>Chicago</city>
    	<state>IL</state>
    	<zip>60634</zip>
        </address>
    </c:contact>
    

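    which validates with both XML schema variations above - after elementFormDefault="qualified" is removed. The reason: elementFormDefault defaults to "unqualified", in which case only globally declared elements (here, contact) belong to the target namespace, while locally declared elements (first-name, last-name, address and its children) must appear in no namespace.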


References:
  1. XML Schema Structures
  2. XML Schema Datatypes
  3. Java webapp 2.5 schema

Share |
| Comment  | Tags