Implementing IXmlWriter Part 9: Supporting WriteStartDocument() and WriteEndDocument()

This is part 9 of my Implementing IXmlWriter post series.

Today I will add support for the functions WriteStartDocument() and WriteEndDocument() to last time’s IXmlWriter.

WriteStartDocument() writes the XML declaration (i.e. <?xml version="1.0"?>), and WriteEndDocument() closes all open attributes and elements and returns the IXmlWriter to its initial state. Adding support for these functions is straightforward. Note that I have introduced a new IXmlWriter state called WriteState_Prolog; this will be important later.

Here’s the test case:

// Exercises WriteStartDocument()/WriteEndDocument(). The attribute and both
// elements are deliberately left open so that WriteEndDocument() has to close
// them. The indentation mirrors the nesting of the XML being written.
StringXmlWriter xmlWriter;

xmlWriter.WriteStartDocument();
  xmlWriter.WriteStartElement("root");
    xmlWriter.WriteStartElement("child");
      xmlWriter.WriteStartAttribute("att");
        xmlWriter.WriteString("value");
xmlWriter.WriteEndDocument();

std::string strXML = xmlWriter.GetXmlString();
// strXML should be <?xml version="1.0"?><root><child att="value"/></root>

Here’s the new header file:

// StringXmlWriter.h

// Builds XML text in an in-memory string through a sequence of Write*()
// calls. m_writeState records what is currently being written so that each
// call can decide whether it is allowed and how text has to be escaped.
class StringXmlWriter
{
private:
    // What the writer is currently in the middle of writing.
    enum WriteState
    {
        WriteState_Attribute, // An attribute value is being written
        WriteState_Content, // Element content is being written
        WriteState_Element, // An element start tag has been written (and is unclosed)
        WriteState_Prolog, // The prolog is being written (entered by WriteStartDocument())
        WriteState_Start, // No Write() methods have been called
    };

    // Current state of the writer.
    WriteState m_writeState;
    // Names of the elements opened and not yet closed; innermost on top.
    std::stack<std::string> m_openedElements;
    // The XML text produced so far.
    std::string m_xmlStr;

public:
    // Creates a writer in WriteState_Start.
    StringXmlWriter();

    // Returns a copy of the XML text written so far.
    std::string GetXmlString() const;
    // Writes a whole attribute by calling WriteStartAttribute(), WriteString()
    // and WriteEndAttribute() in turn.
    void WriteAttributeString(const std::string& localName,
                              const std::string& text);
    // Writes a whole element by calling WriteStartElement(), WriteString()
    // and WriteEndElement() in turn.
    void WriteElementString(const std::string& localName,
                            const std::string& text);
    // Writes the closing quote of the attribute value being written.
    void WriteEndAttribute();
    // Ends the document and puts the writer back in WriteState_Start.
    void WriteEndDocument();
    // Closes the innermost open element; uses the empty-element form when its
    // start tag is still unclosed.
    void WriteEndElement();
    // Starts an attribute on the element whose start tag is still unclosed;
    // the value is then supplied through WriteString().
    void WriteStartAttribute(const std::string& localName);
    // Writes the XML declaration; only acted on in WriteState_Start.
    void WriteStartDocument();
    // Opens a new element, first closing a still-unclosed start tag.
    void WriteStartElement(const std::string& localName);
    // Writes text, escaped as an attribute value or as element content
    // depending on the current state.
    void WriteString(const std::string& text);

private:
    // Disable copy construction and assignment
    StringXmlWriter(const StringXmlWriter&);
    StringXmlWriter& operator=(const StringXmlWriter&);
};

Here’s the new implementation file:

// StringXmlWriter.cpp

#include "StringXmlWriter.h"

// Number of elements in a statically sized array. Not valid for pointers.
#define ARRAYSIZE(x) ( sizeof(x) / sizeof((x)[0]) )

// Maps one character to the string that replaces it in the XML output.
struct CharTranslation
{
    char OriginalChar;
    const char* ReplacementString;
};

// Characters that must be escaped inside a double-quoted attribute value.
// '<' is listed because the XML grammar does not allow a literal '<' in an
// attribute value.
static const CharTranslation AttributeValueTranslations[] =
{
    { '"', "&quot;" },
    { '&', "&amp;" },
    { '<', "&lt;" },
};

// Characters that are escaped in element character data.
static const CharTranslation CharDataTranslations[] =
{
    { '&', "&amp;" },
    { '<', "&lt;" },
    { '>', "&gt;" },
};

// Binary predicate: is the given translation the one for character ch?
// The adaptor typedefs are declared by hand instead of inheriting from
// std::binary_function, which was deprecated in C++11 and removed in C++17.
struct OriginalCharEquals
{
    typedef CharTranslation first_argument_type;
    typedef char second_argument_type;
    typedef bool result_type;

    bool operator() (const CharTranslation& translation, char ch) const
    {
        return (translation.OriginalChar == ch);
    }
};

// Returns the first translation in [first, last) that applies to ch, or last
// when there is none. Replaces std::find_if + std::bind2nd (removed in C++17).
static const CharTranslation* FindTranslation(const CharTranslation* first,
                                              const CharTranslation* last,
                                              char ch)
{
    for (; first != last; ++first)
    {
        if (OriginalCharEquals()(*first, ch))
        {
            break;
        }
    }

    return first;
}

// Returns a copy of originalStr in which every character that has an entry in
// the translation table is replaced by that entry's ReplacementString.
//
// originalStr     - text to escape
// translations    - first entry of the translation table
// numTranslations - number of entries in the table
static std::string TranslateString(const std::string& originalStr,
                                   const CharTranslation* translations,
                                   int numTranslations)
{
    // One past the last entry; doubles as the "not found" marker.
    const CharTranslation* endTranslations = translations + numTranslations;

    std::string translatedStr;
    // The result is at least as long as the input, so this avoids most of the
    // reallocations caused by appending character by character.
    translatedStr.reserve(originalStr.size());

    for (std::string::const_iterator stringIter = originalStr.begin();
         stringIter != originalStr.end();
         ++stringIter)
    {
        const char ch = *stringIter;

        const CharTranslation* translation =
            FindTranslation(translations, endTranslations, ch);
        if (translation != endTranslations)
        {
            translatedStr += translation->ReplacementString;
        }
        else
        {
            translatedStr += ch;
        }
    }

    return translatedStr;
}

// Creates an empty writer in WriteState_Start (nothing written yet).
StringXmlWriter::StringXmlWriter() : m_writeState(WriteState_Start)
{
}

// Returns a copy of the XML text written so far. The writer is not modified,
// so elements that are still open are not closed in the returned text.
std::string StringXmlWriter::GetXmlString() const
{
    return m_xmlStr;
}

void StringXmlWriter::WriteAttributeString(const std::string& localName,
                                           const std::string& text)
{
    WriteStartAttribute(localName);
    WriteString(text);
    WriteEndAttribute();
}

void StringXmlWriter::WriteElementString(const std::string& localName,
                                         const std::string& text)
{
    WriteStartElement(localName);
    WriteString(text);
    WriteEndElement();
}

// Finishes the attribute value being written by emitting its closing quote;
// the writer is then back inside the (still unclosed) start tag.
void StringXmlWriter::WriteEndAttribute()
{
    if (m_writeState != WriteState_Attribute)
    {
        // TODO: Generate error
        return;
    }

    m_xmlStr += '"';
    m_writeState = WriteState_Element;
}

void StringXmlWriter::WriteEndDocument()
{
    switch (m_writeState)
    {
    case WriteState_Attribute:
        WriteEndAttribute();
        // FALL THROUGH
    case WriteState_Element:
        while (!m_openedElements.empty())
        {
            WriteEndElement();
        }
        break;
    default:
        // TODO: Generate error
        break;
    }

    m_writeState = WriteState_Start;
}

// Closes the innermost open element. An element whose start tag is still
// unclosed is written in the empty-element form ("/>"); otherwise a regular
// end tag is written.
void StringXmlWriter::WriteEndElement()
{
    if (m_openedElements.empty())
    {
        // The writer stays in WriteState_Content after the outermost element
        // has been closed, so this can be reached with nothing left to close.
        // top()/pop() on an empty stack would be undefined behavior.
        // TODO: Generate error
        return;
    }

    switch (m_writeState)
    {
    case WriteState_Content:
        {
            m_xmlStr += "</";
            m_xmlStr += m_openedElements.top();
            m_xmlStr += '>';
            m_openedElements.pop();
            m_writeState = WriteState_Content;
            break;
        }
    case WriteState_Element:
        {
            m_xmlStr += "/>";
            m_openedElements.pop();
            m_writeState = WriteState_Content;
            break;
        }
    default:
        // TODO: Generate error
        break;
    }
}

void StringXmlWriter::WriteStartAttribute(const std::string& localName)
{
    switch (m_writeState)
    {
    case WriteState_Element:
        m_xmlStr += ‘ ‘;
        m_xmlStr += localName;
        m_xmlStr += “=”";
        m_writeState = WriteState_Attribute;
        break;
    default:
        // TODO: Generate error
        break;
    }
}

// Writes the XML declaration and moves the writer to WriteState_Prolog.
// Only acted on when nothing has been written yet (WriteState_Start).
void StringXmlWriter::WriteStartDocument()
{
    switch (m_writeState)
    {
    case WriteState_Start:
        m_xmlStr += "<?xml version=\"1.0\"?>";
        m_writeState = WriteState_Prolog;
        break;
    default:
        // TODO: Generate error
        break;
    }
}

// Opens a new element: records its name as the innermost open element and
// writes "<name". The start tag is left unclosed so that attributes can
// still be added to it.
//
// localName - element name
void StringXmlWriter::WriteStartElement(const std::string& localName)
{
    switch (m_writeState)
    {
    case WriteState_Element:
        // An element is currently open.  Close the element so we can open
        // a new one.
        m_xmlStr += '>';
        // FALL THROUGH
    case WriteState_Content:
    case WriteState_Prolog:
    case WriteState_Start:
        m_openedElements.push(localName);
        m_xmlStr += '<';
        m_xmlStr += localName;
        m_writeState = WriteState_Element;
        break;
    default:
        // TODO: Generate error
        break;
    }
}

// Writes text at the current position. Inside an attribute the text is
// escaped with AttributeValueTranslations; otherwise it is written as element
// content and escaped with CharDataTranslations.
//
// text - unescaped text to write
void StringXmlWriter::WriteString(const std::string& text)
{
    switch (m_writeState)
    {
    case WriteState_Attribute:
        m_xmlStr += TranslateString
            (
            text,
            AttributeValueTranslations,
            ARRAYSIZE(AttributeValueTranslations)
            );
        break;
    case WriteState_Element:
        // An element is currently open.  Close the element so we can start
        // writing the element content.
        m_xmlStr += '>';
        m_writeState = WriteState_Content;
        // FALL THROUGH
    case WriteState_Content:
        m_xmlStr += TranslateString
            (
            text,
            CharDataTranslations,
            ARRAYSIZE(CharDataTranslations)
            );
        break;
    default:
        // TODO: Generate error
        break;
    }
}
Advertisements

Implementing IXmlWriter Part 8: Supporting WriteStartAttribute() and WriteEndAttribute()

This is part 8 of my Implementing IXmlWriter post series.

Today I will add support for the functions WriteStartAttribute() and WriteEndAttribute() to last time’s IXmlWriter.

These functions are (obviously) used to denote the start and end of an attribute; the attribute value is written using WriteString() (this usage is analogous to WriteStartElement() and WriteEndElement()). Because WriteString() must now be aware of whether it is writing an attribute value or element content, I must keep track of the state the IXmlWriter is in — a change that affects nearly every function.

The current state of the IXmlWriter is very important. Not only does it determine what type of encoding WriteString() must use, it also determines what operations are valid at what time. For example, it doesn’t make sense for a user to call WriteAttributeString() unless an element is still opened. In other words, the function call sequence WriteStartElement(), WriteString(), WriteAttributeString() is clearly illegal. To this end, I define a number of different states (see enum WriteState below), and most functions check this state to determine whether or not they should allow the action. Generating errors for illegal operations given the current IXmlWriter state is unimplemented but left as a TODO comment. Note my extensive use of the switch statement; I even use it for consistency’s sake when it may be a tiny bit less code to use if/else (e.g. WriteEndAttribute()). Also note that I use the switch statement’s fall-through feature to allow processing to continue as if the IXmlWriter is in a different state; these occurrences are clearly indicated with a // FALL THROUGH comment.

Here’s the test case:

// Exercises WriteStartAttribute()/WriteEndAttribute(). The attribute value
// contains both characters listed in AttributeValueTranslations (a double
// quote and an ampersand), so both must come out escaped. The indentation
// mirrors the nesting of the XML being written.
StringXmlWriter xmlWriter;

xmlWriter.WriteStartElement("root");
  xmlWriter.WriteStartElement("element");
    xmlWriter.WriteStartAttribute("att");
      xmlWriter.WriteString("\"&");
    xmlWriter.WriteEndAttribute();
  xmlWriter.WriteEndElement();
xmlWriter.WriteEndElement();

std::string strXML = xmlWriter.GetXmlString();
// strXML should be <root><element att="&quot;&amp;"/></root>

Here’s the new header file:

// StringXmlWriter.h

// Builds XML text in an in-memory string through a sequence of Write*()
// calls. m_writeState records what is currently being written so that each
// call can decide whether it is allowed and how text has to be escaped.
class StringXmlWriter
{
private:
    // What the writer is currently in the middle of writing.
    enum WriteState
    {
        WriteState_Attribute, // An attribute value is being written
        WriteState_Content, // Element content is being written
        WriteState_Element, // An element start tag has been written (and is unclosed)
        WriteState_Start, // No Write() methods have been called
    };

    // Current state of the writer.
    WriteState m_writeState;
    // Names of the elements opened and not yet closed; innermost on top.
    std::stack<std::string> m_openedElements;
    // The XML text produced so far.
    std::string m_xmlStr;

public:
    // Creates a writer in WriteState_Start.
    StringXmlWriter();

    // Returns a copy of the XML text written so far.
    std::string GetXmlString() const;
    // Writes a whole attribute by calling WriteStartAttribute(), WriteString()
    // and WriteEndAttribute() in turn.
    void WriteAttributeString(const std::string& localName,
                              const std::string& text);
    // Writes a whole element by calling WriteStartElement(), WriteString()
    // and WriteEndElement() in turn.
    void WriteElementString(const std::string& localName,
                            const std::string& text);
    // Writes the closing quote of the attribute value being written.
    void WriteEndAttribute();
    // Closes the innermost open element; uses the empty-element form when its
    // start tag is still unclosed.
    void WriteEndElement();
    // Starts an attribute on the element whose start tag is still unclosed;
    // the value is then supplied through WriteString().
    void WriteStartAttribute(const std::string& localName);
    // Opens a new element, first closing a still-unclosed start tag.
    void WriteStartElement(const std::string& localName);
    // Writes text, escaped as an attribute value or as element content
    // depending on the current state.
    void WriteString(const std::string& text);

private:
    // Disable copy construction and assignment
    StringXmlWriter(const StringXmlWriter&);
    StringXmlWriter& operator=(const StringXmlWriter&);
};

Here’s the new implementation file:

// StringXmlWriter.cpp

#include "StringXmlWriter.h"

// Number of elements in a statically sized array. Not valid for pointers.
#define ARRAYSIZE(x) ( sizeof(x) / sizeof((x)[0]) )

// Maps one character to the string that replaces it in the XML output.
struct CharTranslation
{
    char OriginalChar;
    const char* ReplacementString;
};

// Characters that must be escaped inside a double-quoted attribute value.
// '<' is listed because the XML grammar does not allow a literal '<' in an
// attribute value.
static const CharTranslation AttributeValueTranslations[] =
{
    { '"', "&quot;" },
    { '&', "&amp;" },
    { '<', "&lt;" },
};

// Characters that are escaped in element character data.
static const CharTranslation CharDataTranslations[] =
{
    { '&', "&amp;" },
    { '<', "&lt;" },
    { '>', "&gt;" },
};

// Binary predicate: is the given translation the one for character ch?
// The adaptor typedefs are declared by hand instead of inheriting from
// std::binary_function, which was deprecated in C++11 and removed in C++17.
struct OriginalCharEquals
{
    typedef CharTranslation first_argument_type;
    typedef char second_argument_type;
    typedef bool result_type;

    bool operator() (const CharTranslation& translation, char ch) const
    {
        return (translation.OriginalChar == ch);
    }
};

// Returns the first translation in [first, last) that applies to ch, or last
// when there is none. Replaces std::find_if + std::bind2nd (removed in C++17).
static const CharTranslation* FindTranslation(const CharTranslation* first,
                                              const CharTranslation* last,
                                              char ch)
{
    for (; first != last; ++first)
    {
        if (OriginalCharEquals()(*first, ch))
        {
            break;
        }
    }

    return first;
}

// Returns a copy of originalStr in which every character that has an entry in
// the translation table is replaced by that entry's ReplacementString.
//
// originalStr     - text to escape
// translations    - first entry of the translation table
// numTranslations - number of entries in the table
static std::string TranslateString(const std::string& originalStr,
                                   const CharTranslation* translations,
                                   int numTranslations)
{
    // One past the last entry; doubles as the "not found" marker.
    const CharTranslation* endTranslations = translations + numTranslations;

    std::string translatedStr;
    // The result is at least as long as the input, so this avoids most of the
    // reallocations caused by appending character by character.
    translatedStr.reserve(originalStr.size());

    for (std::string::const_iterator stringIter = originalStr.begin();
         stringIter != originalStr.end();
         ++stringIter)
    {
        const char ch = *stringIter;

        const CharTranslation* translation =
            FindTranslation(translations, endTranslations, ch);
        if (translation != endTranslations)
        {
            translatedStr += translation->ReplacementString;
        }
        else
        {
            translatedStr += ch;
        }
    }

    return translatedStr;
}

// Creates an empty writer in WriteState_Start (nothing written yet).
StringXmlWriter::StringXmlWriter() :
    m_writeState(WriteState_Start)
{
}

// Returns a copy of the XML text written so far. The writer is not modified,
// so elements that are still open are not closed in the returned text.
std::string StringXmlWriter::GetXmlString() const
{
    return m_xmlStr;
}

void StringXmlWriter::WriteAttributeString(const std::string& localName,
                                           const std::string& text)
{
    WriteStartAttribute(localName);
    WriteString(text);
    WriteEndAttribute();
}

void StringXmlWriter::WriteElementString(const std::string& localName,
                                         const std::string& text)
{
    WriteStartElement(localName);
    WriteString(text);
    WriteEndElement();
}

// Terminates the attribute value in progress with its closing quote and
// returns the writer to the enclosing, still unclosed, start tag.
void StringXmlWriter::WriteEndAttribute()
{
    if (m_writeState == WriteState_Attribute)
    {
        m_xmlStr.push_back('"');
        m_writeState = WriteState_Element;
    }
    else
    {
        // TODO: Generate error
    }
}

// Closes the innermost open element. An element whose start tag is still
// unclosed is written in the empty-element form ("/>"); otherwise a regular
// end tag is written.
void StringXmlWriter::WriteEndElement()
{
    if (m_openedElements.empty())
    {
        // The writer stays in WriteState_Content after the outermost element
        // has been closed, so this can be reached with nothing left to close.
        // top()/pop() on an empty stack would be undefined behavior.
        // TODO: Generate error
        return;
    }

    switch (m_writeState)
    {
    case WriteState_Content:
        {
            m_xmlStr += "</";
            m_xmlStr += m_openedElements.top();
            m_xmlStr += '>';
            m_openedElements.pop();
            m_writeState = WriteState_Content;
            break;
        }
    case WriteState_Element:
        {
            m_xmlStr += "/>";
            m_openedElements.pop();
            m_writeState = WriteState_Content;
            break;
        }
    default:
        // TODO: Generate error
        break;
    }
}

// Begins an attribute on the element whose start tag is still unclosed by
// writing a space, the attribute name and the opening quote. The value is
// supplied afterwards through WriteString().
//
// localName - attribute name
void StringXmlWriter::WriteStartAttribute(const std::string& localName)
{
    const bool insideStartTag = (m_writeState == WriteState_Element);
    if (!insideStartTag)
    {
        // TODO: Generate error
        return;
    }

    m_xmlStr.push_back(' ');
    m_xmlStr.append(localName);
    m_xmlStr.append("=\"");
    m_writeState = WriteState_Attribute;
}

// Opens a new element: records its name as the innermost open element and
// writes "<name", leaving the start tag unclosed so attributes can follow.
//
// localName - element name
void StringXmlWriter::WriteStartElement(const std::string& localName)
{
    const bool startTagPending = (m_writeState == WriteState_Element);
    const bool canOpenElement = startTagPending
        || (m_writeState == WriteState_Content)
        || (m_writeState == WriteState_Start);

    if (!canOpenElement)
    {
        // TODO: Generate error
        return;
    }

    if (startTagPending)
    {
        // The previous start tag is still unclosed; finish it before nesting
        // the new element inside it.
        m_xmlStr += '>';
    }

    m_openedElements.push(localName);
    m_xmlStr += '<';
    m_xmlStr += localName;
    m_writeState = WriteState_Element;
}

// Writes text at the current position. Inside an attribute the text is
// escaped with AttributeValueTranslations; otherwise it is written as element
// content and escaped with CharDataTranslations.
//
// text - unescaped text to write
void StringXmlWriter::WriteString(const std::string& text)
{
    if (m_writeState == WriteState_Attribute)
    {
        m_xmlStr += TranslateString(text,
                                    AttributeValueTranslations,
                                    ARRAYSIZE(AttributeValueTranslations));
        return;
    }

    if (m_writeState == WriteState_Element)
    {
        // The start tag is still unclosed; finish it so the text becomes the
        // element's content.
        m_xmlStr += '>';
        m_writeState = WriteState_Content;
    }

    if (m_writeState != WriteState_Content)
    {
        // TODO: Generate error
        return;
    }

    m_xmlStr += TranslateString(text,
                                CharDataTranslations,
                                ARRAYSIZE(CharDataTranslations));
}

Implementing IXmlWriter Part 7: Cleaning Up

This is part 7 of my Implementing IXmlWriter post series.

Wow, I can’t believe that it’s been over a month already since my last IXmlWriter post. I guess my vacation ruined my exercise plan and my blogging habits. It’s well past time to get back into both.

Rather than introduce a new test case, I’m going to spend today “cleaning up” the previous version of IXmlWriter.

The first cleanup method is trivial but overdue — I will separate the implementation of IXmlWriter from its interface, as a user of IXmlWriter shouldn’t be particularly concerned about its implementation. In other words, I will separate it into a .h and a .cpp file. For now, IXmlWriter will continue to expose some implementation details (e.g. its private members), but these details should change relatively infrequently. If I ever need to completely separate its implementation from its interface, I will consider making it into a COM-like object or using the Pimpl idiom.

Secondly, I will disable the default, compiler-created copy constructor and assignment operator. While they would work under the current implementation, there isn’t a unit test ensuring they will continue to work as I change the implementation. I’m also not exactly sure how useful these abilities are — why would someone want to copy an instance of IXmlWriter? Finally, imagine the confusion if a user accidentally declared a function to use an IXmlWriter rather than an IXmlWriter&. In general, I think it is best to disable a class’s copy constructor and assignment operator by default and enable them only as required.

Finally, and perhaps more controversially, I didn’t like that the definitions of the translations required for character data and attribute values were stored as member variables of the class as they seemed to pollute its “private interface” somewhat. Even worse, I didn’t like that these translations were initialized every time the class is instantiated; the translations are constant data and this initialization seems wasteful. Finally, I’m not so sure a std::map object is the best choice to store these values as it seems unnecessarily complex. Therefore, I decided to store the translations as an array of structs and modify the TranslateString() function accordingly.

Here’s the resulting header file:

// StringXmlWriter.h

// Builds XML text in an in-memory string through a sequence of Write*()
// calls.
class StringXmlWriter
{
private:
    // Names of the elements opened and not yet closed; innermost on top.
    std::stack<std::string> m_openedElements;
    // The XML text produced so far.
    std::string m_xmlStr;
    // True while the most recently written start tag has not been closed yet
    // (neither ">" nor "/>" has been written for it).
    bool m_unclosedStartElement;

public:
    // Creates a writer with no unclosed start tag.
    StringXmlWriter();

    // Returns a copy of the XML text written so far.
    std::string GetXmlString() const;
    // Appends an attribute (space, name, quoted and escaped value).
    void WriteAttributeString(const std::string& localName,
                              const std::string& value);
    // Writes a whole element by calling WriteStartElement(), WriteString()
    // and WriteEndElement() in turn.
    void WriteElementString(const std::string& localName,
                            const std::string& value);
    // Closes the innermost open element; uses the empty-element form when its
    // start tag is still unclosed.
    void WriteEndElement();
    // Opens a new element, first closing a still-unclosed start tag.
    void WriteStartElement(const std::string& localName);
    // Writes escaped text, first closing a still-unclosed start tag.
    void WriteString(const std::string& value);

private:
    // Disable copy construction and assignment
    StringXmlWriter(const StringXmlWriter&);
    StringXmlWriter& operator=(const StringXmlWriter&);
};

Here’s the implementation file:

// StringXmlWriter.cpp

#include "StringXmlWriter.h"

// Number of elements in a statically sized array. Not valid for pointers.
#define ARRAYSIZE(x) ( sizeof(x) / sizeof((x)[0]) )

// Maps one character to the string that replaces it in the XML output.
struct CharTranslation
{
    char OriginalChar;
    const char* ReplacementString;
};

// Characters that must be escaped inside a double-quoted attribute value.
// '<' is listed because the XML grammar does not allow a literal '<' in an
// attribute value.
static const CharTranslation AttributeValueTranslations[] =
{
    { '"', "&quot;" },
    { '&', "&amp;" },
    { '<', "&lt;" },
};

// Characters that are escaped in element character data.
static const CharTranslation CharDataTranslations[] =
{
    { '&', "&amp;" },
    { '<', "&lt;" },
    { '>', "&gt;" },
};

// Binary predicate: is the given translation the one for character ch?
// The adaptor typedefs are declared by hand instead of inheriting from
// std::binary_function, which was deprecated in C++11 and removed in C++17.
struct OriginalCharEquals
{
    typedef CharTranslation first_argument_type;
    typedef char second_argument_type;
    typedef bool result_type;

    bool operator() (const CharTranslation& translation, char ch) const
    {
        return (translation.OriginalChar == ch);
    }
};

// Returns the first translation in [first, last) that applies to ch, or last
// when there is none. Replaces std::find_if + std::bind2nd (removed in C++17).
static const CharTranslation* FindTranslation(const CharTranslation* first,
                                              const CharTranslation* last,
                                              char ch)
{
    for (; first != last; ++first) {
        if (OriginalCharEquals()(*first, ch)) {
            break;
        }
    }

    return first;
}

// Returns a copy of originalStr in which every character that has an entry in
// the translation table is replaced by that entry's ReplacementString.
//
// originalStr     - text to escape
// translations    - first entry of the translation table
// numTranslations - number of entries in the table
static std::string TranslateString(const std::string& originalStr,
                                   const CharTranslation* translations,
                                   int numTranslations)
{
    // One past the last entry; doubles as the "not found" marker.
    const CharTranslation* endTranslations = translations + numTranslations;

    std::string translatedStr;
    // The result is at least as long as the input, so this avoids most of the
    // reallocations caused by appending character by character.
    translatedStr.reserve(originalStr.size());
    for (std::string::const_iterator stringIter = originalStr.begin();
         stringIter != originalStr.end();
         ++stringIter) {
        const char ch = *stringIter;

        const CharTranslation* translation =
            FindTranslation(translations, endTranslations, ch);
        if (translation != endTranslations) {
            translatedStr += translation->ReplacementString;
        } else {
            translatedStr += ch;
        }
    }

    return translatedStr;
}

// Creates an empty writer; no start tag is pending.
StringXmlWriter::StringXmlWriter() : m_unclosedStartElement(false)
{
}

// Returns a copy of the XML text written so far. The writer is not modified,
// so elements that are still open are not closed in the returned text.
std::string StringXmlWriter::GetXmlString() const
{
    return m_xmlStr;
}

void StringXmlWriter::WriteAttributeString(const std::string& localName,
                                           const std::string& value)
{
    m_xmlStr += ' ';
    m_xmlStr += localName;
    m_xmlStr += "=\"";
    m_xmlStr += TranslateString
        (
        value,
        AttributeValueTranslations,
        ARRAYSIZE(AttributeValueTranslations)
        );
    m_xmlStr += '"';
}

// Writes a complete element: start tag, escaped text content, end tag.
//
// localName - element name
// value     - element content; escaped by WriteString()
//
// Because WriteString() always closes the start tag before writing, the
// element is written with a separate end tag even when value is empty.
void StringXmlWriter::WriteElementString(const std::string& localName,
                                         const std::string& value)
{
    WriteStartElement(localName);
    WriteString(value);
    WriteEndElement();
}

// Closes the innermost open element. An element whose start tag is still
// unclosed is written in the empty-element form ("/>"); otherwise a regular
// end tag is written.
void StringXmlWriter::WriteEndElement()
{
    if (m_openedElements.empty()) {
        // More WriteEndElement() calls than WriteStartElement() calls:
        // top()/pop() on an empty stack would be undefined behavior.
        // TODO: Generate error
        return;
    }

    if (m_unclosedStartElement) {
        m_xmlStr += "/>";
        m_unclosedStartElement = false;
    } else {
        m_xmlStr += "</";
        m_xmlStr += m_openedElements.top();
        m_xmlStr += '>';
    }
    m_openedElements.pop();
}

// Opens a new element: records its name as the innermost open element and
// writes "<name", leaving the start tag unclosed so attributes can follow.
//
// localName - element name
void StringXmlWriter::WriteStartElement(const std::string& localName)
{
    // A previous start tag that is still unclosed must be finished before the
    // new element can be nested inside it.
    const bool previousTagPending = m_unclosedStartElement;
    if (previousTagPending) {
        m_xmlStr.push_back('>');
    }

    m_openedElements.push(localName);
    m_xmlStr.push_back('<');
    m_xmlStr.append(localName);

    // The tag just written is now the one awaiting ">" or "/>".
    m_unclosedStartElement = true;
}

// Writes text as element content, escaped with CharDataTranslations. A start
// tag that is still unclosed is finished first.
//
// value - unescaped text to write
void StringXmlWriter::WriteString(const std::string& value)
{
    if (m_unclosedStartElement) {
        m_xmlStr.push_back('>');
        m_unclosedStartElement = false;
    }

    const std::string escapedText = TranslateString(value,
                                                    CharDataTranslations,
                                                    ARRAYSIZE(CharDataTranslations));
    m_xmlStr.append(escapedText);
}

By the way, if you can figure out a more elegant way to perform the translations, I’d love to hear it. I looked at std::transform but that can only do T->T translations. I wish I could have figured out a way to avoid writing the OriginalCharEquals function object and used something like std::equal_to, but I couldn’t find one which could compare the char OriginalChar member of CharTranslation to a separate char variable. Finally, I think the character-by-character translation method is likely slow and produces a rather large number of reallocations.