[GME-commit] GMESRC/GME/Parser Transcoder.cpp,NONE,1.1 Transcoder.h,NONE,1.1 MgaDumper.cpp,1.13,1.14 MgaDumper.h,1.8,1.9 Parser.dsp,1.11,1.12

gme-commit at list.isis.vanderbilt.edu gme-commit at list.isis.vanderbilt.edu
Fri Jun 11 10:54:30 CDT 2004


Update of /var/lib/gme/GMESRC/GME/Parser
In directory braindrain:/tmp/cvs-serv27179

Modified Files:
	MgaDumper.cpp MgaDumper.h Parser.dsp 
Added Files:
	Transcoder.cpp Transcoder.h 
Log Message:
UTF-8 encoding introduced

Bug related to character data containing ']]>' resolved.




CVS User: zolmol

--- NEW FILE: Transcoder.cpp ---
#include "stdafx.h"

#include "Transcoder.h"
#include <xercesc/util/XMLUniDefs.hpp>


// First half of the XML declaration, ending with the opening quote of the
// encoding value:   <?xml version="1.0" encoding="
static const XMLCh  gXMLDecl1[] =
{
    // <?xml
    chOpenAngle, chQuestion, chLatin_x, chLatin_m, chLatin_l,
    chSpace,
    // version=
    chLatin_v, chLatin_e, chLatin_r, chLatin_s, chLatin_i, chLatin_o, chLatin_n, chEqual,
    // "1.0"
    chDoubleQuote, chDigit_1, chPeriod, chDigit_0, chDoubleQuote,
    chSpace,
    // encoding="
    chLatin_e, chLatin_n, chLatin_c, chLatin_o, chLatin_d, chLatin_i, chLatin_n, chLatin_g, chEqual,
    chDoubleQuote,
    // terminator
    chNull
};

// Second half of the XML declaration: the closing quote of the encoding
// value, the end of the declaration and a line feed:   "?>LF
static const XMLCh  gXMLDecl2[] =
{
    chDoubleQuote, chQuestion, chCloseAngle,
    chLF,
    chNull
};




// ---------------------------------------------------------------------------
//  Transcoder: Constructors and Destructor
// ---------------------------------------------------------------------------
// Builds an idle object: the formatter pointer starts out null and is only
// filled in by init().
Transcoder::Transcoder() : m_pFormatter( 0)
{
	// nothing else to set up here
}

// Releases whatever is still held: the formatter (if any) and the output
// file (if still open).
// NOTE(review): unlike finalize(), this does not call
// XMLPlatformUtils::Terminate().
Transcoder::~Transcoder()
{
	// deleting a null pointer is a no-op, so no explicit guard is required
	delete m_pFormatter;
	m_pFormatter = 0;

	if ( is_open())
	{
		close();
	}
}

void Transcoder::init( const char * f, const char * const encodingName)
{
	ASSERT( !m_pFormatter);

	XMLPlatformUtils::Initialize();
	
	m_pFormatter = new XMLFormatter
    (
        encodingName
        , 0
        , this
        , XMLFormatter::NoEscapes
        , XMLFormatter::UnRep_CharRef
    );

	ASSERT( !is_open() );

	open( f, ios::out | ios::trunc);
	if( fail() || !is_open() )
		HR_THROW(E_INVALID_FILENAME);

    *m_pFormatter << gXMLDecl1 << m_pFormatter->getEncodingName() << gXMLDecl2; //will dump '<?xml version="1.0" encoding="UTF-8"?> 
}


void Transcoder::finalize()
{
	delete m_pFormatter;
	m_pFormatter = 0;

	close();

	XMLPlatformUtils::Terminate();
}


// Switches the escaping mode applied to the text streamed in afterwards.
// NoEscape maps to XMLFormatter::NoEscapes, StdEscape to
// XMLFormatter::StdEscapes; any other value leaves the formatter untouched.
Transcoder& 
Transcoder::operator <<( Modes mode)
{
	switch ( mode)
	{
	case NoEscape:
		*m_pFormatter << XMLFormatter::NoEscapes;
		break;

	case StdEscape:
		*m_pFormatter << XMLFormatter::StdEscapes;
		break;

	default:
		break;
	}

	return *this;
}


// Passes a null-terminated XMLCh string to the formatter, which transcodes it
// and delivers the resulting bytes through writeChars().
// Returns *this so that calls can be chained.
Transcoder& 
Transcoder::operator <<( const XMLCh* const toWrite)
{
	XMLFormatter& out = *m_pFormatter;
	out << toWrite;

	return *this;
}

// Writes a null-terminated string given in the local code page.
// The text is first converted to XMLCh form with XMLString::transcode() and
// then forwarded to the XMLCh overload.
// Returns *this so that calls can be chained.
Transcoder& 
Transcoder::operator <<( const char * const toWrite)
{
	XMLCh * unicodeForm = XMLString::transcode( toWrite);

	operator<<( unicodeForm);

	// The buffer returned by transcode() is an array owned by the caller; it
	// has to be handed back through XMLString::release() rather than freed
	// with a scalar delete, which mismatches the array allocation.
	XMLString::release( &unicodeForm);

	return *this;
}

// Writes a single character by wrapping it into a one-character,
// null-terminated string and delegating to the const char* overload.
Transcoder& 
Transcoder::operator <<( const char toWrite)
{
	const char oneChar[] = { toWrite, '\0' };

	return operator<<( oneChar);
}

// Writes the content of a string object by delegating to the const char*
// overload; the text is passed as c_str(), i.e. as a null-terminated string.
Transcoder& 
Transcoder::operator <<( const string& toWrite)
{
	return operator<<( toWrite.c_str());
}

// ---------------------------------------------------------------------------
//  Transcoder: Overrides of the output formatter target interface
// ---------------------------------------------------------------------------
void Transcoder::writeChars(const XMLByte* const toWrite,
                                  const unsigned int count,
                                  XMLFormatter* const formatter)
{
	write( toWrite, count);
}

--- NEW FILE: Transcoder.h ---
#ifndef XML_TRANS_H
#define XML_TRANS_H

#include <fstream.h>
#include    <xercesc/sax/HandlerBase.hpp>
#include    <xercesc/framework/XMLFormatter.hpp>

XERCES_CPP_NAMESPACE_USE

// File writer that pushes everything streamed into it through a Xerces
// XMLFormatter. The object serves as the formatter's target and as the output
// file stream at the same time; both bases are private, so clients only see
// init()/finalize() and the << operators.
class Transcoder : private XMLFormatTarget, private ofstream
{
public:
	// Escaping modes; stream one into the object to change how the following
	// text is written.
	enum Modes
	{
		NoEscape, // normal transcoding 
		StdEscape // additionally escaping special characters like &<>'"
	};

	// -----------------------------------------------------------------------
	//  Construction, destruction
	// -----------------------------------------------------------------------
	Transcoder();
	~Transcoder();

	// -----------------------------------------------------------------------
	//  Life cycle of one output file
	// -----------------------------------------------------------------------
	// First argument: file name, second argument: name of the output encoding.
	void init( const char *, const char * const);
	void finalize();

	// -----------------------------------------------------------------------
	//  Output operators, each returning *this for chaining
	// -----------------------------------------------------------------------
	Transcoder& operator << ( Modes mode);
	Transcoder& operator << ( const XMLCh* const toWrite);
	Transcoder& operator << ( const char * const toWrite);
	Transcoder& operator << ( const char toWrite);
	Transcoder& operator << ( const string& toWrite);

	// -----------------------------------------------------------------------
	//  Format target interface: receives the encoded bytes from the formatter
	// -----------------------------------------------------------------------
	void writeChars
	(
		const   XMLByte* const  toWrite
		, const unsigned int    count
		, XMLFormatter* const   formatter
	);

private :
	// Null while no formatter exists.
	XMLFormatter* m_pFormatter;
};


#endif // XML_TRANS_H
Index: MgaDumper.cpp
===================================================================
RCS file: /var/lib/gme/GMESRC/GME/Parser/MgaDumper.cpp,v
retrieving revision 1.13
retrieving revision 1.14
diff -C2 -d -r1.13 -r1.14
*** MgaDumper.cpp	2 Jun 2004 20:55:33 -0000	1.13
--- MgaDumper.cpp	11 Jun 2004 14:54:21 -0000	1.14
***************
*** 3,10 ****
  #include "Parser.h"
  #include "MgaDumper.h"
  
- #define FLUSH_LIMIT			1000
  
! #include "map"
  
  // -----------------------------------------------------------
--- 3,10 ----
  #include "Parser.h"
  #include "MgaDumper.h"
+ #include "Transcoder.h"
  
  
! #define FLUSH_LIMIT			1000
  
  // -----------------------------------------------------------
***************
*** 97,104 ****
  		HR_THROW(E_INVALIDARG);
  
! 	ASSERT( !ofs.is_open() );
! 	ofs.open(filename.c_str(), ios::out | ios::trunc);
! 	if( ofs.fail() || !ofs.is_open() )
! 		HR_THROW(E_INVALID_FILENAME);
  
  	elems.clear();
--- 97,101 ----
  		HR_THROW(E_INVALIDARG);
  
! 	ofs.init( filename.c_str(), "UTF-8");
  
  	elems.clear();
***************
*** 118,122 ****
  void CMgaDumper::DoneDump(bool abort)
  {
! 	ofs.close();
  	elems.clear();
  
--- 115,119 ----
  void CMgaDumper::DoneDump(bool abort)
  {
! 	ofs.finalize();
  	elems.clear();
  
***************
*** 146,153 ****
  		InitDump(p, xmlfile);
  
- 		// TODO: Dump into real UTF-8
- 		// ofs << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
- 		//
- 		ofs << "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n";
  		ofs << "<!DOCTYPE project SYSTEM \"mga.dtd\">\n\n";
  
--- 143,146 ----
***************
*** 184,191 ****
  			return S_OK;
  
- 
  		InitDump(project, xmlfile);
  
- 		ofs << "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n";
  		ofs << "<!DOCTYPE clipboard SYSTEM \"mga.dtd\" [\n";
  		ofs << "\t<!ELEMENT clipboard (folder|model|atom|reference|set|connection)*>\n";
--- 177,182 ----
***************
*** 227,249 ****
  // ------- Low level stuff
  
- string CMgaDumper::xmlFilter( const string& in)
- {
- 	map<char, string> m;
- 	m['&'] = "&amp;";
- 	m['>'] = "&gt;";
- 	m['<'] = "&lt;";
- 	m['"'] = "&quot;";
- 	m['\''] = "&apos;";
- 
- 	string ret;
- 	for( string::const_iterator i = in.begin(); i != in.end(); ++i)
- 		if ( m.find( *i) != m.end())
- 			ret += m[*i];
- 		else
- 			ret += *i;
- 
- 	return ret;
- }
- 
  inline void CMgaDumper::Indent(int i)
  {
--- 218,221 ----
***************
*** 280,286 ****
  	ASSERT( !elems.empty() && !elems.back().inbody );
  	
! 	string t = xmlFilter( value); //ZolMol
! 
! 	ofs << ' ' << name << "=\"" << t.c_str() << '"';
  }
  
--- 252,256 ----
  	ASSERT( !elems.empty() && !elems.back().inbody );
  	
! 	ofs << ' ' << name << "=\"" << Transcoder::StdEscape << value << Transcoder::NoEscape << '"';
  }
  
***************
*** 293,299 ****
  	ASSERT( !elems.empty() && !elems.back().inbody && !elems.back().indata );
  
! 	string t = xmlFilter( string( value, len)); // ZolMol
! 	
! 	ofs << ' ' << name << "=\"" << t.c_str() << '"';
  }
  
--- 263,267 ----
  	ASSERT( !elems.empty() && !elems.back().inbody && !elems.back().indata );
  
! 	ofs << ' ' << name << "=\"" << Transcoder::StdEscape << string( value, len) << Transcoder::NoEscape << '"';
  }
  
***************
*** 322,335 ****
  	}
  
! 	bool escaped = HasMarkup(value, len);
! 	if( escaped )
! 		ofs << "<![CDATA[";
  
  	elems.back().indata = true;
  
- 	ofs.write(value, len);
- 
- 	if( escaped )
- 		ofs << "]]>";
  }
  
--- 290,297 ----
  	}
  
! 	ofs << Transcoder::StdEscape << string( value, len) << Transcoder::NoEscape;
  
  	elems.back().indata = true;
  
  }
  
***************
*** 343,349 ****
  			Indent(elems.size()-1);
  
! 		ofs << "</";
! 		ofs.write(elems.back().name.data(), elems.back().name.length());
! 		ofs << ">\n";
  	}
  	else
--- 305,309 ----
  			Indent(elems.size()-1);
  
! 		ofs << "</" << elems.back().name << ">\n";
  	}
  	else
***************
*** 1310,1314 ****
  		if ( false) // clipboard format
  		{
- 			ofs << "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n";
  			ofs << "<!DOCTYPE clipboard SYSTEM \"mga.dtd\" [\n";
  			ofs << "\t<!ELEMENT clipboard (folder|model|atom|reference|set|connection)*>\n";
--- 1270,1273 ----
***************
*** 1321,1325 ****
  		else
  		{
- 			ofs << "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n";
  			ofs << "<!DOCTYPE project SYSTEM \"mga.dtd\">\n\n";
  
--- 1280,1283 ----
***************
*** 1381,1385 ****
  		putInTerritory( parentless_folders);
  
- 		ofs << "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n";
  		ofs << "<!DOCTYPE clipboard SYSTEM \"mga.dtd\" [\n";
  		ofs << "\t<!ELEMENT clipboard (folder|model|atom|reference|set|connection)*>\n";
--- 1339,1342 ----

Index: MgaDumper.h
===================================================================
RCS file: /var/lib/gme/GMESRC/GME/Parser/MgaDumper.h,v
retrieving revision 1.8
retrieving revision 1.9
diff -C2 -d -r1.8 -r1.9
*** MgaDumper.h	2 Jun 2004 20:55:33 -0000	1.8
--- MgaDumper.h	11 Jun 2004 14:54:22 -0000	1.9
***************
*** 6,10 ****
  #include "resource.h"
  #include "CommonVersionInfo.h"
! 
  
  class GmeEqual
--- 6,10 ----
  #include "resource.h"
  #include "CommonVersionInfo.h"
! #include "Transcoder.h"
  
  class GmeEqual
***************
*** 65,69 ****
  
  // ------- Low level stuff
- 	string xmlFilter( const string& in);
  	void Indent(int i);
  	void StartElem(const char *name);
--- 65,68 ----
***************
*** 202,206 ****
  
  public:
! 	ofstream ofs;
  
  	struct elem
--- 201,205 ----
  
  public:
! 	Transcoder ofs;
  
  	struct elem

Index: Parser.dsp
===================================================================
RCS file: /var/lib/gme/GMESRC/GME/Parser/Parser.dsp,v
retrieving revision 1.11
retrieving revision 1.12
diff -C2 -d -r1.11 -r1.12
*** Parser.dsp	8 Apr 2004 18:02:56 -0000	1.11
--- Parser.dsp	11 Jun 2004 14:54:22 -0000	1.12
***************
*** 166,169 ****
--- 166,173 ----
  # ADD CPP /Yc"stdafx.h"
  # End Source File
+ # Begin Source File
+ 
+ SOURCE=.\Transcoder.cpp
+ # End Source File
  # End Group
  # Begin Group "Header Files"
***************
*** 204,207 ****
--- 208,215 ----
  
  SOURCE=.\StdAfx.h
+ # End Source File
+ # Begin Source File
+ 
+ SOURCE=.\Transcoder.h
  # End Source File
  # End Group



More information about the GME-commit mailing list