[GME-commit] GMESRC/GME/Include/xercesc/util/NetAccessors/libWWW BinURLInputStream.cpp, NONE, 1.1 BinURLInputStream.hpp, NONE, 1.1 LibWWWNetAccessor.cpp, NONE, 1.1 LibWWWNetAccessor.hpp, NONE, 1.1 Makefile.in, NONE, 1.1

Log messages of CVS commits gme-commit at list.isis.vanderbilt.edu
Tue Feb 19 14:16:30 CST 2008


Update of /project/gme-repository/GMESRC/GME/Include/xercesc/util/NetAccessors/libWWW
In directory escher:/tmp/cvs-serv26529/util/NetAccessors/libWWW

Added Files:
	BinURLInputStream.cpp BinURLInputStream.hpp 
	LibWWWNetAccessor.cpp LibWWWNetAccessor.hpp Makefile.in 
Log Message:
Xerces2.7 includes checkin.


CVS User: Zoltan Molnar, ISIS (zolmol)

--- NEW FILE: Makefile.in ---
#
# Copyright 1999-2000,2004 The Apache Software Foundation.
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#      http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
#
# $Log: Makefile.in,v $
# Revision 1.1  2008/02/19 20:16:28  zolmol
# Xerces2.7 includes checkin.
#
# Revision 1.4  2004/09/08 13:56:36  peiyongz
# Apache License Version 2.0
#
# Revision 1.3  2004/07/09 16:16:49  peiyongz
# Build on HP-Itanium, aCC A.05.52
#
# Revision 1.2  2002/07/26 16:49:28  tng
# [Bug 2681] Can't build with gcc/g++ not named 'gcc'/'g++'.  Patch from Jonathan Lennox.
#
# Revision 1.1.1.1  2002/02/01 22:22:23  peiyongz
# sane_include
#
# Revision 1.3  2001/07/06 17:12:01  tng
# Automatic build of single-threaded library.  By Martin Kalen.
#
# Revision 1.2  2001/06/27 17:08:55  tng
# [Bug 1147] Headers install in wrong directory.  By Murray Cumming.
#
# Revision 1.1  2001/03/02 14:39:29  tng
# Enabling libWWW NetAccessor support under UNIX. Tested with latest tarball of libWWW
# (w3c-libwww-5.3.2) under RedHat Linux 6.1.  Added by Martin Kalen.
#
# There is one MAJOR problem with the use of libwww and the patches
# below, which someone with knowledge of libwww filters etc. might want
# to look into. Default behavior for content-type text/xml is to consume
# all xml data before it reaches the simple HTML presenter. Hence, only
# files with content-type text/html will actually reach the xerces-c
# library. If you have a *.xml file on the webbserver, processing of the
# file will throw an exception stating "The main XML document cannot be
# empty" (correct in a xerces point of view since if you enable debug
# build you will see that libwww "eats" all text/xml).
#
# See "Diffs for enabling libWWW NetAccessor support under UNIX" posted in March 1, 2001
# in the xerces-c-dev mailing list for further information.
#
#
# Build settings. Each value is an @name@ placeholder; presumably the
# configure step substitutes the real values when it generates the Makefile
# from this template.
PLATFORM = @platform@
CC  = @cc@
CXX = @cxx@
CXXVER = @cxxver@
GCC = @GCC@
GXX = @GXX@
CXXFLAGS = @cxxflags@
CFLAGS = @cflags@
PREFIX = @prefix@
PREFIX_INCLUDE = @prefix_include@
LDFLAGS = @ldflags@
LIBS = @libs@
OSVER = @osver@
USELIBWWW = @uselibwww@
MESSAGELOADER = @messageloader@
TRANSCODER = @transcoder@
NETACCESSOR = @netaccessor@
THREADS = @threads@

# Location of this directory within the source tree: util/NetAccessors/libWWW.
MODULE = util
SUBMODULE = NetAccessors/libWWW

# Shared definitions from three directories up. NOTE(review): $(TO), used
# below, is not defined in this file and presumably comes from this include.
include ../../../Makefile.incl

# Headers of this submodule (presumably the ones installed for public use).
CPP_PUBHEADERS = LibWWWNetAccessor.hpp \
                 BinURLInputStream.hpp

# Object files built from this submodule's sources.
CPP_OBJECTS = LibWWWNetAccessor.$(TO) \
              BinURLInputStream.$(TO)

# Common rules for util submodules; included after the lists above are set.
include ../../Makefile.util.submodule

--- NEW FILE: BinURLInputStream.cpp ---
/*
 * Copyright 1999-2000,2004 The Apache Software Foundation.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * $Id: BinURLInputStream.cpp,v 1.1 2008/02/19 20:16:28 zolmol Exp $
 */

#include <xercesc/util/XMLNetAccessor.hpp>
#include <xercesc/util/NetAccessors/libWWW/BinURLInputStream.hpp>
#include <xercesc/util/XMLString.hpp>
#include <xercesc/util/XMLExceptMsgs.hpp>
#include <strings.h>
#include <WWWInit.h>

XERCES_CPP_NAMESPACE_BEGIN

//
// This define specifies the size of the buffer used to read chunks
// out of the URL input stream.
//

#define URLISBUFMAXSIZE        8192


//
// We assume here that the URL is essentially composed of just ASCII characters
// and hence converting it to a 'char *' only requires dropping the leading zero
// byte. The reason we can get away with this is that libWWW currently provides
// no wide character APIs.
//
// The input Unicode string is assumed to be 0 terminated.
// The caller is responsible to free the memory allocated to store the resultant
// 'char *' string.
//

static char* localTranscode(const XMLCh* latinStrInUnicode
                            , MemoryManager* const  manager)
{
    unsigned int   lent = XMLString::stringLen(latinStrInUnicode);
    char*  retval = (char*) manager->allocate
    (
        (lent + 1) * sizeof(char)
    );//new char[lent + 1];
    unsigned int  i = 0;
    for (i = 0; i < lent; i++)
        retval[i] = (char) latinStrInUnicode[i]; // drop the leading byte.
    retval[lent] = 0;
    return retval;
}



//
// Constructs a stream over the resource addressed by urlSource.
//
// The constructor allocates the internal read buffer (URLISBUFMAXSIZE bytes)
// from the URL's memory manager, looks up the libWWW anchor for the URL and
// loads the resource once through a content-counting stream in order to learn
// its size, which is stored in fRemoteFileSize.
//
// Throws NetAccessorException (XMLExcepts::NetAcc_LengthError) when the load
// fails or when no non-negative length can be obtained. All resources acquired
// here are released before the exception leaves the constructor; this matters
// because the destructor is not run for an object whose constructor throws.
//
BinURLInputStream::BinURLInputStream(const XMLURL& urlSource)
      : fAnchor(0)
      , fBuffer(0)
      , fBufferIndex(0)
      , fBufferSize(0)
      , fRemoteFileSize(0)
      , fBytesProcessed(0)
      , fMemoryManager(urlSource.getMemoryManager())
{
    // Initializers above follow the member declaration order in the header.

    fBuffer = (XMLByte*) fMemoryManager->allocate
    (
        URLISBUFMAXSIZE * sizeof(XMLByte)
    );//new XMLByte[URLISBUFMAXSIZE];
    const XMLCh*  uri = urlSource.getURLText();
    char*   uriAsCharStar = localTranscode(uri, fMemoryManager);

    //
    // First find the size of the remote resource being asked for.
    // We use the ContentCounter stream provided by libWWW.
    //

    fAnchor = HTAnchor_findAddress(uriAsCharStar);
    HTRequest*   request = HTRequest_new();
    HTRequest_setOutputFormat(request, WWW_SOURCE);
    HTStream*    counterStrm = HTContentCounter(HTBlackHole(), request, 0xFFFF);
    BOOL  status = HTLoadToStream(uriAsCharStar, counterStrm, request);

    // Record failures instead of throwing right away, so that the cleanup
    // below always runs first.
    bool  lengthError = (status == NO);
    if (status == YES)
    {
        HTParentAnchor * anchor = HTRequest_anchor(request);
        fRemoteFileSize=HTAnchor_length(anchor);
        if(fRemoteFileSize < 0)
        {
            // Patch by Artur Klauser
            // When a redirection is processed in libWWW, it seems that
            // HTAnchor_length(anchor) == -1 on the original anchor, whereas
            // HTResponse_length(response) gives the correct content length of
            // the redirection target. This has confused fRemoteFileSize and it was
            // not checked for a -1 response at all.
            HTResponse * response = HTRequest_response (request);
            fRemoteFileSize = HTResponse_length(response);
            if (fRemoteFileSize < 0)
                lengthError = true;
        }
    }

    // Cleanup, before you throw any errors.
    fMemoryManager->deallocate(uriAsCharStar);
    HTRequest_delete(request);
    // Don't know whether I am supposed to delete counterStrm.

    if (lengthError)
    {
        // The destructor will not be called for a partially constructed
        // object, so give the read buffer back here.
        fMemoryManager->deallocate(fBuffer);
        fBuffer = 0;
        ThrowXMLwithMemMgr(NetAccessorException, XMLExcepts::NetAcc_LengthError, fMemoryManager);
    }
}



//
// Returns the read buffer to the memory manager it was allocated from.
//
BinURLInputStream::~BinURLInputStream()
{
    // fAnchor is deliberately left untouched: per the original note it is
    // released when the destructor of libWWWNetAccessor is called.
    XMLByte* const oldBuffer = fBuffer;
    fBuffer = 0;
    fMemoryManager->deallocate(oldBuffer);//delete [] fBuffer;
}


//
// Rewinds the stream to its start: zeroes the whole read buffer and clears
// the buffer bookkeeping and the processed-bytes counter.
//
void BinURLInputStream::reset()
{
    memset((void*) fBuffer, 0x00, URLISBUFMAXSIZE * sizeof(XMLByte));

    fBufferIndex    = 0;
    fBufferSize     = 0;
    fBytesProcessed = 0;
}


//
// Returns the number of bytes handed out so far (fBytesProcessed).
//
unsigned int BinURLInputStream::curPos() const
{
    const unsigned int position = fBytesProcessed;
    return position;
}


//
// Returns how many bytes of fBuffer have not been handed out yet, i.e. the
// distance between the read index and the end of the valid data.
//
unsigned int BinURLInputStream::bytesAvail() const
{
    return fBufferSize - fBufferIndex;
}


//
// Copies up to maxToRead bytes of the remote resource into toFill and
// returns the number of bytes actually copied.
//
//  toFill     Destination array; it must have room for maxToRead bytes and is
//             zero-filled before anything is copied into it.
//  maxToRead  Upper bound on the number of bytes to deliver. The request is
//             additionally limited to what remains of the remote resource.
//
// Bytes still held in fBuffer are delivered first. If they do not satisfy the
// request, one further chunk of at most URLISBUFMAXSIZE bytes is fetched with
// an HTTP byte-range request, so a single call may return fewer bytes than
// were asked for. A return value of 0 means nothing could be delivered (end
// of resource, or the chunk load produced no data).
//
unsigned int BinURLInputStream::readBytes(XMLByte* const  toFill
                                  , const unsigned int    maxToRead)
{
    unsigned int  retval = 0;
    unsigned int  bytesAsked = maxToRead;
    unsigned int  bytesForCopy = 0;

    // Wipe out the old stuff from the destination buffer to fill.

    memset((void*)toFill, 0x00, sizeof(XMLByte) * maxToRead);

    // You can only read till the end of the remote resource file.
    // So, adjust the count of bytes you want to read now.
    // The constructor rejects negative sizes, and fBytesProcessed never
    // exceeds the size, so this subtraction cannot wrap around (unlike
    // adding maxToRead to fBytesProcessed, which could).

    const unsigned int  remoteSize = (unsigned int) fRemoteFileSize;
    const unsigned int  bytesLeftInFile = remoteSize - fBytesProcessed;
    if (bytesAsked > bytesLeftInFile)
    {
        bytesAsked = bytesLeftInFile;
    }

    if (fBufferSize > 0)
        bytesForCopy = fBufferSize - fBufferIndex;

    if (bytesAsked <= bytesForCopy)
    {
        // ...then you can satisfy this request completely from fBuffer.
        // Simply copy over the bytes to the destination array.
        memcpy((void*) toFill, (void*) (fBuffer + fBufferIndex), bytesAsked);
        fBufferIndex += bytesAsked;
        if (fBufferIndex >= fBufferSize)
        {
            fBufferSize = 0;
            fBufferIndex = 0;
        }
        fBytesProcessed += bytesAsked;
        retval = bytesAsked;
    }
    else
    {
        // ...will need to read some more bytes out of the stream.
        unsigned int    bufToFillIndex = 0;
        HTRequest*      request = HTRequest_new();
        HTChunk*        result = NULL;
        char            ranges[64];

        // First copy over what is left in fBuffer, before reading another
        // chunk out of the stream. The unread bytes start at fBufferIndex
        // (fBufferSize marks the END of the valid data).

        if (bytesForCopy != 0)
        {
            memcpy((void*) toFill, (void*) (fBuffer + fBufferIndex), bytesForCopy);
            fBytesProcessed += bytesForCopy;
            bufToFillIndex = bytesForCopy;
            retval = bytesForCopy;
        }

        // Whatever was buffered has been consumed; start from an empty
        // buffer so the bookkeeping stays consistent even if the load fails.
        fBufferSize = 0;
        fBufferIndex = 0;

        unsigned int    bytesRemainingForCopy = bytesAsked - bytesForCopy;

        // Now read a new chunk from the stream. HTTP lets you specify the
        // range of bytes that you would like. The range is inclusive and is
        // cut off at the last byte of the resource. This branch is only
        // reached while fBytesProcessed < remoteSize, so lastByte >= firstByte.

        const unsigned long  firstByte = fBytesProcessed;
        unsigned long        lastByte = firstByte + URLISBUFMAXSIZE - 1;
        if (lastByte >= remoteSize)
            lastByte = (unsigned long) remoteSize - 1;

        sprintf(ranges, "%lu-%lu", firstByte, lastByte);
        HTRequest_addRange(request, "bytes", ranges);
        HTRequest_setOutputFormat(request, WWW_SOURCE);
        result = HTLoadAnchorToChunk(fAnchor, request);

        if (result != NULL)
        {
            const int    chunkSize = HTChunk_size(result);
            const char*  chunkData = HTChunk_data(result);
            if (chunkSize > 0 && chunkData != NULL)
            {
                // Never store more than fBuffer can hold, whatever amount of
                // data came back (e.g. if the range was not honoured).
                fBufferSize = (chunkSize > URLISBUFMAXSIZE)
                            ? (unsigned int) URLISBUFMAXSIZE
                            : (unsigned int) chunkSize;

                // Store the read chunk in fBuffer.
                memset((void*) fBuffer, 0x00, URLISBUFMAXSIZE);
                memcpy((void*) fBuffer, (const void*) chunkData, fBufferSize);
            }
            HTChunk_delete(result);
        }

        HTRequest_delete(request);

        // Now fill the destination buffer with the new data just read.

        if (bytesRemainingForCopy > fBufferSize)
        {
            bytesRemainingForCopy = fBufferSize;
        }
        memcpy((void*) (toFill + bufToFillIndex),
               (void*) fBuffer,
               bytesRemainingForCopy);

        // Update counters.
        retval += bytesRemainingForCopy;
        fBufferIndex += bytesRemainingForCopy;
        fBytesProcessed += bytesRemainingForCopy;
    }

    return retval;
}

XERCES_CPP_NAMESPACE_END

--- NEW FILE: LibWWWNetAccessor.hpp ---
/*
 * Copyright 1999-2000,2004 The Apache Software Foundation.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * $Id: LibWWWNetAccessor.hpp,v 1.1 2008/02/19 20:16:28 zolmol Exp $
 */


#if !defined(LIBWWWNETACCESSOR_HPP)
#define LIBWWWNETACCESSOR_HPP


#include <xercesc/util/XercesDefs.hpp>
#include <xercesc/util/XMLURL.hpp>
#include <xercesc/util/BinInputStream.hpp>
#include <xercesc/util/XMLNetAccessor.hpp>

XERCES_CPP_NAMESPACE_BEGIN

//
// This class is the wrapper for the libWWW library which provides
// support for HTTP and other network protocols, so that URL's using
// these protocols can be used in system id's in the XML decl clauses.
//

class XMLUTIL_EXPORT LibWWWNetAccessor : public XMLNetAccessor
{
public :
    // The constructor initializes the libWWW library and the destructor
    // shuts it down again (see LibWWWNetAccessor.cpp).
    LibWWWNetAccessor();
    ~LibWWWNetAccessor();

    // Creates an input stream for the resource addressed by urlSource.
    // Only HTTP URLs are supported, and when httpInfo is supplied its
    // method must be GET; otherwise an exception is thrown.
    BinInputStream* makeNew(const XMLURL&  urlSource, const XMLNetHTTPInfo* httpInfo=0);

    // Returns the name that identifies this accessor (fgMyName).
    const XMLCh* getId() const;

private :
    // 0-terminated name returned by getId(); defined in the .cpp file.
    static const XMLCh fgMyName[];

    // Copy construction and assignment are declared private so that
    // instances cannot be copied from outside the class.
    LibWWWNetAccessor(const LibWWWNetAccessor&);
    LibWWWNetAccessor& operator=(const LibWWWNetAccessor&);

}; // LibWWWNetAccessor

// Returns the static, 0-terminated name that identifies this accessor.
inline const XMLCh* LibWWWNetAccessor::getId() const
{
    return LibWWWNetAccessor::fgMyName;
}

XERCES_CPP_NAMESPACE_END

#endif // LIBWWWNETACCESSOR_HPP

--- NEW FILE: LibWWWNetAccessor.cpp ---
/*
 * Copyright 1999-2000,2004 The Apache Software Foundation.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * $Id: LibWWWNetAccessor.cpp,v 1.1 2008/02/19 20:16:28 zolmol Exp $
 */

#include <xercesc/util/XMLUniDefs.hpp>
#include <xercesc/util/XMLUni.hpp>
#include <xercesc/util/XMLString.hpp>
#include <xercesc/util/NetAccessors/libWWW/BinURLInputStream.hpp>
#include <xercesc/util/NetAccessors/libWWW/LibWWWNetAccessor.hpp>

#include <WWWInit.h>

XERCES_CPP_NAMESPACE_BEGIN

// The accessor's identifying name, "libWWWNetAccessor", spelled out one
// XMLCh constant at a time and terminated with chNull. It is returned by
// LibWWWNetAccessor::getId().
const XMLCh LibWWWNetAccessor::fgMyName[] =
{
    chLatin_l, chLatin_i, chLatin_b, chLatin_W, chLatin_W, chLatin_W,
    chLatin_N, chLatin_e, chLatin_t, chLatin_A, chLatin_c, chLatin_c,
    chLatin_e, chLatin_s, chLatin_s, chLatin_o, chLatin_r, chNull
};


//
// Sets up libWWW for use by the parser. The matching teardown is done in
// the destructor.
//
LibWWWNetAccessor::LibWWWNetAccessor()
{
    //
    // Initialize the libWWW library here.
    //
    // Create the libWWW client profile, identifying the application as
    // "XercesC" with the full Xerces version string.
    HTProfile_newPreemptiveClient("XercesC", gXercesFullVersionStr);

    // Register HTThroughLine as the converter from the two XML media types
    // to any output format ("*/*"). NOTE(review): presumably this is what
    // lets XML content reach the caller instead of being consumed inside
    // libWWW - confirm against the libWWW documentation.
    HTConversion_add(HTFormat_conversion(), "text/xml",         "*/*", HTThroughLine, 1.0, 0.0, 0.0);
    HTConversion_add(HTFormat_conversion(), "application/xml",  "*/*", HTThroughLine, 1.0, 0.0, 0.0);
#ifdef XML_DEBUG
    // Debug builds only: switch on libWWW trace output for the "sop" mask.
    HTSetTraceMessageMask("sop");
#endif
    // Turn off interactive alerts.
    HTAlert_setInteractive(NO);

    // Event timeout for host connections.
    // NOTE(review): 5000 is presumably milliseconds - confirm.
    HTHost_setEventTimeout(5000);
}


//
// Shuts down libWWW by deleting the client profile created in the
// constructor.
//
LibWWWNetAccessor::~LibWWWNetAccessor()
{
    // Clean up the libWWW library here.

    /* Quote from http://www.w3.org/Library/src/HTProfil.html#Client:
     *
     * This call also supersedes the termination function for the
     * Library core, HTLibTerminate() so that you don't have to call
     * that after calling this function.
    */
    HTProfile_delete();
}


//
// Creates a new input stream for the resource addressed by urlSource.
//
//  urlSource  The URL to open. Its memory manager is used both for the new
//             stream and for any exception thrown here.
//  httpInfo   Optional HTTP request information. When supplied, its method
//             must be GET.
//
// Returns a BinURLInputStream allocated with the URL's memory manager.
//
// Throws MalformedURLException (URL_UnsupportedProto) for any protocol other
// than HTTP, and NetAccessorException (NetAcc_UnsupportedMethod) for an HTTP
// request whose method is not GET. Exceptions raised by the
// BinURLInputStream constructor propagate unchanged.
//
BinInputStream* LibWWWNetAccessor::makeNew(const XMLURL&  urlSource, const XMLNetHTTPInfo* httpInfo/*=0*/)
{
    //
    // HTTP is the only protocol we support now. So throw an
    // unsupported protocol exception for the others.
    //
    if (urlSource.getProtocol() != XMLURL::HTTP)
    {
        ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_UnsupportedProto, urlSource.getMemoryManager());
    }

    // Only plain GET requests can be served. The exception is created with
    // the URL's memory manager, like every other exception in this accessor.
    if (httpInfo != 0 && httpInfo->fHTTPMethod != XMLNetHTTPInfo::GET)
    {
        ThrowXMLwithMemMgr(NetAccessorException, XMLExcepts::NetAcc_UnsupportedMethod, urlSource.getMemoryManager());
    }

    return new (urlSource.getMemoryManager()) BinURLInputStream(urlSource);
}

XERCES_CPP_NAMESPACE_END

--- NEW FILE: BinURLInputStream.hpp ---
/*
 * Copyright 1999-2000,2004 The Apache Software Foundation.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * $Id: BinURLInputStream.hpp,v 1.1 2008/02/19 20:16:28 zolmol Exp $
 */

#if !defined(BINURLINPUTSTREAM_HPP)
#define BINURLINPUTSTREAM_HPP


#include <xercesc/util/XMLURL.hpp>
#include <xercesc/util/XMLExceptMsgs.hpp>
#include <xercesc/util/BinInputStream.hpp>

//
// Forward reference the libWWW constructs here, so as to avoid including
// any of the libWWW headers in this file. Just being careful in isolating
// the files that explicitly need to include the libWWW headers.
//

struct _HTAnchor;

XERCES_CPP_NAMESPACE_BEGIN

//
// This class implements the BinInputStream interface specified by the XML
// parser.
//

class XMLUTIL_EXPORT BinURLInputStream : public BinInputStream
{
public :
    // Opens the resource addressed by urlSource and determines its size.
    // Throws NetAccessorException when the resource cannot be loaded or its
    // length cannot be determined (see BinURLInputStream.cpp).
    BinURLInputStream(const XMLURL&  urlSource);

    // Releases the read buffer. fAnchor is not released here.
    ~BinURLInputStream();

    // Returns the number of bytes handed out so far (fBytesProcessed).
    unsigned int curPos() const;

    // Copies up to maxToRead bytes into toFill and returns the number of
    // bytes actually copied, which may be fewer than maxToRead.
    unsigned int readBytes
    (
                XMLByte* const  toFill
        , const unsigned int    maxToRead
    );

    // Clears the buffer and resets the processed-bytes count to zero.
    void reset();

    // Returns the number of unread bytes currently held in fBuffer.
    unsigned int bytesAvail() const;


private :
    // -----------------------------------------------------------------------
    //  Unimplemented constructors and operators
    // -----------------------------------------------------------------------
    BinURLInputStream(const BinURLInputStream&);
    BinURLInputStream& operator=(const BinURLInputStream&); 

    // -----------------------------------------------------------------------
    //  Private data members
    //
    //  fAnchor
    //      This is the handle that LibWWW returns for the remote file that
    //      is being addressed.
    //  fBuffer
    //      This is the array in which the data is stored after reading it
    //      off the network. The maximum size of this array is decided in the
    //      constructor via a file specific #define.
    //  fBufferIndex
    //      It is the index into fBuffer and points to the next unprocessed
    //      character. When the parser asks for more data to be read off the
    //      stream, then fBuffer[fBufferIndex] is the first byte returned,
    //      unless fBufferIndex equals fBufferSize, indicating that all
    //      data in the fBuffer has been processed.
    //  fBufferSize
    //      This represents the extent of valid data in the fBuffer array.
    //  fRemoteFileSize
    //      This stores the size in bytes of the remote file addressed by
    //      this URL input stream.
    //  fBytesProcessed
    //      It is a rolling count of the number of bytes processed off this
    //      input stream. It is only reset back to zero if the stream is
    //      reset. The maximum value this can have is fRemoteFileSize.
    //  fMemoryManager
    //      The memory manager taken from the source URL; fBuffer is
    //      allocated from it and returned to it.
    // -----------------------------------------------------------------------

    struct _HTAnchor*   fAnchor;
    XMLByte*            fBuffer;
    unsigned int        fBufferIndex;
    unsigned int        fBufferSize;
    int                 fRemoteFileSize;
    unsigned int        fBytesProcessed;
    MemoryManager*      fMemoryManager;
};

XERCES_CPP_NAMESPACE_END

#endif // BINURLINPUTSTREAM_HPP



More information about the GME-commit mailing list