source: icXML/icXML-devel/src/xercesc/util/NetAccessors/Curl/CurlURLInputStream.cpp @ 2722

Last change on this file since 2722 was 2722, checked in by cameron, 6 years ago

Original Xerces files with import mods for icxercesc

File size: 12.0 KB
Line 
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements.  See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License.  You may obtain a copy of the License at
8 *
9 *      http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18/*
19 * $Id: CurlURLInputStream.cpp 936316 2010-04-21 14:19:58Z borisk $
20 */
21
22#if HAVE_CONFIG_H
23  #include <config.h>
24#endif
25
26#include <stdio.h>
27#include <stdlib.h>
28#include <string.h>
29#if HAVE_ERRNO_H
30  #include <errno.h>
31#endif
32#if HAVE_UNISTD_H
33  #include <unistd.h>
34#endif
35#if HAVE_SYS_TYPES_H
36  #include <sys/types.h>
37#endif
38#if HAVE_SYS_TIME_H
39  #include <sys/time.h>
40#endif
41
42#include <xercesc/util/XercesDefs.hpp>
43#include <xercesc/util/XMLNetAccessor.hpp>
44#include <xercesc/util/NetAccessors/Curl/CurlURLInputStream.hpp>
45#include <icxercesc/util/XMLString.hpp>
46#include <xercesc/util/XMLExceptMsgs.hpp>
47#include <xercesc/util/Janitor.hpp>
48#include <xercesc/util/XMLUniDefs.hpp>
49#include <icxercesc/util/TransService.hpp>
50#include <xercesc/util/TranscodingException.hpp>
51#include <icxercesc/util/PlatformUtils.hpp>
52
53XERCES_CPP_NAMESPACE_BEGIN
54
55
56CurlURLInputStream::CurlURLInputStream(const XMLURL& urlSource, const XMLNetHTTPInfo* httpInfo/*=0*/)
57      : fMulti(0)
58      , fEasy(0)
59      , fMemoryManager(urlSource.getMemoryManager())
60      , fURLSource(urlSource)
61      , fTotalBytesRead(0)
62      , fWritePtr(0)
63      , fBytesRead(0)
64      , fBytesToRead(0)
65      , fDataAvailable(false)
66      , fBufferHeadPtr(fBuffer)
67      , fBufferTailPtr(fBuffer)
68      , fPayload(0)
69      , fPayloadLen(0)
70      , fContentType(0)
71{
72        // Allocate the curl multi handle
73        fMulti = curl_multi_init();
74
75        // Allocate the curl easy handle
76        fEasy = curl_easy_init();
77
78        // Set URL option
79    TranscodeToStr url(fURLSource.getURLText(), "ISO8859-1", fMemoryManager);
80        curl_easy_setopt(fEasy, CURLOPT_URL, (char*)url.str());
81
82    // Set up a way to recieve the data
83        curl_easy_setopt(fEasy, CURLOPT_WRITEDATA, this);                                               // Pass this pointer to write function
84        curl_easy_setopt(fEasy, CURLOPT_WRITEFUNCTION, staticWriteCallback);    // Our static write function
85
86        // Do redirects
87        curl_easy_setopt(fEasy, CURLOPT_FOLLOWLOCATION, (long)1);
88        curl_easy_setopt(fEasy, CURLOPT_MAXREDIRS, (long)6);
89
90    // Add username and password if authentication is required
91    const XMLCh *username = urlSource.getUser();
92    const XMLCh *password = urlSource.getPassword();
93    if(username && password) {
94        XMLBuffer userPassBuf(256, fMemoryManager);
95        userPassBuf.append(username);
96        userPassBuf.append(chColon);
97        userPassBuf.append(password);
98
99        TranscodeToStr userPass(userPassBuf.getRawBuffer(), "ISO8859-1", fMemoryManager);
100
101        curl_easy_setopt(fEasy, CURLOPT_HTTPAUTH, (long)CURLAUTH_ANY);
102        curl_easy_setopt(fEasy, CURLOPT_USERPWD, (char*)userPass.str());
103    }
104
105    if(httpInfo) {
106        // Set the correct HTTP method
107        switch(httpInfo->fHTTPMethod) {
108        case XMLNetHTTPInfo::GET:
109            break;
110        case XMLNetHTTPInfo::PUT:
111            curl_easy_setopt(fEasy, CURLOPT_UPLOAD, (long)1);
112            break;
113        case XMLNetHTTPInfo::POST:
114            curl_easy_setopt(fEasy, CURLOPT_POST, (long)1);
115            break;
116        }
117
118        // Add custom headers
119        if(httpInfo->fHeaders) {
120            struct curl_slist *headersList = 0;
121
122            const char *headersBuf = httpInfo->fHeaders;
123            const char *headersBufEnd = httpInfo->fHeaders + httpInfo->fHeadersLen;
124
125            const char *headerStart = headersBuf;
126            while(headersBuf < headersBufEnd) {
127                if(*headersBuf == '\r' && (headersBuf + 1) < headersBufEnd &&
128                   *(headersBuf + 1) == '\n') {
129
130                    XMLSize_t length = headersBuf - headerStart;
131                    ArrayJanitor<char> header((char*)fMemoryManager->allocate((length + 1) * sizeof(char)),
132                                              fMemoryManager);
133                    memcpy(header.get(), headerStart, length);
134                    header.get()[length] = 0;
135
136                    headersList = curl_slist_append(headersList, header.get());
137
138                    headersBuf += 2;
139                    headerStart = headersBuf;
140                    continue;
141                }
142                ++headersBuf;
143            }
144            curl_easy_setopt(fEasy, CURLOPT_HTTPHEADER, headersList);
145            curl_slist_free_all(headersList);
146        }
147
148        // Set up the payload
149        if(httpInfo->fPayload) {
150            fPayload = httpInfo->fPayload;
151            fPayloadLen = httpInfo->fPayloadLen;
152            curl_easy_setopt(fEasy, CURLOPT_READDATA, this);
153            curl_easy_setopt(fEasy, CURLOPT_READFUNCTION, staticReadCallback);
154            curl_easy_setopt(fEasy, CURLOPT_INFILESIZE_LARGE, (curl_off_t)fPayloadLen);
155        }
156    }
157
158        // Add easy handle to the multi stack
159        curl_multi_add_handle(fMulti, fEasy);
160
161    // Start reading, to get the content type
162        while(fBufferHeadPtr == fBuffer)
163        {
164                int runningHandles = 0;
165        readMore(&runningHandles);
166                if(runningHandles == 0) break;
167        }
168
169    // Find the content type
170    char *contentType8 = 0;
171    curl_easy_getinfo(fEasy, CURLINFO_CONTENT_TYPE, &contentType8);
172    if(contentType8)
173        fContentType = TranscodeFromStr((XMLByte*)contentType8, XMLString::stringLen(contentType8), "ISO8859-1", fMemoryManager).adopt();
174}
175
176
177CurlURLInputStream::~CurlURLInputStream()
178{
179        // Remove the easy handle from the multi stack
180        curl_multi_remove_handle(fMulti, fEasy);
181
182        // Cleanup the easy handle
183        curl_easy_cleanup(fEasy);
184
185        // Cleanup the multi handle
186        curl_multi_cleanup(fMulti);
187
188    if(fContentType) fMemoryManager->deallocate(fContentType);
189}
190
191
192size_t
193CurlURLInputStream::staticWriteCallback(char *buffer,
194                                        size_t size,
195                                        size_t nitems,
196                                        void *outstream)
197{
198        return ((CurlURLInputStream*)outstream)->writeCallback(buffer, size, nitems);
199}
200
201size_t
202CurlURLInputStream::staticReadCallback(char *buffer,
203                                       size_t size,
204                                       size_t nitems,
205                                       void *stream)
206{
207    return ((CurlURLInputStream*)stream)->readCallback(buffer, size, nitems);
208}
209
210size_t
211CurlURLInputStream::writeCallback(char *buffer,
212                                  size_t size,
213                                  size_t nitems)
214{
215        XMLSize_t cnt = size * nitems;
216        XMLSize_t totalConsumed = 0;
217
218        // Consume as many bytes as possible immediately into the buffer
219        XMLSize_t consume = (cnt > fBytesToRead) ? fBytesToRead : cnt;
220        memcpy(fWritePtr, buffer, consume);
221        fWritePtr               += consume;
222        fBytesRead              += consume;
223        fTotalBytesRead += consume;
224        fBytesToRead    -= consume;
225
226        //printf("write callback consuming %d bytes\n", consume);
227
228        // If bytes remain, rebuffer as many as possible into our holding buffer
229        buffer                  += consume;
230        totalConsumed   += consume;
231        cnt                             -= consume;
232        if (cnt > 0)
233        {
234                XMLSize_t bufAvail = sizeof(fBuffer) - (fBufferHeadPtr - fBuffer);
235                consume = (cnt > bufAvail) ? bufAvail : cnt;
236                memcpy(fBufferHeadPtr, buffer, consume);
237                fBufferHeadPtr  += consume;
238                buffer                  += consume;
239                totalConsumed   += consume;
240                //printf("write callback rebuffering %d bytes\n", consume);
241        }
242
243        // Return the total amount we've consumed. If we don't consume all the bytes
244        // then an error will be generated. Since our buffer size is equal to the
245        // maximum size that curl will write, this should never happen unless there
246        // is a logic error somewhere here.
247        return totalConsumed;
248}
249
250size_t
251CurlURLInputStream::readCallback(char *buffer,
252                                 size_t size,
253                                 size_t nitems)
254{
255    XMLSize_t len = size * nitems;
256    if(len > fPayloadLen) len = fPayloadLen;
257
258    memcpy(buffer, fPayload, len);
259
260    fPayload += len;
261    fPayloadLen -= len;
262
263    return len;
264}
265
266bool CurlURLInputStream::readMore(int *runningHandles)
267{
268    // Ask the curl to do some work
269    CURLMcode curlResult = curl_multi_perform(fMulti, runningHandles);
270
271    // Process messages from curl
272    int msgsInQueue = 0;
273    for (CURLMsg* msg = NULL; (msg = curl_multi_info_read(fMulti, &msgsInQueue)) != NULL; )
274    {
275        //printf("msg %d, %d from curl\n", msg->msg, msg->data.result);
276
277        if (msg->msg != CURLMSG_DONE)
278            return true;
279
280        switch (msg->data.result)
281        {
282        case CURLE_OK:
283            // We completed successfully. runningHandles should have dropped to zero, so we'll bail out below...
284            break;
285
286        case CURLE_UNSUPPORTED_PROTOCOL:
287            ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_UnsupportedProto, fMemoryManager);
288            break;
289
290        case CURLE_COULDNT_RESOLVE_HOST:
291        case CURLE_COULDNT_RESOLVE_PROXY:
292          {
293            if (fURLSource.getHost())
294              ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_TargetResolution, fURLSource.getHost(), fMemoryManager);
295            else
296              ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::File_CouldNotOpenFile, fURLSource.getURLText(), fMemoryManager);
297            break;
298          }
299
300        case CURLE_COULDNT_CONNECT:
301            ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_ConnSocket, fURLSource.getURLText(), fMemoryManager);
302            break;
303
304        case CURLE_RECV_ERROR:
305            ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_ReadSocket, fURLSource.getURLText(), fMemoryManager);
306            break;
307
308        default:
309            ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_InternalError, fURLSource.getURLText(), fMemoryManager);
310            break;
311        }
312    }
313
314    // If nothing is running any longer, bail out
315    if(*runningHandles == 0)
316        return false;
317
318    // If there is no further data to read, and we haven't
319    // read any yet on this invocation, call select to wait for data
320    if (curlResult != CURLM_CALL_MULTI_PERFORM && fBytesRead == 0)
321    {
322        fd_set readSet;
323        fd_set writeSet;
324        fd_set exceptSet;
325        int fdcnt=0;
326
327        FD_ZERO(&readSet);
328        FD_ZERO(&writeSet);
329        FD_ZERO(&exceptSet);
330
331        // Ask curl for the file descriptors to wait on
332        curl_multi_fdset(fMulti, &readSet, &writeSet, &exceptSet, &fdcnt);
333
334        // Wait on the file descriptors
335        timeval tv;
336        tv.tv_sec  = 2;
337        tv.tv_usec = 0;
338        select(fdcnt+1, &readSet, &writeSet, &exceptSet, &tv);
339    }
340
341    return curlResult == CURLM_CALL_MULTI_PERFORM;
342}
343
344XMLSize_t
345CurlURLInputStream::readBytes(XMLByte* const          toFill
346                                     , const XMLSize_t maxToRead)
347{
348        fBytesRead = 0;
349        fBytesToRead = maxToRead;
350        fWritePtr = toFill;
351
352        for (bool tryAgain = true; fBytesToRead > 0 && (tryAgain || fBytesRead == 0); )
353        {
354                // First, any buffered data we have available
355                XMLSize_t bufCnt = fBufferHeadPtr - fBufferTailPtr;
356                bufCnt = (bufCnt > fBytesToRead) ? fBytesToRead : bufCnt;
357                if (bufCnt > 0)
358                {
359                        memcpy(fWritePtr, fBufferTailPtr, bufCnt);
360                        fWritePtr               += bufCnt;
361                        fBytesRead              += bufCnt;
362                        fTotalBytesRead += bufCnt;
363                        fBytesToRead    -= bufCnt;
364
365                        fBufferTailPtr  += bufCnt;
366                        if (fBufferTailPtr == fBufferHeadPtr)
367                                fBufferHeadPtr = fBufferTailPtr = fBuffer;
368
369                        //printf("consuming %d buffered bytes\n", bufCnt);
370
371                        tryAgain = true;
372                        continue;
373                }
374
375                // Ask the curl to do some work
376                int runningHandles = 0;
377        tryAgain = readMore(&runningHandles);
378
379                // If nothing is running any longer, bail out
380                if (runningHandles == 0)
381                        break;
382        }
383
384        return fBytesRead;
385}
386
387const XMLCh *CurlURLInputStream::getContentType() const
388{
389    return fContentType;
390}
391
392XERCES_CPP_NAMESPACE_END
Note: See TracBrowser for help on using the repository browser.