帮找一下BUG
// Working copies of the caller-supplied download buffer and its capacity.
char* pbuf = *ppbuf;
long bufsize = *pbufsize;
// Number of response-body bytes stored in pbuf so far.
long nbyte = 0;
// MIME types offered in the Accept header and compared (via strstr) against
// the response Content-Type, so they must be spelled exactly as servers send
// them. "iamge/png" / "iamge/jpeg" never matched any real response.
char mimeGif [] = "image/gif";
char mimePng [] = "image/png";
char mimeJpeg [] = "image/jpeg";
char mimeHtml [] = "text/html";
// Receives the Content-Type header of the response.
char mimeObj [128] = {0};
// NULL-terminated accept-type list handed to HttpOpenRequest.
const char* mimeAccepts [] = {mimeHtml, mimeJpeg, mimeGif, mimePng, 0};
// Scratch in/out length argument for the HttpQueryInfo calls.
DWORD len;
DWORD statusCode = 0;
DWORD contentLen = 0;
// Receives the Location / Content-Location header value.
char contentLocation [MAX_URL] = {0};
// Human-readable reason stored with the URL in the result queues and the log.
// NOTE(review): the 1024-18-MAX_URL size presumably mirrors a fixed-size
// record layout defined elsewhere - confirm before changing it.
char remark [1024-18-MAX_URL] = {0};
//-------------------------------------------------------------------------- connect
// Resolve host and port from the URL and ask g_connectPool for a connection
// handle. On failure the URL is recorded in the "failed" queue that matches
// bPage and the function returns false.
HINTERNET hConnect;
char hostname [MAX_URL] = {0};
url_get_hostname(url, hostname);
DWORD port = url_get_port(url);
hConnect = g_connectPool->get_conn(hostname, port);
if (!hConnect) {
    // Capture the error code before any other API call can overwrite it.
    DWORD err = GetLastError();
    // DWORD is an unsigned long; %lu is the matching conversion (was %d).
    sprintf(remark, "connect fail: %lu", err);
    if (bPage) {
        g_qFailedA[g_iExplorationDepth->get()]->add(url, remark);
        PostLog(ELEM_FAILED_A, url, remark);
    }
    else {
        g_qFailedP[g_iExplorationDepth->get()]->add(url, remark);
        PostLog(ELEM_FAILED_P, url, remark);
    }
    return false;
}
//-------------------------------------------------------------------------- request
// Build the request object (URL path + extra info), open a GET request on
// hConnect, apply the configured timeouts/retries, send it, and read the HTTP
// status code into statusCode. Every failure records the URL in the "failed"
// queue matching bPage and returns false.
// NOTE(review): hConnect comes from g_connectPool; whether it has to be handed
// back to the pool on these early returns is not visible here - confirm.
char urlpath [MAX_URL] = {0};
char extrainfo [MAX_URL] = {0};
url_get_urlpath(url, urlpath);
url_get_extrainfo(url, extrainfo);
string obj = urlpath;
obj += extrainfo;
HINTERNET hRequest = HttpOpenRequest(
    hConnect, "GET", obj.c_str(), "HTTP/1.1", NULL, mimeAccepts,
    INTERNET_FLAG_NO_UI | INTERNET_FLAG_DONT_CACHE | INTERNET_FLAG_NO_AUTO_REDIRECT, 0);
if (!hRequest) {
    DWORD err = GetLastError();
    // DWORD is an unsigned long; %lu is the matching conversion (was %d).
    sprintf(remark, "request fail: %lu", err);
    if (bPage) {
        g_qFailedA[g_iExplorationDepth->get()]->add(url, remark);
        PostLog(ELEM_FAILED_A, url, remark);
    }
    else {
        g_qFailedP[g_iExplorationDepth->get()]->add(url, remark);
        PostLog(ELEM_FAILED_P, url, remark);
    }
    return false;
}
// Option failures are ignored on purpose: the request still works with the
// WinInet defaults.
InternetSetOption(hRequest, INTERNET_OPTION_RECEIVE_TIMEOUT, &g_recvTimeout, sizeof(g_recvTimeout));
InternetSetOption(hRequest, INTERNET_OPTION_SEND_TIMEOUT, &g_sendTimeout, sizeof(g_sendTimeout));
InternetSetOption(hRequest, INTERNET_OPTION_DATA_RECEIVE_TIMEOUT, &g_recvTimeout, sizeof(g_recvTimeout));
InternetSetOption(hRequest, INTERNET_OPTION_DATA_SEND_TIMEOUT, &g_sendTimeout, sizeof(g_sendTimeout));
InternetSetOption(hRequest, INTERNET_OPTION_CONNECT_RETRIES, &g_connectRetries, sizeof(g_connectRetries));
if (!HttpSendRequest(hRequest, "UA-CPU:x86\r\n", -1, NULL, 0)) {
    DWORD err = GetLastError();
    sprintf(remark, "request fail: %lu", err);
    if (bPage) {
        g_qFailedA[g_iExplorationDepth->get()]->add(url, remark);
        PostLog(ELEM_FAILED_A, url, remark);
    }
    else {
        g_qFailedP[g_iExplorationDepth->get()]->add(url, remark);
        PostLog(ELEM_FAILED_P, url, remark);
    }
    // The request handle is local to this function; release it before bailing
    // out, otherwise it (and the connection slot it occupies) is leaked.
    InternetCloseHandle(hRequest);
    return false;
}
len = sizeof(statusCode);
if (!HttpQueryInfo(hRequest, HTTP_QUERY_FLAG_NUMBER | HTTP_QUERY_STATUS_CODE, &statusCode, &len, NULL)) {
    DWORD err = GetLastError();
    sprintf(remark, "request fail: %lu", err);
    if (bPage) {
        g_qFailedA[g_iExplorationDepth->get()]->add(url, remark);
        PostLog(ELEM_FAILED_A, url, remark);
    }
    else {
        g_qFailedP[g_iExplorationDepth->get()]->add(url, remark);
        PostLog(ELEM_FAILED_P, url, remark);
    }
    InternetCloseHandle(hRequest);
    return false;
}
//-------------------------------------------------------------------------- StatusCode
// Classify the HTTP status code and stop early for anything that is not a
// normal response:
//   REDIRECT - mark the URL processed and, if the Location header holds a
//              valid URL, queue that target as newly available;
//   BROKEN   - record the URL as failed;
//   INVALID  - record the URL as processed.
// The request handle is closed on each of these exits; it used to be leaked.
int statusCodeType = get_downloadStatusType(statusCode);
if (statusCodeType == STATUSCODE_REDIRECT)
{
    // statusCode is a DWORD (unsigned long), hence %lu rather than %d.
    sprintf(remark, "statuscode=%lu", statusCode);
    if (bPage) {
        g_qProcessedA[g_iExplorationDepth->get()]->add(url, remark);
        PostLog(ELEM_PROCESSED_A, url, remark);
    }
    else {
        g_qProcessedP[g_iExplorationDepth->get()]->add(url, remark);
        PostLog(ELEM_PROCESSED_P, url, remark);
    }
    // Reuse remark for the entry describing the redirect target.
    ZeroMemory(remark, sizeof(remark));
    lstrcpy(remark, "http redirect");
    len = MAX_URL;
    // Return value intentionally not checked: contentLocation starts out
    // zeroed, and the result is validated by is_validurl() below.
    HttpQueryInfo(hRequest, HTTP_QUERY_LOCATION, contentLocation, &len, NULL);
    InternetCloseHandle(hRequest);
    if (is_validurl(contentLocation)) {
        if (bPage) {
            g_qAvailableA[g_iExplorationDepth->get()]->add(contentLocation, remark);
        }
        else {
            g_qAvailableP[g_iExplorationDepth->get()]->add(contentLocation, remark);
        }
    }
    return false;
}
else if (statusCodeType == STATUSCODE_BROKEN)
{
    sprintf(remark, "statuscode=%lu", statusCode);
    if (bPage) {
        g_qFailedA[g_iExplorationDepth->get()]->add(url, remark);
        PostLog(ELEM_FAILED_A, url, remark);
    }
    else {
        g_qFailedP[g_iExplorationDepth->get()]->add(url, remark);
        PostLog(ELEM_FAILED_P, url, remark);
    }
    InternetCloseHandle(hRequest);
    return FALSE;
}
else if (statusCodeType == STATUSCODE_INVALID)
{
    sprintf(remark, "statuscode=%lu", statusCode);
    if (bPage) {
        g_qProcessedA[g_iExplorationDepth->get()]->add(url, remark);
        PostLog(ELEM_PROCESSED_A, url, remark);
    }
    else {
        g_qProcessedP[g_iExplorationDepth->get()]->add(url, remark);
        PostLog(ELEM_PROCESSED_P, url, remark);
    }
    InternetCloseHandle(hRequest);
    return FALSE;
}
//-------------------------------------------------------------------------- MimeType
len = sizeof(mimeObj);
if (!HttpQueryInfo(hRequest, HTTP_QUERY_CONTENT_TYPE, mimeObj, &len, NULL)) {
DWORD err = GetLastError();
sprintf(remark, "request fail: %d", err);
if (bPage) { g_qFailedA[g_iExplorationDepth->get()]->add(url, remark); PostLog(ELEM_FAILED_A, url, remark); }
else { g_qFailedP[g_iExplorationDepth->get()]->add(url, remark); PostLog(ELEM_FAILED_P, url, remark); }
return FALSE;
}
if ((!bPage) && strstr(mimeObj, mimeHtml)) {
sprintf(remark, "invalid mime type: %s", mimeObj);
if (bPage) { g_qProcessedA[g_iExplorationDepth->get()]->add(url, remark); PostLog(ELEM_PROCESSED_A, url, remark); }
else { g_qProcessedP[g_iExplorationDepth->get()]->add(url, remark); PostLog(ELEM_PROCESSED_P, url, remark); }
return FALSE;
}
if ((!g_fJpg) && strstr(mimeObj, mimeJpeg)) {
sprintf(remark, "invalid mime type: %s", mimeObj);
if (bPage) { g_qProcessedA[g_iExplorationDepth->get()]->add(url, remark); PostLog(ELEM_PROCESSED_A, url, remark); }
else { g_qProcessedP[g_iExplorationDepth->get()]->add(url, remark); PostLog(ELEM_PROCESSED_P, url, remark); }
return FALSE;
}
if ((!g_fGif) && strstr(mimeObj, mimeGif)) {
sprintf(remark, "invalid mime type: %s", mimeObj);
if (bPage) { g_qProcessedA[g_iExplorationDepth->get()]->add(url, remark); PostLog(ELEM_PROCESSED_A, url, remark); }
else { g_qProcessedP[g_iExplorationDepth->get()]->add(url, remark); PostLog(ELEM_PROCESSED_P, url, remark); }
return FALSE;
}
if ((!g_fPng) && strstr(mimeObj, mimePng)) {
sprintf(remark, "invalid mime type: %s", mimeObj);
if (bPage) { g_qProcessedA[g_iExplorationDepth->get()]->add(url, remark); PostLog(ELEM_PROCESSED_A, url, remark); }
else { g_qProcessedP[g_iExplorationDepth->get()]->add(url, remark); PostLog(ELEM_PROCESSED_P, url, remark); }
return FALSE;
}
else if (!(strstr(mimeObj, mimeHtml) || strstr(mimeObj, mimeJpeg) || strstr(mimeObj, mimeGif))) {
sprintf(remark, "invalid mime type: %s", mimeObj);
if (bPage) { g_qProcessedA[g_iExplorationDepth->get()]->add(url, remark); PostLog(ELEM_PROCESSED_A, url, remark); }
else { g_qProcessedP[g_iExplorationDepth->get()]->add(url, remark); PostLog(ELEM_PROCESSED_P, url, remark); }
return FALSE;
}
//-------------------------------------------------------------------------- Length
// If the server reports a Content-Length, reject responses whose size is out
// of range before downloading anything:
//   - bPage false: size in KB must lie within [g_minP, g_maxP];
//   - bPage true : size in KB must not exceed MAX_FILESIZE.
// A missing Content-Length header is not an error; the check is skipped.
len = sizeof(contentLen);
if (HttpQueryInfo(hRequest, HTTP_QUERY_FLAG_NUMBER | HTTP_QUERY_CONTENT_LENGTH, &contentLen, &len, NULL)) {
    if (!bPage) {
        // NOTE(review): from here on contentLen holds KB (not bytes) on this
        // path. Kept as-is in case later code relies on it - confirm.
        contentLen = contentLen / 1024;
        if (contentLen > g_maxP || contentLen < g_minP) {
            // contentLen is a DWORD (unsigned long), hence %lu rather than %d.
            sprintf(remark, "invalid file size: %lukb", contentLen);
            g_qProcessedP[g_iExplorationDepth->get()]->add(url, remark);
            PostLog(ELEM_PROCESSED_P, url, remark);
            // Release the request handle instead of leaking it on this exit.
            InternetCloseHandle(hRequest);
            return FALSE;
        }
    }
    else if (contentLen / 1024 > MAX_FILESIZE) {
        // Report the size in KB to match the "kb" suffix; the raw byte count
        // was printed here before.
        sprintf(remark, "invalid file size: %lukb", contentLen / 1024);
        g_qProcessedA[g_iExplorationDepth->get()]->add(url, remark);
        PostLog(ELEM_PROCESSED_A, url, remark);
        InternetCloseHandle(hRequest);
        return FALSE;
    }
}
//-------------------------------------------------------------------------- Content-Location
// Ask for the Content-Location header; if the buffer is still empty after the
// query, fall back to the request URL. The file name is then derived from
// whichever location ended up in the buffer.
len = sizeof(contentLocation);
HttpQueryInfo(hRequest, HTTP_QUERY_CONTENT_LOCATION, contentLocation, &len, NULL);
if (contentLocation[0] == '\0') {
    lstrcpy(contentLocation, url);
}
string filename = path_get_filename(contentLocation);
//-------------------------------------------------------------------------- Download
// Read the response body in TEMP_BUF-sized chunks and append each chunk to
// pbuf, growing the buffer by TEMP_BUF whenever a chunk does not fit. The
// caller's *ppbuf / *pbufsize are updated every time the buffer is replaced.
//
// Fixes over the previous version:
//   - on growth the chunk was copied to offset bufsize instead of nbyte,
//     leaving a gap of stale bytes and dropping the tail of the data;
//   - the allocation result was not checked;
//   - the MAX_FILESIZE cap was only tested on the growth path, so a buffer
//     that was already large enough let oversized bodies through;
//   - the request handle was leaked on the early return.
DWORD cRead;
char tmpbuf[TEMP_BUF];
while (InternetReadFile(hRequest, tmpbuf, sizeof(tmpbuf), &cRead) && cRead)
{
    // cRead never exceeds sizeof(tmpbuf), so the cast to long is safe and
    // keeps the comparison signed.
    if (!pbuf || nbyte + (long)cRead > bufsize) {
        // realloc keeps the first nbyte bytes and accepts a NULL pbuf. Since
        // nbyte <= bufsize and cRead <= TEMP_BUF, one step is always enough.
        char* grown = (char*)realloc(pbuf, bufsize + TEMP_BUF);
        if (!grown) {
            // pbuf is still valid and still owned by the caller via *ppbuf.
            sprintf(remark, "out of memory");
            if (bPage) {
                g_qFailedA[g_iExplorationDepth->get()]->add(url, remark);
                PostLog(ELEM_FAILED_A, url, remark);
            }
            else {
                g_qFailedP[g_iExplorationDepth->get()]->add(url, remark);
                PostLog(ELEM_FAILED_P, url, remark);
            }
            InternetCloseHandle(hRequest);
            return false;
        }
        pbuf = grown;
        bufsize += TEMP_BUF;
        *pbufsize = bufsize;
        *ppbuf = pbuf;
    }
    // Append at the current end of the data, not at the old capacity.
    memcpy(pbuf + nbyte, tmpbuf, cRead);
    nbyte += cRead;
    if (nbyte/1024 >= MAX_FILESIZE)
    {
        sprintf(remark, "invalid file size");
        if (bPage) {
            g_qProcessedA[g_iExplorationDepth->get()]->add(url, remark);
            PostLog(ELEM_PROCESSED_A, url, remark);
        }
        else {
            g_qProcessedP[g_iExplorationDepth->get()]->add(url, remark);
            PostLog(ELEM_PROCESSED_P, url, remark);
        }
        InternetCloseHandle(hRequest);
        return false;
    }
}
// NOTE(review): a FALSE return from InternetReadFile (network error/timeout)
// ends the loop exactly like end-of-data, so a truncated body is treated as
// complete. Consider checking GetLastError() here.
//-------------------------------------------------------------------------- SaveFiles