#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "winsock2.h"
#pragma comment(lib, "ws2_32.lib")
//Function parameter direction markers; both expand to nothing
#define IN
#define OUT
//String macro definition
#define STR_HTTP_CONTENT_TYPE "Content-Type: "
#define STR_HTTP_CONTENT_LENGTH "Content-Length: "
#define STR_HTTP_IMG_START "<img"
#define STR_HTTP_IMG_SRC_START "src=\""
#define STR_HTTP_URL_START "http://"
#define STR_HTTP_DEFAULT_PORT "80"
#define STR_HTTP_RESPONSE_200OK "HTTP/1.1 200"
//Symbol macro definition
#define HTTP_NEW_LINE "\r\n"
#define HTTP_DOUBLE_NEW_LINE "\r\n\r\n"
#define HTTP_IMG_SRC_END "\""
//Length macro definition (sizes of the char buffers declared with them)
#define LENGTH_IP 16
#define LENGTH_HOST_NAME 20
#define LENGTH_RESOURCE 100
//Parenthesized so the sum keeps its value inside a larger expression
#define LENGTH_URL (LENGTH_RESOURCE + LENGTH_HOST_NAME)
#define LENGTH_PORT 6
#define LENGTH_TYPE 20
#define LENGTH_LEN 10
#define LENGTH_ONCE_READ 204800
#define LENGTH_HEAD_WRITE 1024
#define LENGTH_FILE_NAME 20
/*
 * Strip one trailing line terminator ("\n" or "\r\n") from str in place.
 *
 * str: NUL-terminated string to modify.
 * Returns 0 when a terminator was removed, -1 when str is NULL, empty or
 * does not end with '\n' (str is left untouched in that case).
 */
int to_null_terminating_string( char* str )
{
    size_t len = 0;

    if( NULL == str )
        return -1;

    len = strlen(str);
    if( 0 == len || '\n' != str[len - 1] )
        return -1;

    str[--len] = '\0';
    /* Look for the '\r' only while a character is left, so a lone "\n"
       never makes us read in front of the buffer. */
    if( len > 0 && '\r' == str[len - 1] )
        str[len - 1] = '\0';
    return 0;
}
/*
 * Make str end with exactly one "\r\n".
 *
 * A string already ending in "\r\n" is left alone; a trailing lone "\n" is
 * turned into "\r\n"; anything else gets "\r\n" appended.
 *
 * str: NUL-terminated string; the buffer must have room for up to two more
 *      characters than it currently holds.
 * Returns 1 when str already ended with "\r\n", 0 when it was modified,
 * -1 when str is NULL.
 */
int to_new_line_string( char* str )
{
    size_t len = 0;

    if( NULL == str )
        return -1;

    len = strlen(str);
    if( len >= 2 && '\r' == str[len - 2] && '\n' == str[len - 1] )
        return 1;

    if( len >= 1 && '\n' == str[len - 1] )
    {
        /* lone LF: put the CR where the LF was and move the LF one place on */
        str[len - 1] = '\r';
        str[len] = '\n';
        str[len + 1] = '\0';
    }
    else
    {
        memcpy( str + len, "\r\n", 3 );
    }
    return 0;
}
/*
 * Initialise Winsock (version 2.0 is requested) and create a TCP/IPv4 socket.
 *
 * sock: receives the new socket on success; left untouched on failure.
 * Returns 0 on success, -1 when sock is NULL, WSAStartup fails or the
 * socket cannot be created.
 */
int sock_create_tcp_socket(SOCKET* sock)
{
    WSADATA wsaData;
    SOCKET sSocket = INVALID_SOCKET;
    int startupErr = 0;

    if( NULL == sock )
        return -1;

    /* WSAStartup reports its error through the return value, so it is
       checked here instead of letting socket() fail later on. */
    startupErr = WSAStartup(MAKEWORD(2, 0), &wsaData);
    if( 0 != startupErr )
    {
        printf( "WSAStartup failed\n" );
        printf( "WSAStartup error:[%d]\n", startupErr );
        return -1;
    }

    sSocket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
    if( INVALID_SOCKET == sSocket )
    {
        printf( "socket create failed\n" );
        printf( "WSAGetLastError():[%d]\n", WSAGetLastError() );
        return -1;
    }

    *sock = sSocket;
    return 0;
}
/*
 * Connect sock_fd to the IPv4 endpoint ip:port.
 *
 * ip:   dotted-decimal address string, converted with inet_addr.
 * port: port number in host byte order.
 * Returns the result of connect() on success, -1 on failure (the failure
 * and the WSAGetLastError() code are printed).
 */
int sock_connect( const SOCKET sock_fd, const char* ip, const int port )
{
    SOCKADDR_IN serverAddr = {0};
    int status = 0;

    serverAddr.sin_family = AF_INET;
    serverAddr.sin_port = htons(port);
    serverAddr.sin_addr.s_addr = inet_addr(ip);

    status = connect(sock_fd, (SOCKADDR*)&serverAddr, sizeof(SOCKADDR));
    if( SOCKET_ERROR != status )
        return status;

    printf( "socket connect to [%s:%d] failed\n", ip, port );
    printf( "WSAGetLastError():[%d]\n", WSAGetLastError() );
    return -1;
}
/*
 * Print a one-line description of a Winsock error code to stdout.
 *
 * err_code: the error code to describe; codes without a dedicated message
 *           print "Other errno".
 *
 * The parameter is deliberately not called "errno": that identifier is a
 * macro in the C library and breaks the declaration once it is in scope.
 */
void sock_error_str( int err_code )
{
    const char* text = NULL;

    switch( err_code )
    {
    case WSANOTINITIALISED:
        text = "errno WSANOTINITIALISED:A successful WSAStartup call must occur before using this function.\n";
        break;
    case WSAENETDOWN:
        text = "errno WSAENETDOWN:The network subsystem has failed.\n";
        break;
    case WSAHOST_NOT_FOUND:
        text = "errno WSAHOST_NOT_FOUND:Authoritative answer host not found.\n";
        break;
    case WSATRY_AGAIN:
        text = "errno WSATRY_AGAIN:Nonauthoritative host not found, or server failure.\n";
        break;
    case WSANO_RECOVERY:
        text = "errno WSANO_RECOVERY:A nonrecoverable error occurred.\n";
        break;
    case WSANO_DATA:
        text = "errno WSANO_DATA:Valid name, no data record of requested type.\n";
        break;
    case WSAEINPROGRESS:
        text = "errno WSAEINPROGRESS:A blocking Windows Sockets 1.1 call is in progress, or the service provider is still processing a callback function.\n";
        break;
    case WSAEFAULT:
        text = "errno WSAEFAULT:The name parameter is not a valid part of the user address space.\n";
        break;
    case WSAEINTR:
        text = "errno WSAEINTR:A blocking Windows Socket 1.1 call was canceled through WSACancelBlockingCall.\n";
        break;
    default:
        text = "Other errno\n";
        break;
    }
    printf( "%s", text );
}
int sock_get_ip_by_host( const char* host_name, char* ip )
{
HOSTENT *host_entry = 0;
int WSA_return;
WSADATA WSAData;
WSA_return=WSAStartup(0x0101,&WSAData);
host_entry=gethostbyname(host_name);
if(0==WSA_return)
{
if(0!=host_entry)
{
sprintf(ip, "%d.%d.%d.%d",
(host_entry->h_addr_list[0][0]&0x00ff),
(host_entry->h_addr_list[0][1]&0x00ff),
(host_entry->h_addr_list[0][2]&0x00ff),
(host_entry->h_addr_list[0][3]&0x00ff));
printf("Host:[%s] host length:[%d] ip:[%s]\n", host_name, strlen(host_name), ip);
}
else
{
printf( "gethostbyname failed\n" );
printf( "WSAGetLastError():[%d]\n", WSAGetLastError() );
sock_error_str( WSAGetLastError() );
}
}
else
{
printf( "WSAStartup failed\n" );
printf( "WSAGetLastError():[%d]\n", WSAGetLastError() );
sock_error_str( WSAGetLastError() );
}
return 0;
}
/*
 * Send exactly snd_length bytes from snd_buf, calling send() again until
 * everything has been handed over.
 *
 * Returns the number of bytes sent (snd_length, or 0 when snd_length is
 * not positive), or -1 when send() fails or snd_buf is NULL while data is
 * requested.
 */
int sock_send_buf( const SOCKET sock_fd, const char* snd_buf, const int snd_length )
{
    int sent = 0;
    int total = 0;

    if( NULL == snd_buf && snd_length > 0 )
        return -1;

    while( total < snd_length )
    {
        sent = send( sock_fd, snd_buf + total, snd_length - total, 0 );
        if( SOCKET_ERROR == sent )
        {
            /* SOCKET is pointer-sized, so it is converted before printing */
            printf( "socket [%lu] send error\n", (unsigned long)sock_fd );
            printf( "WSAGetLastError():[%d]\n", WSAGetLastError() );
            return -1;
        }
        total += sent;
        printf( "Send [%d] bytes this time total [%d] bytes left [%d] bytes\n", sent, total, snd_length - total );
    }
    return total;
}
/*
 * Perform a single recv() of at most length bytes into buf.
 *
 * Returns the number of bytes received, -1 when recv() returned 0 (peer
 * closed the connection), -2 when recv() reported SOCKET_ERROR.
 */
int sock_rcv_buf( const SOCKET sock_fd, char* buf, int length )
{
    const int received = recv( sock_fd, buf, length, 0 );

    if( SOCKET_ERROR == received )
    {
        printf( "Socket received error\n" );
        printf( "WSAGetLastError():[%d]\n", WSAGetLastError() );
        return -2;
    }

    if( 0 == received )
    {
        printf( "Peer socket is closed while receiving\n" );
        printf( "WSAGetLastError():[%d]\n", WSAGetLastError() );
        return -1;
    }

    printf( "Socket receive [%d] bytes expected for [%d] bytes\n", received , length );
    return received;
}
/* Defined further down in this file; declared here so the call below is checked. */
int http_parse_url( const char* url, char* host_name, char* ip, int* port, char* resource );

/*
 * Extract the host name and the trailing file name from url.
 *
 * url:       e.g. "http://host/dir/page.html".
 * host_name: receives the host part (filled in by http_parse_url).
 * file_name: receives everything after the last '/' in url, or an empty
 *            string when url has no '/' or the name does not fit. The
 *            buffer is expected to hold LENGTH_FILE_NAME bytes, which is
 *            what the callers in this file pass.
 * Returns 0 on success, -1 on NULL arguments, an over-long file name or
 * when http_parse_url reports a failure.
 */
int http_parse_url_simple( IN const char* url, OUT char* host_name, OUT char* file_name )
{
    char ip[LENGTH_IP] = {0};
    char resource[LENGTH_RESOURCE] = {0};
    int port = 0;
    int ret = 0;
    const char* last_slash = NULL;
    const char* name = "";

    if( NULL == url || NULL == host_name || NULL == file_name )
        return -1;

    last_slash = strrchr( url, '/' );
    if( NULL != last_slash )
        name = last_slash + 1;

    if( strlen(name) >= LENGTH_FILE_NAME )
    {
        printf( "file name [%s] is too long\n", name );
        file_name[0] = '\0';
        ret = -1;
    }
    else
    {
        strcpy( file_name, name );
    }

    if( 0 != http_parse_url( url, host_name, ip, &port, resource ) )
        ret = -1;
    return ret;
}
/*
 * Split url into host name, port and resource path, then resolve the host.
 *
 * url:       "[http://]host[:port][/resource]".
 * host_name: receives the host without the port; expected to hold
 *            LENGTH_HOST_NAME bytes.
 * ip:        receives the dotted-decimal address from sock_get_ip_by_host.
 * port:      receives the port from the url, or STR_HTTP_DEFAULT_PORT.
 * resource:  receives the path starting at the first '/' after the host,
 *            or "/" when the url has none; expected to hold
 *            LENGTH_RESOURCE bytes.
 *
 * All outputs are first reset (empty host and ip, port 0, resource "/").
 * Returns 0 on success, -1 on NULL arguments, a host or resource that does
 * not fit, an invalid port or a failed host lookup.
 */
int http_parse_url( IN const char* url, OUT char* host_name, OUT char* ip, OUT int* port, OUT char* resource)
{
    const size_t schemeLen = strlen(STR_HTTP_URL_START);
    const char* hostStart = url;
    const char* hostEnd = NULL;
    char* portSep = NULL;
    char* portTail = NULL;
    size_t hostLen = 0;
    long portValue = 0;

    if( NULL == url || NULL == host_name || NULL == ip || NULL == port || NULL == resource )
        return -1;

    host_name[0] = '\0';
    ip[0] = '\0';
    *port = 0;
    strcpy( resource, "/" );

    /*1 step: get host name*/
    if( 0 == strncmp( url, STR_HTTP_URL_START, schemeLen ) )
        hostStart = url + schemeLen;
    hostEnd = strchr( hostStart, '/' );
    hostLen = ( NULL != hostEnd ) ? (size_t)(hostEnd - hostStart) : strlen(hostStart);
    if( hostLen >= LENGTH_HOST_NAME )
    {
        printf( "host name in url [%s] is too long\n", url );
        return -1;
    }
    memcpy( host_name, hostStart, hostLen );
    host_name[hostLen] = '\0';

    /*2 step: get resource ("/" is kept when the url stops after the host)*/
    if( NULL != hostEnd )
    {
        if( strlen(hostEnd) >= LENGTH_RESOURCE )
        {
            printf( "resource in url [%s] is too long\n", url );
            return -1;
        }
        strcpy( resource, hostEnd );
    }

    /*3 step: get port*/
    portSep = strchr( host_name, ':' );
    if( NULL != portSep )
    {
        *portSep = '\0';    /* cut ":port" off the host name */
        portValue = strtol( portSep + 1, &portTail, 10 );
        if( portTail == portSep + 1 || '\0' != *portTail || portValue <= 0 || portValue > 65535 )
        {
            printf( "invalid port in url [%s]\n", url );
            return -1;
        }
    }
    else
    {
        portValue = strtol( STR_HTTP_DEFAULT_PORT, NULL, 10 );
    }
    *port = (int)portValue;

    /*4 step: get ip*/
    if( 0 != sock_get_ip_by_host( host_name, ip ) )
        return -1;
    return 0;
}
/*
 * Read Content-Type and Content-Length out of an HTTP response head.
 *
 * response: NUL-terminated text starting at the status line.
 * type:     receives the text between the first '/' of the Content-Type
 *           header and the end of that header line, truncated to fit a
 *           buffer of LENGTH_TYPE bytes.
 * length:   receives the Content-Length value.
 *
 * Returns 0 on success, -2 when the response does not start with
 * STR_HTTP_RESPONSE_200OK, -1 on NULL arguments or when a header, its
 * line end, the '/' of the type or a valid length cannot be found.
 */
int http_parse_response( IN const char* response, OUT char* type, OUT int* length )
{
    const char* typeStart = NULL;
    const char* typeEnd = NULL;
    const char* lenStart = NULL;
    const char* lenEnd = NULL;
    const char* subtype = NULL;
    char* numEnd = NULL;
    long lenValue = 0;
    size_t typeLen = 0;
    int i = 0;

    if( NULL == response || NULL == type || NULL == length )
        return -1;

    if( 0 != strncmp( response, STR_HTTP_RESPONSE_200OK, strlen(STR_HTTP_RESPONSE_200OK)) )
    {
        printf("http response is not 200 OK\n");
        /* show at most the first 10 characters, never reading past the NUL */
        for( i = 0; i < 10 && '\0' != response[i]; i++ )
            printf( "%s%c", ( i > 0 ) ? " " : "", response[i] );
        printf( "\n" );
        return -2;
    }

    typeStart = strstr(response,STR_HTTP_CONTENT_TYPE);
    lenStart = strstr(response,STR_HTTP_CONTENT_LENGTH);
    if( !typeStart || !lenStart )
    {
        printf( "cantnot find %s or %s\n", STR_HTTP_CONTENT_TYPE, STR_HTTP_CONTENT_LENGTH );
        return -1;
    }
    typeEnd = strstr(typeStart,HTTP_NEW_LINE);
    lenEnd = strstr(lenStart,HTTP_NEW_LINE);
    if( !typeEnd || !lenEnd )
    {
        printf( "cannot find the end of %s or %s\n", STR_HTTP_CONTENT_TYPE, STR_HTTP_CONTENT_LENGTH );
        return -1;
    }

    /* The number has to lie inside the header line: strtol skips leading
       white space, line breaks included, so the end position is checked. */
    lenStart += strlen(STR_HTTP_CONTENT_LENGTH);
    lenValue = strtol( lenStart, &numEnd, 10 );
    if( numEnd == lenStart || numEnd > lenEnd || lenValue < 0 || lenValue > INT_MAX )
    {
        printf( "invalid value of %s\n", STR_HTTP_CONTENT_LENGTH );
        return -1;
    }

    /* The '/' must belong to this header line, not to a later one. */
    subtype = strchr( typeStart, '/' );
    if( NULL == subtype || subtype >= typeEnd )
    {
        printf( "cannot find '/' in %s\n", STR_HTTP_CONTENT_TYPE );
        return -1;
    }
    typeLen = (size_t)(typeEnd - subtype - 1);
    if( typeLen >= LENGTH_TYPE )
        typeLen = LENGTH_TYPE - 1;
    memcpy( type, subtype + 1, typeLen );
    type[typeLen] = '\0';

    *length = (int)lenValue;
    printf( "Get type %s length [%d] from http response\n", type, *length );
    return 0;
}
/*
 * Scan buf for <img ... src="..."> attributes and write one URL per line
 * (terminated with "\r\n") to stream.
 *
 * buf:       NUL-terminated HTML text; scanning relies on the terminator.
 * len:       not used.
 * host_name: put in front of every src value that starts with '/'.
 * stream:    task file opened for writing. The const in the signature is
 *            cast away because the stdio calls need a plain FILE*.
 *
 * Empty src values and URLs that do not fit the line buffer together with
 * their line terminator are skipped.
 * Returns 0 once no further image is found, -1 on NULL arguments.
 */
int produce_task_from_buffer( const char* buf, const int len, const char* host_name, const FILE* stream )
{
    const size_t srcPrefixLen = strlen(STR_HTTP_IMG_SRC_START);
    FILE* out = (FILE*)stream;
    const char* cursor = buf;
    const char* imgStart = NULL;
    const char* srcValue = NULL;
    const char* srcEnd = NULL;
    char imgUrl[LENGTH_URL] = {0};
    size_t hostLen = 0;
    size_t srcLen = 0;

    (void)len;
    if( NULL == buf || NULL == host_name || NULL == stream )
        return -1;

    while(1)
    {
        imgStart = strstr( cursor, STR_HTTP_IMG_START );
        if( NULL == imgStart )
            break;
        srcValue = strstr( imgStart, STR_HTTP_IMG_SRC_START );
        if( NULL == srcValue )
            break;
        srcValue += srcPrefixLen;
        srcEnd = strstr( srcValue, HTTP_IMG_SRC_END );
        if( NULL == srcEnd )
            break;
        cursor = srcEnd;

        srcLen = (size_t)(srcEnd - srcValue);
        if( 0 == srcLen )
            continue;
        /* a src starting with '/' is relative to the host */
        hostLen = ( '/' == *srcValue ) ? strlen(host_name) : 0;
        /* +3: room for "\r\n" and the terminating NUL */
        if( hostLen + srcLen + 3 > sizeof(imgUrl) )
        {
            printf( "imgUrl is too long, skipped\n" );
            continue;
        }

        memset( imgUrl, 0, sizeof(imgUrl) );
        memcpy( imgUrl, host_name, hostLen );
        memcpy( imgUrl + hostLen, srcValue, srcLen );
        printf( "imgUrl is %s\n", imgUrl );
        to_new_line_string(imgUrl);
        fwrite( imgUrl, sizeof(char), strlen(imgUrl), out );
        fflush(out);
    }
    return 0;
}
/*
 * Read a downloaded HTML file and append the image URLs found in it to
 * the task stream.
 *
 * htmlFile:   path of the HTML file; at most LENGTH_ONCE_READ - 1 bytes
 *             of it are examined.
 * host_name:  passed on to produce_task_from_buffer.
 * taskStream: open task file, passed on to produce_task_from_buffer.
 * Returns 0 after the file was processed, -1 when an argument is NULL or
 * the file cannot be opened.
 */
int produce_task_from_file( const char* htmlFile,const char* host_name, const FILE* taskStream )
{
    char rdBuf[LENGTH_ONCE_READ] = {0};
    FILE* stream = NULL;
    int rdLen = 0;

    if( NULL == htmlFile || NULL == taskStream )
        return -1;

    stream = fopen( htmlFile, "rb" );
    if( NULL == stream )
        return -1;

    /* One byte stays free so the text is always NUL-terminated for the
       string search in produce_task_from_buffer. */
    rdLen = (int)fread( rdBuf, sizeof(char), sizeof(rdBuf) - 1, stream );
    fclose( stream );
    rdBuf[rdLen] = '\0';

    produce_task_from_buffer( rdBuf, rdLen, host_name, taskStream );
    return 0;
}
/* Defined further down in this file; declared here so the call below is checked. */
int download_file(const char* url);

/*
 * Download every URL listed in taskFile, one URL per line.
 *
 * Line terminators are stripped and empty lines are skipped.
 * Returns 0 after the file was processed, -1 when taskFile is NULL or
 * cannot be opened.
 */
int run_task(const char* taskFile)
{
    char lineStr[LENGTH_URL] = {0};
    FILE* stream = NULL;

    if( NULL == taskFile )
        return -1;

    stream = fopen( taskFile, "rb" );
    if( NULL == stream )
    {
        printf( "cannot open task file [%s]\n", taskFile );
        return -1;
    }

    while( fgets( lineStr, sizeof(lineStr), stream ) )
    {
        to_null_terminating_string(lineStr);
        if( '\0' != lineStr[0] )
        {
            printf( "lineStr = %s\n", lineStr );
            download_file(lineStr);
        }
        memset( lineStr, 0, sizeof(lineStr) );
    }

    fclose( stream );
    return 0;
}
/* Defined further down in this file; declared here so the call below is checked. */
int download( char* host_name, char* ip, int port, char* resource );

/*
 * Parse url and download the resource it names.
 *
 * Returns 0 when both http_parse_url and download return 0, -1 when url
 * is NULL or either step reports a failure.
 */
int download_file(const char* url)
{
    char host_name[LENGTH_HOST_NAME] = {0};
    char ip[LENGTH_IP] = {0};
    char resource[LENGTH_RESOURCE] = {0};
    int port = 0;

    if( NULL == url )
        return -1;
    if( 0 != http_parse_url( url, host_name, ip , &port, resource ) )
        return -1;
    if( 0 != download( host_name, ip, port, resource ) )
        return -1;
    return 0;
}
/*
 * Fetch resource from host_name (already resolved to ip:port) with an
 * HTTP/1.1 GET and store the body in a local file named after the part of
 * resource behind its last '/'.
 *
 * The response head (status line, Content-Type, Content-Length and the
 * blank line ending the head) has to arrive within the first received
 * chunk; receiving stops once Content-Length body bytes were written.
 *
 * Returns 0 when the whole body was written, -1 on any failure: NULL
 * arguments, no usable file name, over-long request, socket errors, a
 * response that http_parse_response rejects, file errors, or a connection
 * that ends before the body is complete.
 *
 * NOTE(review): the socket created here is not released on any path; no
 * close call for it is visible in this file -- TODO add one.
 */
int download( char* host_name, char* ip, int port, char* resource )
{
    SOCKET sSocket = INVALID_SOCKET;
    char sndBuf[LENGTH_HEAD_WRITE] = {0};
    char rcvBuf[LENGTH_ONCE_READ] = {0};
    char type[LENGTH_TYPE] = {0};
    char fileName[LENGTH_FILE_NAME] = {0};
    const char* lastSlash = NULL;
    char* headEnd = NULL;
    char* writeFileBuf = NULL;
    FILE* getFile = NULL;
    int writeFileLen = 0;
    int parseHeadFlag = 0;
    int totalLen = 0;
    int len = 0;
    int num = 0;
    int ret = -1;

    if( NULL == host_name || NULL == ip || NULL == resource )
        return -1;

    /* local file name = text behind the last '/' of the resource */
    lastSlash = strrchr( resource, '/' );
    if( NULL == lastSlash || '\0' == lastSlash[1] || strlen(lastSlash + 1) >= sizeof(fileName) )
    {
        printf( "cannot get a file name from resource [%s]\n", resource );
        return -1;
    }
    strcpy( fileName, lastSlash + 1 );

    /* the sizeof term covers the fixed request text and the terminating NUL */
    if( strlen(resource) + strlen(host_name) + sizeof("GET  HTTP/1.1\r\nHost: \r\n\r\n") > sizeof(sndBuf) )
    {
        printf( "http request for [%s] is too long\n", resource );
        return -1;
    }
    /* HTTP header lines end with CRLF */
    sprintf( sndBuf, "GET %s HTTP/1.1\r\nHost: %s\r\n\r\n", resource, host_name );

    if( sock_create_tcp_socket(&sSocket) )
        return -1;
    if( sock_connect( sSocket, ip, port ) )
        return -1;
    if( -1 == sock_send_buf( sSocket, sndBuf, (int)strlen(sndBuf) ) )
        return -1;

    while(1)
    {
        memset( rcvBuf, 0, sizeof(rcvBuf) );
        /* one byte stays free so the chunk is always NUL-terminated */
        num = sock_rcv_buf( sSocket, rcvBuf, (int)sizeof(rcvBuf) - 1 );
        if( num <= 0 )
            break;

        writeFileBuf = rcvBuf;
        writeFileLen = num;
        if( 0 == parseHeadFlag )
        {
            if( 0 != http_parse_response(rcvBuf,type,&len) )
                break;
            headEnd = strstr( rcvBuf, HTTP_DOUBLE_NEW_LINE );
            if( NULL == headEnd )
            {
                printf( "cannot find the end of the http response head\n" );
                break;
            }
            printf( "type is %s\n", type );
            writeFileBuf = headEnd + strlen(HTTP_DOUBLE_NEW_LINE);
            writeFileLen = num - (int)(writeFileBuf - rcvBuf);
            parseHeadFlag = 1;

            getFile = fopen( fileName, "wb" );
            if( NULL == getFile )
            {
                printf( "cannot open file [%s] for writing\n", fileName );
                break;
            }
        }

        if( writeFileLen > 0 &&
            (size_t)writeFileLen != fwrite( writeFileBuf, sizeof(char), writeFileLen, getFile ) )
        {
            printf( "write to file [%s] failed\n", fileName );
            break;
        }
        totalLen += writeFileLen;
        if( totalLen >= len )
        {
            ret = 0;
            break;
        }
    }

    if( getFile )
        fclose(getFile);
    return ret;
}
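/*
Three command-line modes:
1. exe -s url
   download one page, then the images it references
2. exe -m url1 url2 url3
   the same for several pages
3. exe -r 211 344 http://www.huawei.com/index*.html
   replace '*' by every number from 211 up to, but not including, 344
   and handle each resulting page as in mode 1
*/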
/* Download one page and append the images it references to the task file. */
static void queue_page_images( const char* url, FILE* taskStream )
{
    char host_name[LENGTH_HOST_NAME] = {0};
    char file_name[LENGTH_FILE_NAME] = {0};

    http_parse_url_simple( url, host_name, file_name );
    download_file( url );
    produce_task_from_file( file_name, host_name, taskStream );
}

/*
 * Entry point. Modes: "-s url", "-m url1 url2 ...", "-r start end pattern".
 *
 * Each page is downloaded and its image URLs are collected in "task.txt";
 * afterwards every URL in that file is downloaded by run_task.
 * For -r the '*' in pattern is replaced by each number in [start, end).
 *
 * Returns 0 after the tasks were run, -1 on invalid arguments or when
 * "task.txt" cannot be created.
 */
int main(int argc,char* args[])
{
    FILE* taskStream = NULL;
    const char* pattern = NULL;
    const char* star = NULL;
    char urlTmp[LENGTH_URL] = {0};
    int rangeStart = 0;
    int rangeEnd = 0;
    int i = 0;

    if( argc < 3 )
    {
        printf( "arguments number is invalid\n" );
        return -1;
    }

    //1. create task file
    taskStream = fopen( "task.txt", "wb" );
    if( NULL == taskStream )
    {
        printf( "cannot create task.txt\n" );
        return -1;
    }

    //2. produce tasks from cmd
    if( 0 == strcmp( args[1], "-s" ) )
    {
        printf( "download one single web picture\n" );
        queue_page_images( args[2], taskStream );
    }
    else if( 0 == strcmp( args[1], "-m" ) )
    {
        printf( "download multiple webs picture\n" );
        for( i = 2; i < argc; i++ )
            queue_page_images( args[i], taskStream );
    }
    else if( 0 == strcmp( args[1], "-r" ) )
    {
        printf( "download webs by regulare expression\n" );
        if( argc < 5 )
        {
            printf( "arguments number is invalid\n" );
            fclose( taskStream );
            return -1;
        }
        pattern = args[4];
        star = strchr( pattern, '*' );
        /* '*' grows to at most 11 characters ("-2147483648"), so the
           expanded url needs up to strlen(pattern) + 10 characters + NUL */
        if( NULL == star || strlen(pattern) + 11 > sizeof(urlTmp) )
        {
            printf( "url pattern is invalid\n" );
            fclose( taskStream );
            return -1;
        }
        rangeStart = atoi( args[2] );
        rangeEnd = atoi( args[3] );
        for( i = rangeStart ; i < rangeEnd; i++ )
        {
            sprintf( urlTmp, "%.*s%d%s", (int)(star - pattern), pattern, i, star + 1 );
            printf( "urlTmp is %s\n", urlTmp );
            queue_page_images( urlTmp, taskStream );
        }
    }
    else
    {
        printf( "unknown option [%s]\n", args[1] );
        fclose( taskStream );
        return -1;
    }

    /* close the task file before it is read back */
    fclose( taskStream );

    //3. execute tasks
    run_task("task.txt");
    return 0;
}
/*the oldest test case*/
/*
 * Oldest test case: fetch one fixed page from a hard-coded server address
 * and store the body in "1.html". Both parameters are unused.
 *
 * Returns 0 after the transfer loop ended, -1 when the file, Winsock, the
 * socket, the connection or the request could not be set up.
 *
 * NOTE(review): the socket is not released on any path; no close call for
 * it is visible in this file -- TODO add one.
 */
int main1(int argc, char args[])
{
    SOCKET sSocket = INVALID_SOCKET;
    SOCKADDR_IN stSvrAddrIn = {0}; /* server address */
    WSADATA wsaData;
    char sndBuf[1024] = {0};
    char rcvBuf[2048] = {0};
    char type[20] = {0};
    char* headEnd = NULL;
    char* writeFileBuf = NULL;
    FILE* getFile = NULL;
    int writeFileLen = 0;
    int parseHeadFlag = 0;
    int totalLen = 0;
    int len = 0;
    int num = 0;

    (void)argc;
    (void)args;

    getFile = fopen("1.html", "wb");
    if( NULL == getFile )
    {
        printf("open 1.html fail!\n");
        return -1;
    }

    /* Build the HTTP request -- the key part of this program.
       Header lines end with CRLF. Alternative target used earlier:
       GET /ucmf/groups/public/documents/webasset/logo_cn.jpg */
    sprintf(sndBuf, "GET /cn/about-huawei/corporate-info/vision-mission/index.htm HTTP/1.1\r\n");
    strcat(sndBuf, "Host: www.huawei.com\r\n\r\n");

    /* initialise the socket DLL */
    if( 0 != WSAStartup(MAKEWORD(2, 0), &wsaData) )
    {
        printf("WSAStartup fail!\n");
        fclose(getFile);
        return -1;
    }
    stSvrAddrIn.sin_family = AF_INET;
    stSvrAddrIn.sin_port = htons(80);
    stSvrAddrIn.sin_addr.s_addr = inet_addr("10.82.49.59");
    sSocket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
    if( INVALID_SOCKET == sSocket )
    {
        printf("socket fail!\n");
        fclose(getFile);
        return -1;
    }

    /* connect */
    if( SOCKET_ERROR == connect(sSocket, (SOCKADDR*)&stSvrAddrIn, sizeof(SOCKADDR)) )
    {
        printf("connect fail!\n");
        fclose(getFile);
        return -1;
    }

    /* send the HTTP request */
    if( SOCKET_ERROR == send(sSocket, (char*)sndBuf, (int)strlen(sndBuf), 0) )
    {
        printf("send fail!\n");
        fclose(getFile);
        return -1;
    }

    while(1)
    {
        memset( rcvBuf, 0, sizeof(rcvBuf) );
        /* one byte stays free so the chunk is always NUL-terminated */
        num = recv(sSocket, rcvBuf, (int)sizeof(rcvBuf) - 1, 0);
        if( num <= 0 )
            break;

        writeFileBuf = rcvBuf;
        writeFileLen = num;
        if( 0 == parseHeadFlag && 0 == http_parse_response(rcvBuf,type,&len) )
        {
            headEnd = strstr(rcvBuf,HTTP_DOUBLE_NEW_LINE);
            if( NULL == headEnd )
                break;
            printf( "type is %s\n", type);
            printf( "len is %d\n", len );
            /* the body starts right behind the blank line ending the head */
            writeFileBuf = headEnd + strlen(HTTP_DOUBLE_NEW_LINE);
            writeFileLen = num - (int)(writeFileBuf - rcvBuf);
            parseHeadFlag = 1;
        }
        printf( "num is %d pRcv is \n\n%s", num, rcvBuf );
        totalLen += writeFileLen;
        fwrite( writeFileBuf, sizeof(char), writeFileLen, getFile );
        if( totalLen >= len )
            break;
    }

    fflush(getFile);
    fclose(getFile);
    return 0;
}
|