/* CGI multipart/form-data upload parser: form fields are kept in memory, uploaded files are written to temporary files. */
#include <limits.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <fcntl.h>
// Declaration helpers: each one wraps a return type in a storage class or a
// calling-convention keyword.
// APP_STATIC(type) expands to `static type`.
#define APP_STATIC(type) static type
// APP_EXTERN(type) expands to `extern type` (not used in the visible code).
#define APP_EXTERN(type) extern type
// APP_DECLARE(type) expands to `type __stdcall`; the non-static functions of
// this file (memcs, substr, SUBSTR, concat, cgiInit, cgiParse) are declared with it.
// NOTE(review): __stdcall/__cdecl are compiler-specific calling-convention
// keywords (MSVC/MinGW style) - confirm the intended toolchain.
#define APP_DECLARE(type) type __stdcall
// APP_DECLARE_NONSTD(type) expands to `type __cdecl` (not used in the visible code).
#define APP_DECLARE_NONSTD(type) type __cdecl
// APP_DECLARE_DATA expands to nothing (not used in the visible code).
#define APP_DECLARE_DATA
/*
 * NEW(type, len): allocate a zero-filled array of `len` elements of `type`
 * with calloc(). Evaluates to NULL when the allocation fails. The whole
 * expansion is parenthesized so it can be used inside larger expressions.
 */
#define NEW(type, len) ((type *)calloc((len), sizeof(type)))

/*
 * RENEW(ptr, type, pos, len): resize the array `ptr` to (pos + len) elements
 * of `type` and zero the `len` elements that start at index `pos`.
 *
 * `pos` and `len` are evaluated exactly once and every argument is
 * parenthesized, so expressions such as `*pp` or `a + b` are safe to pass.
 * When realloc() fails nothing is zeroed and `ptr` is set to NULL (the value
 * the caller has to test); realloc() leaves the old block allocated in that
 * case, so a caller that must not leak it has to keep a copy of the old
 * pointer before invoking the macro.
 */
#define RENEW(ptr, type, pos, len) do { \
    size_t renew_pos_ = (size_t)(pos); \
    size_t renew_len_ = (size_t)(len); \
    type *renew_mem_ = (type *)realloc((ptr), sizeof(type) * (renew_pos_ + renew_len_)); \
    if (renew_mem_ != NULL) { \
        memset(&renew_mem_[renew_pos_], 0, sizeof(type) * renew_len_); \
    } \
    (ptr) = renew_mem_; \
} while (0)
// Forward declarations of the memory/string helpers defined below.
// memcs: searches data[*posData..lenData) for key; returns 0 (found), 1 (partial
// match at the end of data) or -1 (not found) and updates both cursors.
APP_DECLARE(int) memcs(const char *data, int lenData, int *posData, const char *key, int lenKey, int *posKey);
// substr: returns a newly allocated, NUL-terminated copy of len bytes of str
// starting at index pos, or NULL when str is NULL or len is 0.
APP_DECLARE(char *) substr(const char *str, int pos, int len);
// SUBSTR: returns a newly allocated copy of the text found between the
// delimiters start and end inside data, or NULL when it is not found within len.
APP_DECLARE(char *) SUBSTR(const char *data, size_t len, const char *start, const char *end);
// concat: appends len bytes of str to the heap string *ret (allocating it when
// *ret is NULL) and returns the resulting string.
APP_DECLARE(char *) concat(char **ret, const char *str, int len);
// Two-valued result type: False is 0, True is 1.
typedef enum {False,True} Boolean;
// Capacity (excluding the terminating NUL) of the path buffer in cgi_file_t.
#define MIN_PATH_SIZE 255
/*
 * memcs - "memory cursor" search for `key` inside a chunk of memory.
 *
 * Looks for the first occurrence of key[0..lenKey) in data[*posData..lenData).
 * *posKey carries state between calls on consecutive chunks: a value p > 0
 * says that the bytes searched so far ended with key[0..p), so an occurrence
 * may continue at data[*posData].
 *
 * Returns
 *    0  found: *posData is moved just past the match, *posKey is reset to 0.
 *    1  not found, but the searched bytes end with a prefix of `key`:
 *       *posKey receives the length of the longest such prefix and *posData
 *       is moved to the end of the searched range.
 *   -1  not found and nothing to carry over: *posKey is reset to 0 and
 *       *posData is moved to the end of the searched range.
 * NULL pointers or lenKey < 1 also give -1; a *posKey outside [0, lenKey)
 * is treated as 0.
 *
 * The carried prefix and the new data are searched as ONE virtual text
 * (key[0..p) followed by data[*posData..lenData)). The previous version kept
 * matching from key[p] after the carried attempt had failed, so it could
 * report a hit for a bare suffix of the key and could lose a shorter partial
 * match at the end of the data.
 */
APP_DECLARE(int) memcs(const char *data, int lenData, int *posData, const char *key, int lenKey, int *posKey) {
    int begin, carried, avail, total, s;

    if (data == NULL || key == NULL || posData == NULL || posKey == NULL || lenKey < 1) {
        if (posKey != NULL) {
            *posKey = 0;
        }
        return -1;
    }

    begin = *posData < 0 ? 0 : *posData;
    carried = (*posKey > 0 && *posKey < lenKey) ? *posKey : 0;
    avail = lenData > begin ? lenData - begin : 0;
    total = carried + avail; /* length of the virtual text */

    /* Candidate start positions are tried in increasing order, so the first
     * one that is still matching when the text runs out is the longest
     * prefix of key at the end of the data. */
    for (s = 0; s < total; s++) {
        int k = 0;
        while (k < lenKey && s + k < total) {
            int v = s + k;
            char c = v < carried ? key[v] : data[begin + (v - carried)];
            if (c != key[k]) {
                break;
            }
            k++;
        }
        if (k == lenKey) {
            /* carried < lenKey, so every match ends inside data */
            *posData = begin + (s + lenKey - carried);
            *posKey = 0;
            return 0;
        }
        if (s + k == total) {
            *posData = begin + avail;
            *posKey = k;
            return 1;
        }
    }

    *posData = begin + avail;
    *posKey = 0;
    return -1;
}
/*
 * substr - copy `len` bytes of `str`, starting at index `start`, into a new
 * NUL-terminated heap buffer.
 *
 * The bytes are copied as raw memory (embedded NULs included); the caller
 * must guarantee that str[start .. start + len) is readable and must free()
 * the result.
 *
 * Returns NULL when str is NULL, start is negative, len is not positive, or
 * the allocation fails.
 */
APP_DECLARE(char *) substr(const char *str, int start, int len) {
    char *ret;

    if (NULL == str || start < 0 || len <= 0) return NULL;
    ret = NEW(char, (size_t)len + 1); /* zero-filled, so the copy is NUL-terminated */
    if (ret == NULL) return NULL;
    memcpy(ret, &str[start], (size_t)len);
    return ret;
}
/*
 * SUBSTR - extract the text that sits between two delimiters.
 *
 * Finds the first occurrence of `start` in the NUL-terminated string `str`,
 * then the first occurrence of `end` after it, and returns a newly allocated
 * copy of what lies in between (via substr()). `len` limits how far into
 * `str` the delimiters may be: the result is NULL when the text after
 * `start`, or the `end` delimiter, begins more than `len` bytes into `str`.
 * NULL is also returned for NULL arguments, a missing delimiter, or an empty
 * span between the delimiters.
 */
APP_DECLARE(char *) SUBSTR(const char *str, size_t len, const char *start, const char *end) {
    char *head;
    char *tail;

    if (!str || !start || !end) {
        return NULL;
    }

    head = strstr(str, start);
    if (!head) {
        return NULL;
    }
    head += strlen(start);
    if ((size_t)(head - str) > len) {
        return NULL;
    }

    tail = strstr(head, end);
    if (!tail || (size_t)(tail - str) > len) {
        return NULL;
    }
    return substr(head, 0, tail - head);
}
/*
 * concat - append `len` bytes of `str` to the heap string `*ret`.
 *
 * `*ret` may be NULL, in which case a new string is allocated. The existing
 * length is measured with strlen(), so `*ret` must be NUL-terminated; the
 * result is NUL-terminated as well. On success `*ret` is updated (the buffer
 * may have moved) and the new pointer is returned.
 *
 * Nothing to append (str == NULL or len < 1) returns `*ret` unchanged.
 * Returns NULL when `ret` is NULL or when memory cannot be obtained; in the
 * latter case `*ret` still points at the old, untouched string.
 */
APP_DECLARE(char *) concat(char **ret, const char *str, int len) {
    char *grown;
    size_t used;

    if (ret == NULL) return NULL;
    if (str == NULL || len < 1) return *ret;

    used = (*ret != NULL) ? strlen(*ret) : 0;
    /* realloc(NULL, n) behaves like malloc(n); the result goes through a
     * temporary so the old string is not lost when the call fails */
    grown = (char *)realloc(*ret, used + (size_t)len + 1);
    if (grown == NULL) return NULL;

    memmove(&grown[used], str, (size_t)len);
    grown[used + (size_t)len] = '\0';
    return *ret = grown;
}
/*
 * debug - printf-style diagnostic output.
 *
 * Appends the formatted message to ./debug.txt; when that file cannot be
 * opened the message is written to stderr instead.
 */
APP_STATIC(void) debug(const char *fmt, ...) {
    FILE *log = NULL;
    va_list args;
    int openFailed = fopen_s(&log, "./debug.txt", "ab");

    va_start(args, fmt);
    if (openFailed) {
        vfprintf(stderr, fmt, args);
    } else {
        vfprintf(log, fmt, args);
        fclose(log);
    }
    va_end(args);
}
// Size in bytes of the buffer cgiParse reads the request body through.
// 256 is a value for testing; in practice it can be raised to 204800.
#define CGI_PARSE_CHUNK 256
// Type names (and pointer aliases) for the three structures defined below.
// cgi_t: one request context.
typedef struct __cgi_t cgi_t, *cgi_ptr;
// cgi_item_t: one node of a singly linked list of named items.
typedef struct __cgi_item_t cgi_item_t, *cgi_item_ptr;
// cgi_file_t: bookkeeping for one uploaded file.
typedef struct __cgi_file_t cgi_file_t, *cgi_file_ptr;
// States of the cgiParse state machine; cgiParse returns the state it stopped in.
typedef enum {
// parsing failed (read error or malformed input)
PARSE_ERROR = -1,
// looking for the first boundary
PARSE_BEGIN,
// inside a part header, looking for the terminating "\r\n\r\n"
PARSE_HEAD,
// inside a part body, looking for the next boundary
PARSE_BODY,
// the closing boundary marker was found
PARSE_END
} cgiParseState;
// Kind of value stored in a cgi_item_t. CGI_ITEM_ENV is 0, which is also the
// type of a freshly zero-allocated item.
typedef enum {
// not assigned anywhere in the visible code
CGI_ITEM_ENV,
// not assigned anywhere in the visible code
CGI_ITEM_QUERY_STRING,
// form field whose content is kept in memory (item->value)
CGI_ITEM_FORM,
// uploaded file whose content is written to a temporary file (item->file)
CGI_ITEM_FILE
} cgiItemType;
// Request context created by cgiInit and filled by cgiParse.
struct __cgi_t {
// number of body bytes to parse (CONTENT_LENGTH, or the file size in main's local test)
size_t totalBytes;
// requestMethod / requestType: values of REQUEST_METHOD / CONTENT_TYPE as returned by getenv;
// boundary: points into requestType just after "boundary=" (not a separate allocation)
char *requestMethod, *requestType, *boundary;
// forms: head of the item list built by cgiParse; envs and queries are not
// filled anywhere in the visible code
cgi_item_ptr envs, queries, forms;
};
// One named item of a request; items form a singly linked list.
struct __cgi_item_t {
// selects which member of the union below is valid
cgiItemType type;
// heap-allocated field name taken from the part header
char *name;
// anonymous union: value for CGI_ITEM_FORM, file for CGI_ITEM_FILE
union {
// heap-allocated, NUL-terminated field content (NULL while empty)
char *value;
// heap-allocated upload description
cgi_file_ptr file;
};
// next item, or NULL at the end of the list
cgi_item_ptr next;
};
// Description of one uploaded file.
struct __cgi_file_t {
// filename: heap-allocated name taken from the part header;
// path: name of the temporary file that receives the content (set by cgi_mktemp)
char *filename, path[MIN_PATH_SIZE + 1];
// number of content bytes accounted for by cgiParse
size_t size;
// descriptor of the temporary file returned by mkstemp; closed by cgiParse
int fd;
};
// cgiInit: allocates a request context and fills it from the CGI environment variables.
APP_DECLARE(cgi_ptr) cgiInit();
// cgiParse: reads cgi->totalBytes bytes of multipart data from cgiIn, builds
// cgi->forms and returns the state the parser stopped in.
APP_DECLARE(cgiParseState) cgiParse(cgi_ptr cgi, FILE *cgiIn);
/*
 * cgiPrintItem - print every item of the list starting at `item` to stdout.
 *
 * Form fields are printed with their length and value, uploads with their
 * size, original filename and temporary path. Items of any other type only
 * produce the separator line. `cgi` is not used.
 *
 * Sizes are printed as unsigned long with %lu: the original %d did not match
 * the size_t arguments. A field without content (value == NULL) is printed
 * as "(null)" instead of handing a NULL pointer to %s.
 */
APP_STATIC(void) cgiPrintItem(cgi_ptr cgi, cgi_item_ptr item) {
    cgi_item_ptr it;

    (void)cgi;
    for (it = item; it != NULL; it = it->next) {
        const char *name = it->name ? it->name : "(null)";

        switch (it->type) {
        case CGI_ITEM_FORM: {
            const char *value = it->value ? it->value : "(null)";
            unsigned long length = it->value ? (unsigned long)strlen(it->value) : 0UL;

            printf("Form[%s].length = [%lu]\nForm[%s].value = %s\n", name, length, name, value);
            break;
        }
        case CGI_ITEM_FILE: {
            if (it->file == NULL) {
                break;
            }
            printf("File[%s].length = %lu\nFile[%s].filename = %s\nFile[%s].path = %s\n",
                   name, (unsigned long)it->file->size,
                   name, it->file->filename ? it->file->filename : "(null)",
                   name, it->file->path);
            break;
        }
        default:
            break;
        }
        printf("---------------------------------\n");
    }
}
int main(int argc, char *argv[]) {
time_t start = time(0);
cgi_ptr cgi = cgiInit();
if (cgi->requestMethod == NULL) { //local test
FILE *fp;
if (!fopen_s(&fp, "D:/www/tmp.txt", "rb")) {//保存数据包的本地文件
cgi->boundary = "----WebKitFormBoundaryvWmSGiJ8xX3qdocw";//数据包中的boundary
fseek(fp, 0, SEEK_END);
cgi->totalBytes = ftell(fp);
fseek(fp, 0, SEEK_SET);
cgiParse(cgi, fp);
}
fclose(fp);
} else {
setmode(fileno(stdin), O_BINARY);//将stdin的数据流置为BINARY类型,这点很重要
setmode(fileno(stdout), O_BINARY);
if (cgi->boundary) cgiParse(cgi, stdin);
}
printf("Content-Type: text/plain; charset=utf-8\n\n");
cgiPrintItem(cgi, cgi->forms);
printf("upload %.2f MB used %d sec\n", (double)(cgi->totalBytes / (1024 * 1024)), time(0) - start);
return 0;
}
/*
 * cgiInit - allocate a request context and fill it from the CGI environment.
 *
 * Reads CONTENT_LENGTH (-> totalBytes), REQUEST_METHOD (-> requestMethod) and
 * CONTENT_TYPE (-> requestType). When the content type contains "boundary=",
 * `boundary` is pointed at the text right after it; it aliases the
 * environment string and must not be freed.
 * NOTE(review): the boundary is taken verbatim up to the end of CONTENT_TYPE;
 * a quoted boundary or trailing parameters are not handled - confirm that the
 * expected clients never send those.
 *
 * A missing, non-numeric or non-positive CONTENT_LENGTH yields totalBytes 0.
 * Returns NULL when the context cannot be allocated.
 */
APP_DECLARE(cgi_ptr) cgiInit() {
    static const char marker[] = "boundary=";
    cgi_ptr cgi = NEW(cgi_t, 1);
    char *tmp;

    if (cgi == NULL) {
        return NULL;
    }

    tmp = getenv("CONTENT_LENGTH");
    if (tmp != NULL) {
        char *stop = NULL;
        long announced = strtol(tmp, &stop, 10);

        /* strtol, unlike atoi, is well defined for out-of-range input */
        if (stop != tmp && announced > 0) {
            cgi->totalBytes = (size_t)announced;
        }
    }

    cgi->requestMethod = getenv("REQUEST_METHOD");
    cgi->requestType = getenv("CONTENT_TYPE");
    if (cgi->requestType != NULL && (tmp = strstr(cgi->requestType, marker)) != NULL) {
        cgi->boundary = tmp + (sizeof(marker) - 1);
    }
    return cgi;
}
/*
 * cgi_mktemp - create the temporary file that receives an upload.
 *
 * Calls mkstemp() with the template "UP-XXXXXX", stores the descriptor in
 * cf->fd and, on success, the generated name in cf->path.
 * Returns True on success; on failure a message is written to stderr,
 * cf->fd is -1 and False is returned. `cgi` is not used.
 */
APP_STATIC(Boolean) cgi_mktemp(cgi_ptr cgi, cgi_file_ptr cf) {
    char nameTemplate[] = "UP-XXXXXX";
    int handle = mkstemp(nameTemplate);

    cf->fd = handle;
    if (handle != -1) {
        _snprintf(cf->path, MIN_PATH_SIZE, "%s", nameTemplate);
        return True;
    }

    fprintf(stderr, "%s mkstemp() failed.\n", __FUNCTION__);
    return False;
}
APP_DECLARE(cgiParseState) cgiParse(cgi_ptr cgi, FILE *cgiIn) {
char buf[CGI_PARSE_CHUNK + 1] = {0}, CRLF2[] = "\r\n\r\n", *key = cgi->boundary, *filename;
int index = 0, total = cgi->totalBytes, chunk = CGI_PARSE_CHUNK, pos = 0, len = 0, posData = 0, posKey = 0, lenBoundary = strlen(cgi->boundary), lenCRLF2 = strlen(CRLF2), lenKey = lenBoundary;
int cs;
cgiParseState state = PARSE_BEGIN;
cgi_item_ptr it = NULL;
cgi->forms = NEW(cgi_item_t, 1);
it = cgi->forms;
while (index < total && state != PARSE_END && state != PARSE_ERROR) {
if (index + chunk - posData > total) {
chunk = total - index + posData;
}
len = chunk - posData;
index += len;
if (len != fread(&buf[posData], sizeof(char), len, cgiIn)) {
debug("fread failed at %d\n", index);
state = PARSE_ERROR;
break;
}
while (state != PARSE_END && state != PARSE_ERROR) {
cs = memcs(buf, chunk, &posData, key, lenKey, &posKey);
if (PARSE_BEGIN == state) {
//解析开始,寻找第一个boundary
if (0 != cs) {
debug("PARSE_BEGIN failed [%s]\n", buf);
state = PARSE_ERROR;
} else {
//继续解析,寻找form-data头的结束标记[\r\n\r\n]
state = PARSE_HEAD;
key = CRLF2;
lenKey = lenCRLF2;
pos = posData;//记载boundary的结束位置,这个pos在下次搜索中表示上次搜索的位置,同样表示曾经有过上次检索
}
} else if (PARSE_HEAD == state) {
//解析form-data头信息
if (0 == cs) {
it->name = SUBSTR(&buf[pos], posData - pos, "; name="", """);
if (it->name == NULL) {
debug("PARSE_HEAD failed [%s]", buf);
state = PARSE_ERROR;
} else {
if (NULL != (filename = SUBSTR(&buf[pos], posData - pos, "; filename="", """))) {
it->type = CGI_ITEM_FILE;
it->file = NEW(cgi_file_t, 1);
it->file->filename = filename;
//创建一个临时文件用来保存上传文件,不然用内存保存上传文件内容,不是一个好办法
cgi_mktemp(cgi, it->file);
} else {
it->type = CGI_ITEM_FORM;
}
//继续解析form-data内容,需要寻找下一个boundary
state = PARSE_BODY;
key = cgi->boundary;
lenKey = lenBoundary;
pos = posData;
}
} else {
//获取不到头信息,那么把上次查询的游标位置移动到起始,方便下一轮查询
memmove(buf, &buf[pos], chunk - pos);
posData = chunk - pos;
pos = 0;
break;
}
} else if (PARSE_BODY == state) {
//寻找form-data之后的下一个{boundary}
if (0 == cs) {
len = posData - lenKey - pos - 4;//4 = {boundary}之前的[\r\n--]
if (len > 0) {
if (it->type == CGI_ITEM_FORM) {
//input|textarea|select则追加到内容
concat(&it->value, &buf[pos], len);
} else {
//input[type=file]则写入临时文件
it->file->size += len;
write(it->file->fd, &buf[pos], sizeof(char) * len);
close(it->file->fd);
}
} else if (it->type == CGI_ITEM_FILE) {
close(it->file->fd);
}
if (buf[posData] == '-' && buf[posData + 1] == '-') {
state = PARSE_END;
//如果找到{boundary}的结束标记,那么就结束
} else {
it->next = NEW(cgi_item_t, 1);
it = it->next;
//继续寻找下一个form-data头信息
state = PARSE_HEAD;
key = CRLF2;
lenKey = lenCRLF2;
pos = posData;
}
} else if (1 == cs) {
//寻找到posKey长度的匹配,那么需要回溯4个字节的内存游标
//这4个字节是预留给{boundary}之前的[\r\n--]
len = chunk - posKey - pos - 4;
//预留4个字节之后,其余的字符写入表单值或临时文件
if (it->type == CGI_ITEM_FORM) {
concat(&it->value, &buf[pos], len);
} else {
it->file->size += len;
write(it->file->fd, &buf[pos], sizeof(char) * len);
}
posData = posKey + 4;
memmove(buf, &buf[chunk - posData], posData);
pos = 0;
break;
} else {
//如果没有找到{boundary},那么分为两种情况
//pos = 0,表示上次没有数据检索,是在整个buf内存块中没有搜索到key,那么写入表单值或临时文件
if (0 == pos) {
if (it->type == CGI_ITEM_FORM) {
concat(&it->value, buf, chunk);
} else {
it->file->size += chunk;
write(it->file->fd, buf, sizeof(char) * chunk);
}
posData = 0;
} else {
//如果存在上次检索,并且没有找到boundary,那么把上次查询的游标位置移动到起始,方便下一轮查询
memmove(buf, &buf[pos], chunk - pos);
posData = chunk - pos;
pos = 0;
}
break;
}
}
}
if (state == PARSE_ERROR && it != NULL && it->type == CGI_ITEM_FILE) {
close(it->file->fd);
}
}
return state;
} |