创意摄影网站,下载优化大师安装桌面,老榕树智能建站系统,百度一下网址大全
以下是美团龙猫初稿，我改正，DeepSeek重新格式化的代码。
重要改正点：
1.二分查找用goto控制迭代，返回row的正确位置
2.在缓冲区头填上父标签使expat能连续解析不报错
#include <stdio.h>
#include <stdlib.h>
#in…

以下是美团龙猫初稿，我改正，DeepSeek重新格式化的代码。
重要改正点：
1.二分查找用goto控制迭代，返回row的正确位置
2.在缓冲区头填上父标签，使expat能连续解析不报错
#include stdio.h
#include stdlib.h
#include string.h
#include expat.h#define MAX_CELL_CONTENT 256typedef struct {int start_row;int end_row;char start_col;char end_col;
} ParseRange;typedef struct {ParseRange range;FILE *csv;FILE *xml_file;XML_Parser parser;int in_row;int current_row;char current_col;int value_started;char temp_value[MAX_CELL_CONTENT];int value_len;int skip_row;long row_start_pos;int first_row_processed;char first_row_max_col;
} ParserState;int parse_excel_range(const char *range_str, ParseRange *range) {if (sscanf(range_str, %c%d:%c%d, range-start_col, range-start_row,range-end_col, range-end_row) ! 4) {return -1;}if (range-start_col range-end_col) return -1;if (range-start_row range-end_row) return -1;return 0;
}long binary_search_row(FILE *file, int target_row) {long low 0;fseek(file, 0, SEEK_END);long high ftell(file);long mid 0;char buffer[1024];int found_row -1;long found_pos 0;while (low high) {
A:mid (low high) / 2;fseek(file, mid, SEEK_SET);int c;while ((c fgetc(file)) ! EOF) { if (c ) {char tag[128] {c};int tag_len 1;while ((c fgetc(file)) ! EOF c ! tag_len sizeof(tag) - 1) {tag[tag_len] c;}tag[tag_len] \0;int rt (strncmp(tag, row, 4) 0);if (rt) {long row_start_pos mid (tag_len);char *row_attr strstr(tag, r\);if (row_attr) {int row_num atoi(row_attr 4);found_pos ftell(file) - tag_len - 1;found_row row_num;if (row_num target_row) {return found_pos;} else if (row_num target_row) {low mid 1;goto A;} else {high mid - 1;goto A;}}}}}if (c EOF) break;}if (found_row target_row) {return found_pos;}return 0;
}void XMLCALL start_element(void *user_data, const XML_Char *name, const XML_Char **attrs) {ParserState *state (ParserState*)user_data;if (strcmp(name, row) 0) {state-row_start_pos XML_GetCurrentByteIndex(state-parser);state-in_row 1;state-current_row -1;state-skip_row 0;for (int i 0; attrs[i]; i 2) {if (strcmp(attrs[i], r) 0) {state-current_row atoi(attrs[i1]);break;}}if (state-current_row state-range.end_row) {XML_StopParser(state-parser, 0);return;} if (state-current_row state-range.start_row) {state-skip_row 1;return;}fprintf(state-csv, %d, state-current_row);}else if (strcmp(name, c) 0 state-in_row !state-skip_row) {for (int i 0; attrs[i]; i 2) {if (strcmp(attrs[i], r) 0) {state-current_col attrs[i1][0];break;}}}else if ((strcmp(name, v) 0 || strcmp(name, t) 0) state-in_row !state-skip_row) {if (state-current_col state-range.start_col state-current_col state-range.end_col) {state-value_started 1;state-value_len 0;state-temp_value[0] \0;}}
}void XMLCALL character_data(void *user_data, const XML_Char *s, int len) {ParserState *state (ParserState*)user_data;if (state-value_started state-value_len len MAX_CELL_CONTENT - 1) {memcpy(state-temp_value state-value_len, s, len);state-value_len len;state-temp_value[state-value_len] \0;}
}void XMLCALL end_element(void *user_data, const XML_Char *name) {ParserState *state (ParserState*)user_data;if (strcmp(name, row) 0 state-in_row !state-skip_row) {for (char col state-current_col 1; col state-range.end_col; col) {fprintf(state-csv, ,);}fprintf(state-csv, \n);state-in_row 0;}else if ((strcmp(name, v) 0 || strcmp(name, t) 0) state-value_started) {if (state-current_col state-range.start_col state-current_col state-range.end_col) {static char last_col 0;if (last_col 0) last_col state-range.start_col;for (char col last_col; col state-current_col; col) {fprintf(state-csv, ,);}fprintf(state-csv, ,%s, state-temp_value);last_col state-current_col 1;}state-value_started 0;}
}int main(int argc, char *argv[]) {if (argc ! 3) {printf(用法: %s xml文件 范围(A1:Z100)\n, argv[0]);return 1;}ParseRange range;if (parse_excel_range(argv[2], range) ! 0) {printf(错误: 无效范围格式\n);return 1;}char csv_filename[256];strncpy(csv_filename, argv[1], sizeof(csv_filename) - 1);char *ext strrchr(csv_filename, .);if (ext) strcpy(ext, .csv);else strncat(csv_filename, .csv, sizeof(csv_filename) - strlen(csv_filename) - 1);FILE *csv fopen(csv_filename, w);if (!csv) {printf(错误: 无法创建CSV\n);return 1;}fprintf(csv, Row);for (char col range.start_col; col range.end_col; col) {fprintf(csv, ,%c, col);}fprintf(csv, \n);FILE *file fopen(argv[1], rb);if (!file) {printf(错误: 无法打开文件 %s\n, argv[1]);fclose(csv);return -1;}long start_pos binary_search_row(file, range.start_row);if (start_pos 0) {fseek(file, start_pos, SEEK_SET);} else {fseek(file, 0, SEEK_SET);}XML_Parser parser XML_ParserCreate(NULL);ParserState state {0};state.range range;state.csv csv;state.parser parser;XML_SetUserData(parser, state);XML_SetElementHandler(parser, start_element, end_element);XML_SetCharacterDataHandler(parser, character_data);fseek(file, start_pos, SEEK_SET);char buffer[8192] sheetData;int done;int i 0;do {if (XML_GetErrorCode(parser) XML_ERROR_FINISHED) break;size_t len fread(buffer 11 * (i 0), 1, sizeof(buffer) - 11 * (i 0), file);done (len sizeof(buffer) - 11 * (i 0));size_t actual_len len;if (!done) {if (XML_Parse(parser, buffer, actual_len 11 * (i 0), done) XML_STATUS_ERROR) {break;}i;}} while (!done);fclose(file);fclose(csv);XML_ParserFree(parser);printf(CSV已保存到 %s\n, csv_filename);return 0;
}编译运行和比较
gcc expatfmt.c -o expatfmt -lexpat -O3
root@66d4e20ec1d7:/par# time ./expatfmt lineitem/xl/worksheets/sheet1.xml A500000:Z600000
CSV已保存到 lineitem/xl/worksheets/sheet1.csv

real 0m1.865s
user 0m1.836s
sys 0m0.028s

root@66d4e20ec1d7:/par# time ./aich2 lineitem/xl/worksheets/sheet1.xml A500000:Z600000 out.csv

real 0m2.870s
user 0m1.064s
sys 0m0.076s