起因

之所以选择从解析 config 文件入手,主要是因为 Mono 源码庞大且复杂,适合先从一个模块开始读起。再者,我对如何用 C 语言解析 XML 文件一直充满好奇:可能是因为一直使用 C# 的缘故——.NET Framework 提供了 XPath、LINQ to XML 等解析方式,使用方便,平时又都是针对业务来使用,所以从来没有想过去了解其中的原理。这里就带着这份好奇开始了。

本文中涉及到的 config 文件,默认存放在 etc/mono 目录下。与其说是 Mono 解析 config 文件,不如说是用 eglib 来解析简单类型的 XML 文件。

函数调用流程

//-- driver.c
mono_main                                        //mono主函数
  //--- mono-config.c
  mono_config_parse
    mono_config_parse_file
      mono_config_parse_file_with_context
        g_file_get_contents                      //获取config文件内容和文件大小
        mono_config_parse_xml_with_context       //真正解析xml
          mono_config_init                       //创建一个config_handlers哈希表
          g_markup_parse_context_new
          g_markup_parse_context_parse           //按字符解析xml
          g_markup_parse_context_end_parse
          g_markup_parse_context_free            //解析xml结束

先从mono_main函数开始,源码函数行数较多,只保留跟解析config相关代码.

int mono_main (int argc, char* argv[]) { //只保留,解析config文件相关变量 //... char *config_file = NULL; //.... for (i = 1; i < argc; i) { if (strcmp(argv[i], "--config") == 0) { //在运行时,获取--config对应的 if (i 1 >= argc) { fprintf(stderr, "error: --config requires a filename argument\n"); return 1; } config_file = argv[ i]; //如果不指定--config选项,config_file为NULL } } //... mono_set_rootdir (); //设置etc和lib目录 //.... /* Parse gac loading options before loading assemblies. */ if (mono_compile_aot || action == DO_EXEC || action == DO_DEBUGGER) { mono_config_parse (config_file); //调用,由于没有指定--config选项,这里为NULL } //... //... }

mono_config_parse函数:

//解析config文件 void mono_config_parse (const char *filename) { const char *home; char *mono_cfg; #ifndef TARGET_WIN32 char *user_cfg; #endif if (filename) { //由于filename为NULL,不为真 mono_config_parse_file (filename); return; } //获取环境变量MONO_CONFIG char *env_home = g_getenv ("MONO_CONFIG"); if (env_home) { mono_config_parse_file (env_home); return; } //在mono_main函数,mono_set_rootdir已经设置过mono的etc和lib目录 //mono_get_config_dir函数主要是获取mono_cfg_dir全局变量的值,mono_cfg_dir存放的是etc目录路径 //拼接具体config所在的路径 mono_cfg = g_build_filename (mono_get_config_dir (), "mono", "config", NULL); mono_config_parse_file (mono_cfg); //解析config文件 g_free (mono_cfg); #if !defined(TARGET_WIN32) home = g_get_home_dir (); user_cfg = g_strconcat (home, G_DIR_SEPARATOR_S, ".mono/config", NULL); mono_config_parse_file (user_cfg); g_free (user_cfg); #endif }

先不上代码,先看一下图

mongo配置文件详解(Mono源码学习-如何解析config文件)(1)

mono加载config文件到内存,然后创建parse context,开始按字符解析xml

mono_config_parse_file源码

static void mono_config_parse_file (const char *filename) { Parsestate state = {NULL}; //初始化ParseState state.user_data = (gpointer) filename; //user_data存放config所在路径 mono_config_parse_file_with_context (&state, filename); //读取config文件到内存上,并开始按字符解析 }

mono_config_parse_file_with_context源码

/* If assembly is NULL, parse in the global context */
/*
 * Load filename into memory and parse it as XML using state.
 *
 * Returns 0 when the file cannot be read, 1 once the contents have been
 * handed to mono_config_parse_xml_with_context.
 */
static int
mono_config_parse_file_with_context (ParseState *state, const char *filename)
{
    gchar *text;
    gsize len;
    gint offset;

    mono_trace (G_LOG_LEVEL_INFO, MONO_TRACE_CONFIG,
            "Config attempting to parse: '%s'.", filename);

    /* g_file_get_contents allocates text and reports the file's contents and size. */
    if (!g_file_get_contents (filename, &text, &len, NULL))
        return 0;

    offset = 0;
    if (len > 3 && text [0] == '\xef' && text [1] == (gchar) '\xbb' && text [2] == '\xbf')
        offset = 3; /* Skip UTF-8 BOM */

    /* If the caller did not set user_data, fall back to the file name. */
    if (state->user_data == NULL)
        state->user_data = (gpointer) filename;

    /* The key step: parse the buffer, starting just past any BOM. */
    mono_config_parse_xml_with_context (state, text + offset, len - offset);

    g_free (text); /* release the buffer allocated by g_file_get_contents */
    return 1;
}

g_file_get_contents代码较多,我简单用readfile函数进行替代,更容易理解

#define _CRT_SECURE_NO_WARNINGS
#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Read a whole file into a newly allocated, NUL-terminated buffer.
 *
 * filename  name of the file to read; must not be NULL
 * content   out: receives the buffer holding the file contents; the caller
 *           releases it with free(). Set to NULL on failure.
 * len       out: receives the number of bytes stored in the buffer, not
 *           counting the terminating '\0'. Set to 0 on failure.
 *
 * Returns 0 on success, -1 if the file cannot be opened, sized or read, or
 * if the buffer cannot be allocated.
 */
int readfile(char* filename, char** content, int* len)
{
    FILE *pfile;
    char *str = NULL;
    long total;
    size_t nread;
    int rc = -1;

    assert(filename != NULL);
    assert(content != NULL);
    assert(len != NULL);

    *content = NULL;
    *len = 0;

    pfile = fopen(filename, "r");
    if (pfile == NULL)
        return -1;

    /* 1. Seek to the end and 2. use the offset there as the file size. */
    if (fseek(pfile, 0, SEEK_END) != 0)
        goto done;
    total = ftell(pfile);
    if (total < 0 || total >= INT_MAX) /* the size must fit the int out-parameter */
        goto done;

    /* 3. Rewind so the contents can be read from the start. */
    if (fseek(pfile, 0, SEEK_SET) != 0)
        goto done;

    /* 4. One extra byte for the terminating '\0'. */
    str = malloc((size_t)total + 1);
    if (str == NULL)
        goto done;

    /*
     * 5. Read the contents as single bytes so the return value is the byte
     * count. In text mode fewer than `total` bytes may arrive, so the length
     * reported to the caller is what was actually read.
     */
    nread = fread(str, 1, (size_t)total, pfile);
    if (ferror(pfile)) {
        free(str);
        goto done;
    }

    str[nread] = '\0';
    *content = str;
    *len = (int)nread;
    rc = 0;

done:
    fclose(pfile);
    return rc;
}

int main(int argc, char* argv[])
{
    char filename[] = "config";
    char *content = NULL; /* receives the file contents */
    int len = 0;          /* receives the file length */

    (void)argc;
    (void)argv;

    if (readfile(filename, &content, &len) != 0) {
        fprintf(stderr, "failed to read %s\n", filename);
        return 1;
    }

    printf("%s\n", content);
    printf("--------------------------\n");
    printf("%d\n", len);

    free(content);
    return 0;
}

其实g_file_get_contents函数代码不是很多,只有50行左右,只是展示不是很方便,更多的留给g_markup_parse_context_parse函数.

g_markup_parse_context_parse函数

这个函数涉及的结构体较多,回调函数也很多,这里先用一张图简单说明一下。

mongo配置文件详解(Mono源码学习-如何解析config文件)(2)

mono解析xml,转换器

gboolean g_markup_parse_context_parse (GMarkupParseContext *context, const gchar *text, gssize text_len, GError **gerror) { const char *p, *end; end = text text_len; //根据文件大小,得出文件末尾的位置,方便下边循环有终止条件 //逐个字符遍历,进行提取 for (p = text; p < end; p ){ char c = *p; switch (context->state){ case START: if (c == ' ' || c == '\t' || c == '\f' || c == '\n' || (c & 0x80)) continue; if (c == '<'){ if (p 1 < end && p [1] == '?'){ context->state = SKIP_XML_DECLARATION; p ; } else context->state = START_ELEMENT; continue; } set_error ("%s", "Expected < to start the document"); goto fail; case SKIP_XML_DECLARATION: case START_ELEMENT: { const char *element_start = p, *element_end; char *ename = NULL; int full_stop = 0, l; gchar **names = NULL, **values = NULL; for (; p < end && my_isspace (*p); p ) ; if (p == end){ set_error ("%s", "Unfinished element"); goto fail; } if (*p == '!' && (p 2 < end) && (p [1] == '-') && (p [2] == '-')){ context->state = COMMENT; p = 2; break; } if (!my_isnamestartchar (*p)){ set_error ("%s", "Expected an element name"); goto fail; } for ( p; p < end && my_isnamechar (*p); p ) ; if (p == end){ set_error ("%s", "Expected an element"); goto fail; } element_end = p; for (; p < end && my_isspace (*p); p ) ; if (p == end){ set_error ("%s", "Unfinished element"); goto fail; } p = parse_attributes (p, end, &names, &values, gerror, &full_stop, context->state); if (p == end){ if (names != NULL) { g_strfreev (names); g_strfreev (values); } /* Only set the error if parse_attributes did not */ if (gerror != NULL && *gerror == NULL) set_error ("%s", "Unfinished sequence"); goto fail; } l = (int)(element_end - element_start); ename = g_malloc (l 1); if (ename == NULL) goto fail; strncpy (ename, element_start, l); ename [l] = 0; if (context->state == START_ELEMENT) if (context->parser.start_element != NULL) context->parser.start_element (context, ename, (const gchar **) names, (const gchar **) values, context->user_data, gerror); if (names != NULL){ g_strfreev (names); 
g_strfreev (values); } if (gerror != NULL && *gerror != NULL){ g_free (ename); goto fail; } if (full_stop){ if (context->parser.end_element != NULL && context->state == START_ELEMENT){ context->parser.end_element (context, ename, context->user_data, gerror); if (gerror != NULL && *gerror != NULL){ g_free (ename); goto fail; } } g_free (ename); } else { context->level = g_slist_prepend (context->level, ename); } context->state = TEXT; break; } /* case START_ELEMENT */ case TEXT: { if (c == '<'){ context->state = FLUSH_TEXT; break; } if (context->parser.text != NULL){ if (context->text == NULL) context->text = g_string_new (""); g_string_append_c (context->text, c); } break; } case COMMENT: if (*p != '-') break; if (p 2 < end && (p [1] == '-') && (p [2] == '>')){ context->state = TEXT; p = 2; break; } break; case FLUSH_TEXT: if (context->parser.text != NULL && context->text != NULL){ context->parser.text (context, context->text->str, context->text->len, context->user_data, gerror); if (gerror != NULL && *gerror != NULL) goto fail; } if (c == '/') context->state = CLOSING_ELEMENT; else { p--; context->state = START_ELEMENT; } break; case CLOSING_ELEMENT: { GSList *current = context->level; char *text; if (context->level == NULL){ set_error ("%s", "Too many closing tags, not enough open tags"); goto fail; } text = current->data; if (context->parser.end_element != NULL){ context->parser.end_element (context, text, context->user_data, gerror); if (gerror != NULL && *gerror != NULL){ g_free (text); goto fail; } } g_free (text); while (p < end && *p != '>') p ; context->level = context->level->next; g_slist_free_1 (current); context->state = TEXT; break; } /* case CLOSING_ELEMENT */ } /* switch */ } return TRUE; fail: if (context->parser.error && gerror != NULL && *gerror) context->parser.error (context, *gerror, context->user_data); destroy_parse_state (context); return FALSE; }

本文只是大体梳理了一下思路;其中涉及到的结构体、函数指针以及多级指针都没有展开讲,准备以后单独来讲。

,