From: Paulo Marques This patch removes the is-exported bit from the last patch and implements a complete type char, so that /proc/kallsyms more closely resembles the System.map file. In fact, if compiled with KALLSYMS_ALL the only differences between /proc/kallsyms and System.map are the symbols that are left out on purpose: types 'A' and 'U', and kallsyms_xxx. I removed these symbols from System.map and diff'ed against /proc/kallsyms and the files were completely identical :) The System.map file occupied about 980Kb whereas the kallsyms data needed to generate the same output occupied about 440Kb. Signed-off-by: Andrew Morton --- 25-akpm/kernel/kallsyms.c | 52 +++++++++++---------- 25-akpm/scripts/kallsyms.c | 108 ++++++++++++++++----------------------------- 2 files changed, 67 insertions(+), 93 deletions(-) diff -puN kernel/kallsyms.c~kallsyms-correct-type-char-in-proc-kallsyms kernel/kallsyms.c --- 25/kernel/kallsyms.c~kallsyms-correct-type-char-in-proc-kallsyms 2004-09-02 20:17:58.411147624 -0700 +++ 25-akpm/kernel/kallsyms.c 2004-09-02 20:17:58.417146712 -0700 @@ -51,17 +51,16 @@ static inline int is_kernel_text(unsigne given the offset to where the symbol is in the compressed stream */ static unsigned int kallsyms_expand_symbol(unsigned int off, char *result) { - int len; + int len, skipped_first = 0; u8 *tptr, *data; - /* get the compressed symbol length from the first symbol byte, - * masking out the "is_exported" bit */ + /* get the compressed symbol length from the first symbol byte */ data = &kallsyms_names[off]; - len = (*data) & 0x7F; + len = *data; data++; /* update the offset to return the offset for the next symbol on - the compressed stream */ + * the compressed stream */ off += len + 1; /* for every byte on the compressed symbol data, copy the table @@ -72,8 +71,11 @@ static unsigned int kallsyms_expand_symb len--; while (*tptr) { - *result = *tptr; - result++; + if(skipped_first) { + *result = *tptr; + result++; + } else + skipped_first = 1; tptr++; 
} } @@ -84,24 +86,33 @@ static unsigned int kallsyms_expand_symb return off; } +/* get symbol type information. This is encoded as a single char at the + * begining of the symbol name */ +static char kallsyms_get_symbol_type(unsigned int off) +{ + /* get just the first code, look it up in the token table, and return the + * first char from this token */ + return kallsyms_token_table[ kallsyms_token_index[ kallsyms_names[off+1] ] ]; +} + + /* find the offset on the compressed stream given and index in the - kallsyms array */ + * kallsyms array */ static unsigned int get_symbol_offset(unsigned long pos) { u8 *name; int i; - /* use the closest marker we have. We have markers every - 256 positions, so that should be close enough */ + /* use the closest marker we have. We have markers every 256 positions, + * so that should be close enough */ name = &kallsyms_names[ kallsyms_markers[pos>>8] ]; - /* sequentially scan all the symbols up to the point we're - searching for. Every symbol is stored in a - [bit 7: is_exported | bits 6..0: ][ bytes of data] - format, so we just need to add the len to the current - pointer for every symbol we wish to skip */ + /* sequentially scan all the symbols up to the point we're searching for. 
+ * Every symbol is stored in a [][ bytes of data] format, so we + * just need to add the len to the current pointer for every symbol we + * wish to skip */ for(i = 0; i < (pos&0xFF); i++) - name = name + ((*name) & 0x7F) + 1; + name = name + (*name) + 1; return name - kallsyms_names; } @@ -244,14 +255,7 @@ static unsigned long get_ksymbol_core(st iter->owner = NULL; iter->value = kallsyms_addresses[iter->pos]; - if (is_kernel_text(iter->value) || is_kernel_inittext(iter->value)) - iter->type = 't'; - else - iter->type = 'd'; - - /* check the "is_exported" bit on the compressed stream */ - if (kallsyms_names[off] & 0x80) - iter->type += 'A' - 'a'; + iter->type = kallsyms_get_symbol_type(off); off = kallsyms_expand_symbol(off, iter->name); diff -puN scripts/kallsyms.c~kallsyms-correct-type-char-in-proc-kallsyms scripts/kallsyms.c --- 25/scripts/kallsyms.c~kallsyms-correct-type-char-in-proc-kallsyms 2004-09-02 20:17:58.412147472 -0700 +++ 25-akpm/scripts/kallsyms.c 2004-09-02 20:17:58.418146560 -0700 @@ -55,7 +55,6 @@ /* flags to mark symbols */ #define SYM_FLAG_VALID 1 #define SYM_FLAG_SAMPLED 2 -#define SYM_FLAG_EXPORTED 4 struct sym_entry { unsigned long long addr; @@ -68,12 +67,9 @@ struct sym_entry { static struct sym_entry *table; static int size, cnt; -static unsigned long long _stext, _etext, _sinittext, _einittext, _start_ksymtab, _stop_ksymtab; +static unsigned long long _stext, _etext, _sinittext, _einittext; static int all_symbols = 0; -/* aray of pointers into the symbol table sorted by name */ -static struct sym_entry **sorted_table; - struct token { unsigned char data[MAX_TOK_SIZE]; unsigned char len; @@ -125,45 +121,56 @@ read_symbol(FILE *in, struct sym_entry * _sinittext = s->addr; else if (strcmp(str, "_einittext") == 0) _einittext = s->addr; - else if (strcmp(str, "__start___ksymtab") == 0) - _start_ksymtab = s->addr; - else if (strcmp(str, "__stop___ksymtab") == 0) - _stop_ksymtab = s->addr; else if (toupper(s->type) == 'A' || toupper(s->type) == 
'U') return -1; - s->sym = strdup(str); - s->len = strlen(str); + /* include the type field in the symbol name, so that it gets + * compressed together */ + s->len = strlen(str) + 1; + s->sym = (char *) malloc(s->len + 1); + strcpy(s->sym + 1, str); + s->sym[0] = s->type; + return 0; } static int symbol_valid(struct sym_entry *s) { + /* Symbols which vary between passes. Passes 1 and 2 must have + * identical symbol lists. The kallsyms_* symbols below are only added + * after pass 1, they would be included in pass 2 when --all-symbols is + * specified so exclude them to get a stable symbol list. + */ + static char *special_symbols[] = { + "kallsyms_addresses", + "kallsyms_num_syms", + "kallsyms_names", + "kallsyms_markers", + "kallsyms_token_table", + "kallsyms_token_index", + + /* Exclude linker generated symbols which vary between passes */ + "_SDA_BASE_", /* ppc */ + "_SDA2_BASE_", /* ppc */ + NULL }; + int i; + + /* if --all-symbols is not specified, then symbols outside the text + * and inittext sections are discarded */ if (!all_symbols) { if ((s->addr < _stext || s->addr > _etext) && (s->addr < _sinittext || s->addr > _einittext)) return 0; } - /* Exclude symbols which vary between passes. Passes 1 and 2 must have - * identical symbol lists. The kallsyms_* symbols below are only added - * after pass 1, they would be included in pass 2 when --all-symbols is - * specified so exclude them to get a stable symbol list. - */ - if (strstr(s->sym, "_compiled.") || - strcmp(s->sym, "kallsyms_addresses") == 0 || - strcmp(s->sym, "kallsyms_num_syms") == 0 || - strcmp(s->sym, "kallsyms_names") == 0 || - strcmp(s->sym, "kallsyms_markers") == 0 || - strcmp(s->sym, "kallsyms_token_table") == 0 || - strcmp(s->sym, "kallsyms_token_index") == 0) + /* Exclude symbols which vary between passes. 
*/ + if (strstr(s->sym + 1, "_compiled.")) return 0; - /* Exclude linker generated symbols which vary between passes */ - if (strcmp(s->sym, "_SDA_BASE_") == 0 || /* ppc */ - strcmp(s->sym, "_SDA2_BASE_") == 0) /* ppc */ - return 0; + for (i = 0; special_symbols[i]; i++) + if( strcmp(s->sym + 1, special_symbols[i]) == 0 ) + return 0; return 1; } @@ -267,9 +274,7 @@ write_src(void) if ((valid & 0xFF) == 0) markers[valid >> 8] = off; - k = table[i].len; - if (table[i].flags & SYM_FLAG_EXPORTED) k |= 0x80; - printf("\t.byte 0x%02x", k); + printf("\t.byte 0x%02x", table[i].len); for (k = 0; k < table[i].len; k++) printf(", 0x%02x", table[i].sym[k]); printf("\n"); @@ -463,47 +468,11 @@ static void forget_symbol(unsigned char forget_token(symbol + i, len - i); } -static int symbol_sort(const void *a, const void *b) -{ - return strcmp( (*((struct sym_entry **) a))->sym, - (*((struct sym_entry **) b))->sym ); -} - - -/* find out if a symbol is exported. Exported symbols have a corresponding - * __ksymtab_ entry and their addresses are between __start___ksymtab - * and __stop___ksymtab */ -static int is_exported(char *name) -{ - struct sym_entry key, *ksym, **result; - char buf[KSYM_NAME_LEN+32]; - - sprintf(buf, "__ksymtab_%s", name); - key.sym = buf; - - ksym = &key; - result = bsearch(&ksym, sorted_table, cnt, - sizeof(struct sym_entry *), symbol_sort); - - if(!result) return 0; - - ksym = *result; - - return ((ksym->addr >= _start_ksymtab) && (ksym->addr < _stop_ksymtab)); -} - /* set all the symbol flags and do the initial token count */ static void build_initial_tok_table(void) { int i, use_it, valid; - /* build a sorted symbol pointer array so that searching a particular - * symbol is faster */ - sorted_table = (struct sym_entry **) malloc(sizeof(struct sym_entry *) * cnt); - for (i = 0; i < cnt; i++) - sorted_table[i] = &table[i]; - qsort(sorted_table, cnt, sizeof(struct sym_entry *), symbol_sort); - valid = 0; for (i = 0; i < cnt; i++) { table[i].flags = 0; @@ 
-515,6 +484,10 @@ static void build_initial_tok_table(void use_it = 0; for (i = 0; i < cnt; i++) { + + /* subsample the available symbols. This method is almost like + * a Bresenham's algorithm to get uniformly distributed samples + * across the symbol table */ if (table[i].flags & SYM_FLAG_VALID) { use_it += WORKING_SET; @@ -523,9 +496,6 @@ static void build_initial_tok_table(void table[i].flags |= SYM_FLAG_SAMPLED; use_it -= valid; } - - if( is_exported(table[i].sym) ) - table[i].flags |= SYM_FLAG_EXPORTED; } if (table[i].flags & SYM_FLAG_SAMPLED) learn_symbol(table[i].sym, table[i].len); _