引き続き ext/pcre/php_pcre.c を改造。
コールバック関数にも名前付きサブパターンのキャプチャ結果を名前をキーにして渡せるようにしたのに加え、PHP_PCRE_NO_INDEX_NAMED_CAPTURE マクロを定義してコンパイルしたときは、名前付きでキャプチャした部分が数値添字では重複して格納されないようにしてみました。
ほんのちょっとメモリが節約できるのと、print_r()/var_dump() した結果がほんのちょっと見やすくなるぐらいしか、いいことはないんですが。
--- php_pcre.c.orig 2006-10-10 21:44:02.000000000 +0900 +++ php_pcre.c 2006-11-14 19:51:11.000000000 +0900 @@ -417,7 +417,7 @@ /* }}} */ /* {{{ add_offset_pair */ -static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name) +static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name, int index) { zval *match_pair; @@ -429,11 +429,21 @@ add_next_index_stringl(match_pair, str, len, 1); add_next_index_long(match_pair, offset); +#ifdef PHP_PCRE_NO_INDEX_NAMED_CAPTURE + if (name) { + zend_hash_update(Z_ARRVAL_P(result), name, strlen(name)+1, &match_pair, sizeof(zval *), NULL); + } else if (index != -1) { + zend_hash_index_update(Z_ARRVAL_P(result), index, &match_pair, sizeof(zval *), NULL); + } else { + zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair, sizeof(zval *), NULL); + } +#else if (name) { zval_add_ref(&match_pair); zend_hash_update(Z_ARRVAL_P(result), name, strlen(name)+1, &match_pair, sizeof(zval *), NULL); } zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair, sizeof(zval *), NULL); +#endif } /* }}} */ @@ -631,7 +641,7 @@ for (i = 0; i < count; i++) { if (offset_capture) { add_offset_pair(match_sets[i], (char *)stringlist[i], - offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL); + offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL, -1); } else { add_next_index_stringl(match_sets[i], (char *)stringlist[i], offsets[(i<<1)+1] - offsets[i<<1], 1); @@ -657,14 +667,21 @@ for (i = 0; i < count; i++) { if (offset_capture) { add_offset_pair(result_set, (char *)stringlist[i], - offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]); + offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i], i); } else { if (subpat_names[i]) { add_assoc_stringl(result_set, subpat_names[i], (char *)stringlist[i], offsets[(i<<1)+1] - offsets[i<<1], 1); } +#ifdef PHP_PCRE_NO_INDEX_NAMED_CAPTURE + else { + add_index_stringl(result_set, i, (char *)stringlist[i], + 
offsets[(i<<1)+1] - offsets[i<<1], 1); + } +#else add_next_index_stringl(result_set, (char *)stringlist[i], offsets[(i<<1)+1] - offsets[i<<1], 1); +#endif } } /* And add it to the output array */ @@ -676,14 +693,21 @@ if (offset_capture) { add_offset_pair(subpats, (char *)stringlist[i], offsets[(i<<1)+1] - offsets[i<<1], - offsets[i<<1], subpat_names[i]); + offsets[i<<1], subpat_names[i], i); } else { if (subpat_names[i]) { add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i], offsets[(i<<1)+1] - offsets[i<<1], 1); } +#ifdef PHP_PCRE_NO_INDEX_NAMED_CAPTURE + else { + add_index_stringl(subpats, i, (char *)stringlist[i], + offsets[(i<<1)+1] - offsets[i<<1], 1); + } +#else add_next_index_stringl(subpats, (char *)stringlist[i], offsets[(i<<1)+1] - offsets[i<<1], 1); +#endif } } } @@ -792,7 +816,7 @@ /* {{{ preg_do_repl_func */ -static int preg_do_repl_func(zval *function, char *subject, int *offsets, int count, char **result TSRMLS_DC) +static int preg_do_repl_func(zval *function, char *subject, int *offsets, int count, char **subpat_names, char **result TSRMLS_DC) { zval *retval_ptr; /* Function return value */ zval **args[1]; /* Argument to pass to function */ @@ -802,8 +826,18 @@ MAKE_STD_ZVAL(subpats); array_init(subpats); - for (i = 0; i < count; i++) + for (i = 0; i < count; i++) { + if (subpat_names[i]) { + add_assoc_stringl(subpats, subpat_names[i], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1); + } +#ifdef PHP_PCRE_NO_INDEX_NAMED_CAPTURE + else { + add_index_stringl(subpats, i, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1); + } +#else add_next_index_stringl(subpats, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1); +#endif + } args[0] = &subpats; if (call_user_function_ex(EG(function_table), NULL, function, &retval_ptr, 1, args, 0, NULL TSRMLS_CC) == SUCCESS && retval_ptr) { @@ -938,6 +972,7 @@ int exoptions = 0; /* Execution options */ int count = 0; /* Count of matched subpatterns */ int *offsets; 
/* Array of subpattern offsets */ + int num_subpats; /* Number of captured subpatterns */ int size_offsets; /* Size of the offsets array */ int new_len; /* Length of needed storage */ int alloc_len; /* Actual allocated length */ @@ -959,6 +994,7 @@ *replace_end=NULL, /* End of replacement string */ *eval_result, /* Result of eval or custom function */ walk_last; /* Last walked character */ + char **subpat_names = NULL;/* Array for named subpatterns */ int rc; if (extra == NULL) { @@ -981,17 +1017,66 @@ } /* Calculate the size of the offsets array, and allocate memory for it. */ - rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets); + rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats); if (rc < 0) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc); return NULL; } - size_offsets = (size_offsets + 1) * 3; + num_subpats++; + size_offsets = num_subpats * 3; offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0); alloc_len = 2 * subject_len + 1; result = safe_emalloc(alloc_len, sizeof(char), 0); + /* + * Build a mapping from subpattern numbers to their names. We will always + * allocate the table, even though there may be no named subpatterns. This + * avoids somewhat more complicated logic in the inner loops. 
+ */ + if (is_callable_replace) { + subpat_names = (char **)safe_emalloc(num_subpats, sizeof(char *), 0); + memset(subpat_names, 0, sizeof(char *) * num_subpats); + + int name_cnt = 0, name_size, ni = 0; + char *name_table; + unsigned short name_idx; + + rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMECOUNT, &name_cnt); + if (rc < 0) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc); + efree(offsets); + efree(subpat_names); + return NULL; + } + if (name_cnt > 0) { + int rc1, rc2; + long dummy_l; + double dummy_d; + rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table); + rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size); + rc = rc2 ? rc2 : rc1; + if (rc < 0) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc); + efree(offsets); + efree(subpat_names); + return NULL; + } + + while (ni++ < name_cnt) { + name_idx = 0xff * name_table[0] + name_table[1]; + subpat_names[name_idx] = name_table + 2; + if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), &dummy_l, &dummy_d, 0) > 0) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Numeric named subpatterns are not allowed"); + efree(offsets); + efree(subpat_names); + return NULL; + } + name_table += name_size; + } + } + } + /* Initialize */ match = NULL; *result_len = 0; @@ -1028,7 +1113,7 @@ } else if (is_callable_replace) { /* Use custom function to get replacement string and its length. 
*/ eval_result_len = preg_do_repl_func(replace_val, subject, offsets, - count, &eval_result TSRMLS_CC); + count, subpat_names, &eval_result TSRMLS_CC); new_len += eval_result_len; } else { /* do regular substitution */ walk = replace; @@ -1145,6 +1230,9 @@ } efree(offsets); + if (subpat_names != NULL) { + efree(subpat_names); + } return result; } @@ -1450,7 +1538,7 @@ if (offset_capture) { /* Add (match, offset) pair to the return value */ - add_offset_pair(return_value, last_match, &subject[offsets[0]]-last_match, next_offset, NULL); + add_offset_pair(return_value, last_match, &subject[offsets[0]]-last_match, next_offset, NULL, -1); } else { /* Add the piece to the return value */ add_next_index_stringl(return_value, last_match, @@ -1472,7 +1560,7 @@ /* If we have matched a delimiter */ if (!no_empty || match_len > 0) { if (offset_capture) { - add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL); + add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL, -1); } else { add_next_index_stringl(return_value, &subject[offsets[i<<1]], @@ -1529,7 +1617,7 @@ { if (offset_capture) { /* Add the last (match, offset) pair to the return value */ - add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL); + add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL, -1); } else { /* Add the last piece to the return value */ add_next_index_stringl(return_value, last_match, subject + subject_len - last_match, 1);