-DPHP_PCRE_NO_INDEX_NAMED_CAPTURE

引き続き ext/pcre/php_pcre.c を改造。
コールバック関数にも、名前付きキャプチャの結果を名前をキーにして渡せるようにしました。さらに、PHP_PCRE_NO_INDEX_NAMED_CAPTURE マクロを定義してコンパイルした場合は、名前付きでキャプチャした部分文字列が数値添字では重複して格納されないようにしてみました。
メモリがほんのちょっと節約できるのと、print_r()/var_dump() した結果がほんのちょっと見やすくなるぐらいしか、いいことはないんですが。

--- php_pcre.c.orig	2006-10-10 21:44:02.000000000 +0900
+++ php_pcre.c	2006-11-14 19:51:11.000000000 +0900
@@ -417,7 +417,7 @@
 /* }}} */
 
 /* {{{ add_offset_pair */
-static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name)
+static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name, int index)
 {
 	zval *match_pair;
 
@@ -429,11 +429,21 @@
 	add_next_index_stringl(match_pair, str, len, 1);
 	add_next_index_long(match_pair, offset);
 	
+#ifdef PHP_PCRE_NO_INDEX_NAMED_CAPTURE
+	if (name) {
+		zend_hash_update(Z_ARRVAL_P(result), name, strlen(name)+1, &match_pair, sizeof(zval *), NULL);
+	} else if (index != -1) {
+		zend_hash_index_update(Z_ARRVAL_P(result), index, &match_pair, sizeof(zval *), NULL);
+	} else {
+		zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair, sizeof(zval *), NULL);
+	}
+#else
 	if (name) {
 		zval_add_ref(&match_pair);
 		zend_hash_update(Z_ARRVAL_P(result), name, strlen(name)+1, &match_pair, sizeof(zval *), NULL);
 	}
 	zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair, sizeof(zval *), NULL);
+#endif
 }
 /* }}} */
 
@@ -631,7 +641,7 @@
 						for (i = 0; i < count; i++) {
 							if (offset_capture) {
 								add_offset_pair(match_sets[i], (char *)stringlist[i],
-												offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
+												offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL, -1);
 							} else {
 								add_next_index_stringl(match_sets[i], (char *)stringlist[i],
 													   offsets[(i<<1)+1] - offsets[i<<1], 1);
@@ -657,14 +667,21 @@
 						for (i = 0; i < count; i++) {
 							if (offset_capture) {
 								add_offset_pair(result_set, (char *)stringlist[i],
-												offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]);
+												offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i], i);
 							} else {
 								if (subpat_names[i]) {
 									add_assoc_stringl(result_set, subpat_names[i], (char *)stringlist[i],
 														   offsets[(i<<1)+1] - offsets[i<<1], 1);
 								}
+#ifdef PHP_PCRE_NO_INDEX_NAMED_CAPTURE
+								else {
+									add_index_stringl(result_set, i, (char *)stringlist[i],
+													  offsets[(i<<1)+1] - offsets[i<<1], 1);
+								}
+#else
 								add_next_index_stringl(result_set, (char *)stringlist[i],
 													   offsets[(i<<1)+1] - offsets[i<<1], 1);
+#endif
 							}
 						}
 						/* And add it to the output array */
@@ -676,14 +693,21 @@
 						if (offset_capture) {
 							add_offset_pair(subpats, (char *)stringlist[i],
 											offsets[(i<<1)+1] - offsets[i<<1],
-											offsets[i<<1], subpat_names[i]);
+											offsets[i<<1], subpat_names[i], i);
 						} else {
 							if (subpat_names[i]) {
 								add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i],
 												  offsets[(i<<1)+1] - offsets[i<<1], 1);
 							}
+#ifdef PHP_PCRE_NO_INDEX_NAMED_CAPTURE
+							else {
+								add_index_stringl(subpats, i, (char *)stringlist[i],
+												  offsets[(i<<1)+1] - offsets[i<<1], 1);
+							}
+#else
 							add_next_index_stringl(subpats, (char *)stringlist[i],
 												   offsets[(i<<1)+1] - offsets[i<<1], 1);
+#endif
 						}
 					}
 				}
@@ -792,7 +816,7 @@
 
 /* {{{ preg_do_repl_func
  */
-static int preg_do_repl_func(zval *function, char *subject, int *offsets, int count, char **result TSRMLS_DC)
+static int preg_do_repl_func(zval *function, char *subject, int *offsets, int count, char **subpat_names, char **result TSRMLS_DC)
 {
 	zval		*retval_ptr;		/* Function return value */
 	zval	   **args[1];			/* Argument to pass to function */
@@ -802,8 +826,18 @@
 
 	MAKE_STD_ZVAL(subpats);
 	array_init(subpats);
-	for (i = 0; i < count; i++)
+	for (i = 0; i < count; i++) {
+		if (subpat_names[i]) {
+			add_assoc_stringl(subpats, subpat_names[i], &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1);
+		}
+#ifdef PHP_PCRE_NO_INDEX_NAMED_CAPTURE
+		else {
+			add_index_stringl(subpats, i, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1);
+		}
+#else
 		add_next_index_stringl(subpats, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1);
+#endif
+	}
 	args[0] = &subpats;
 
 	if (call_user_function_ex(EG(function_table), NULL, function, &retval_ptr, 1, args, 0, NULL TSRMLS_CC) == SUCCESS && retval_ptr) {
@@ -938,6 +972,7 @@
 	int				 exoptions = 0;		/* Execution options */
 	int				 count = 0;			/* Count of matched subpatterns */
 	int				*offsets;			/* Array of subpattern offsets */
+	int				 num_subpats;		/* Number of captured subpatterns */
 	int				 size_offsets;		/* Size of the offsets array */
 	int				 new_len;			/* Length of needed storage */
 	int				 alloc_len;			/* Actual allocated length */
@@ -959,6 +994,7 @@
 					*replace_end=NULL,	/* End of replacement string */
 					*eval_result,		/* Result of eval or custom function */
 					 walk_last;			/* Last walked character */
+	char 		   **subpat_names = NULL;/* Array for named subpatterns */
 	int				 rc;
 
 	if (extra == NULL) {
@@ -981,17 +1017,66 @@
 	}
 
 	/* Calculate the size of the offsets array, and allocate memory for it. */
-	rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
+	rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats);
 	if (rc < 0) {
 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
 		return NULL;
 	}
-	size_offsets = (size_offsets + 1) * 3;
+	num_subpats++;
+	size_offsets = num_subpats * 3;
 	offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
 	
 	alloc_len = 2 * subject_len + 1;
 	result = safe_emalloc(alloc_len, sizeof(char), 0);
 
+	/*
+	 * Build a mapping from subpattern numbers to their names. We will always
+	 * allocate the table, even though there may be no named subpatterns. This
+	 * avoids somewhat more complicated logic in the inner loops.
+	 */
+	if (is_callable_replace) {
+		subpat_names = (char **)safe_emalloc(num_subpats, sizeof(char *), 0);
+		memset(subpat_names, 0, sizeof(char *) * num_subpats);
+
+		int name_cnt = 0, name_size, ni = 0;
+		char *name_table;
+		unsigned short name_idx;
+
+		rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMECOUNT, &name_cnt);
+		if (rc < 0) {
+			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
+			efree(offsets);
+			efree(subpat_names);
+			return NULL;
+		}
+		if (name_cnt > 0) {
+			int rc1, rc2;
+			long dummy_l;
+			double dummy_d;
+			rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
+			rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
+			rc = rc2 ? rc2 : rc1;
+			if (rc < 0) {
+				php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
+				efree(offsets);
+				efree(subpat_names);
+				return NULL;
+			}
+
+			while (ni++ < name_cnt) {
+				name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1];
+				subpat_names[name_idx] = name_table + 2;
+				if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), &dummy_l, &dummy_d, 0) > 0) {
+					php_error_docref(NULL TSRMLS_CC, E_WARNING, "Numeric named subpatterns are not allowed");
+					efree(offsets);
+					efree(subpat_names);
+					return NULL;
+				}
+				name_table += name_size;
+			}
+		}
+	}
+
 	/* Initialize */
 	match = NULL;
 	*result_len = 0;
@@ -1028,7 +1113,7 @@
 			} else if (is_callable_replace) {
 				/* Use custom function to get replacement string and its length. */
 				eval_result_len = preg_do_repl_func(replace_val, subject, offsets,
-													count, &eval_result TSRMLS_CC);
+													count, subpat_names, &eval_result TSRMLS_CC);
 				new_len += eval_result_len;
 			} else { /* do regular substitution */
 				walk = replace;
@@ -1145,6 +1230,9 @@
 	}
 	
 	efree(offsets);
+	if (subpat_names != NULL) {
+		efree(subpat_names);
+	}
 
 	return result;
 }
@@ -1450,7 +1538,7 @@
 
 				if (offset_capture) {
 					/* Add (match, offset) pair to the return value */
-					add_offset_pair(return_value, last_match, &subject[offsets[0]]-last_match, next_offset, NULL);
+					add_offset_pair(return_value, last_match, &subject[offsets[0]]-last_match, next_offset, NULL, -1);
 				} else {
 					/* Add the piece to the return value */
 					add_next_index_stringl(return_value, last_match,
@@ -1472,7 +1560,7 @@
 					/* If we have matched a delimiter */
 					if (!no_empty || match_len > 0) {
 						if (offset_capture) {
-							add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL);
+							add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL, -1);
 						} else {
 							add_next_index_stringl(return_value,
 												   &subject[offsets[i<<1]],
@@ -1529,7 +1617,7 @@
 	{
 		if (offset_capture) {
 			/* Add the last (match, offset) pair to the return value */
-			add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL);
+			add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL, -1);
 		} else {
 			/* Add the last piece to the return value */
 			add_next_index_stringl(return_value, last_match, subject + subject_len - last_match, 1);