|
Ruby
2.0.0p481 (2014-05-08 revision 45883)
|
00001 /********************************************************************** 00002 regcomp.c - Onigmo (Oniguruma-mod) (regular expression library) 00003 **********************************************************************/ 00004 /*- 00005 * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> 00006 * Copyright (c) 2011-2013 K.Takata <kentkt AT csc DOT jp> 00007 * All rights reserved. 00008 * 00009 * Redistribution and use in source and binary forms, with or without 00010 * modification, are permitted provided that the following conditions 00011 * are met: 00012 * 1. Redistributions of source code must retain the above copyright 00013 * notice, this list of conditions and the following disclaimer. 00014 * 2. Redistributions in binary form must reproduce the above copyright 00015 * notice, this list of conditions and the following disclaimer in the 00016 * documentation and/or other materials provided with the distribution. 00017 * 00018 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 00019 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00020 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 00021 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 00022 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 00023 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 00024 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 00025 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00026 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 00027 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 00028 * SUCH DAMAGE. 
00029 */ 00030 00031 #include "regparse.h" 00032 00033 OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN; 00034 00035 extern OnigCaseFoldType 00036 onig_get_default_case_fold_flag(void) 00037 { 00038 return OnigDefaultCaseFoldFlag; 00039 } 00040 00041 extern int 00042 onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag) 00043 { 00044 OnigDefaultCaseFoldFlag = case_fold_flag; 00045 return 0; 00046 } 00047 00048 00049 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS 00050 static unsigned char PadBuf[WORD_ALIGNMENT_SIZE]; 00051 #endif 00052 00053 #if 0 00054 static UChar* 00055 str_dup(UChar* s, UChar* end) 00056 { 00057 ptrdiff_t len = end - s; 00058 00059 if (len > 0) { 00060 UChar* r = (UChar* )xmalloc(len + 1); 00061 CHECK_NULL_RETURN(r); 00062 xmemcpy(r, s, len); 00063 r[len] = (UChar )0; 00064 return r; 00065 } 00066 else return NULL; 00067 } 00068 #endif 00069 00070 static void 00071 swap_node(Node* a, Node* b) 00072 { 00073 Node c; 00074 c = *a; *a = *b; *b = c; 00075 00076 if (NTYPE(a) == NT_STR) { 00077 StrNode* sn = NSTR(a); 00078 if (sn->capa == 0) { 00079 size_t len = sn->end - sn->s; 00080 sn->s = sn->buf; 00081 sn->end = sn->s + len; 00082 } 00083 } 00084 00085 if (NTYPE(b) == NT_STR) { 00086 StrNode* sn = NSTR(b); 00087 if (sn->capa == 0) { 00088 size_t len = sn->end - sn->s; 00089 sn->s = sn->buf; 00090 sn->end = sn->s + len; 00091 } 00092 } 00093 } 00094 00095 static OnigDistance 00096 distance_add(OnigDistance d1, OnigDistance d2) 00097 { 00098 if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE) 00099 return ONIG_INFINITE_DISTANCE; 00100 else { 00101 if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2; 00102 else return ONIG_INFINITE_DISTANCE; 00103 } 00104 } 00105 00106 static OnigDistance 00107 distance_multiply(OnigDistance d, int m) 00108 { 00109 if (m == 0) return 0; 00110 00111 if (d < ONIG_INFINITE_DISTANCE / m) 00112 return d * m; 00113 else 00114 return ONIG_INFINITE_DISTANCE; 00115 } 00116 00117 static int 
00118 bitset_is_empty(BitSetRef bs) 00119 { 00120 int i; 00121 for (i = 0; i < BITSET_SIZE; i++) { 00122 if (bs[i] != 0) return 0; 00123 } 00124 return 1; 00125 } 00126 00127 #ifdef ONIG_DEBUG 00128 static int 00129 onig_is_prelude(void) 00130 { 00131 return !rb_const_defined(rb_cThread, rb_intern_const("MUTEX_FOR_THREAD_EXCLUSIVE")); 00132 } 00133 00134 static int 00135 bitset_on_num(BitSetRef bs) 00136 { 00137 int i, n; 00138 00139 n = 0; 00140 for (i = 0; i < SINGLE_BYTE_SIZE; i++) { 00141 if (BITSET_AT(bs, i)) n++; 00142 } 00143 return n; 00144 } 00145 #endif 00146 00147 extern int 00148 onig_bbuf_init(BBuf* buf, OnigDistance size) 00149 { 00150 if (size <= 0) { 00151 size = 0; 00152 buf->p = NULL; 00153 } 00154 else { 00155 buf->p = (UChar* )xmalloc(size); 00156 if (IS_NULL(buf->p)) return(ONIGERR_MEMORY); 00157 } 00158 00159 buf->alloc = (unsigned int )size; 00160 buf->used = 0; 00161 return 0; 00162 } 00163 00164 00165 #ifdef USE_SUBEXP_CALL 00166 00167 static int 00168 unset_addr_list_init(UnsetAddrList* uslist, int size) 00169 { 00170 UnsetAddr* p; 00171 00172 p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size); 00173 CHECK_NULL_RETURN_MEMERR(p); 00174 uslist->num = 0; 00175 uslist->alloc = size; 00176 uslist->us = p; 00177 return 0; 00178 } 00179 00180 static void 00181 unset_addr_list_end(UnsetAddrList* uslist) 00182 { 00183 if (IS_NOT_NULL(uslist->us)) 00184 xfree(uslist->us); 00185 } 00186 00187 static int 00188 unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node) 00189 { 00190 UnsetAddr* p; 00191 int size; 00192 00193 if (uslist->num >= uslist->alloc) { 00194 size = uslist->alloc * 2; 00195 p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size); 00196 CHECK_NULL_RETURN_MEMERR(p); 00197 uslist->alloc = size; 00198 uslist->us = p; 00199 } 00200 00201 uslist->us[uslist->num].offset = offset; 00202 uslist->us[uslist->num].target = node; 00203 uslist->num++; 00204 return 0; 00205 } 00206 #endif /* USE_SUBEXP_CALL */ 00207 00208 
00209 static int 00210 add_opcode(regex_t* reg, int opcode) 00211 { 00212 BBUF_ADD1(reg, opcode); 00213 return 0; 00214 } 00215 00216 #ifdef USE_COMBINATION_EXPLOSION_CHECK 00217 static int 00218 add_state_check_num(regex_t* reg, int num) 00219 { 00220 StateCheckNumType n = (StateCheckNumType )num; 00221 00222 BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM); 00223 return 0; 00224 } 00225 #endif 00226 00227 static int 00228 add_rel_addr(regex_t* reg, int addr) 00229 { 00230 RelAddrType ra = (RelAddrType )addr; 00231 00232 BBUF_ADD(reg, &ra, SIZE_RELADDR); 00233 return 0; 00234 } 00235 00236 static int 00237 add_abs_addr(regex_t* reg, int addr) 00238 { 00239 AbsAddrType ra = (AbsAddrType )addr; 00240 00241 BBUF_ADD(reg, &ra, SIZE_ABSADDR); 00242 return 0; 00243 } 00244 00245 static int 00246 add_length(regex_t* reg, OnigDistance len) 00247 { 00248 LengthType l = (LengthType )len; 00249 00250 BBUF_ADD(reg, &l, SIZE_LENGTH); 00251 return 0; 00252 } 00253 00254 static int 00255 add_mem_num(regex_t* reg, int num) 00256 { 00257 MemNumType n = (MemNumType )num; 00258 00259 BBUF_ADD(reg, &n, SIZE_MEMNUM); 00260 return 0; 00261 } 00262 00263 static int 00264 add_pointer(regex_t* reg, void* addr) 00265 { 00266 PointerType ptr = (PointerType )addr; 00267 00268 BBUF_ADD(reg, &ptr, SIZE_POINTER); 00269 return 0; 00270 } 00271 00272 static int 00273 add_option(regex_t* reg, OnigOptionType option) 00274 { 00275 BBUF_ADD(reg, &option, SIZE_OPTION); 00276 return 0; 00277 } 00278 00279 static int 00280 add_opcode_rel_addr(regex_t* reg, int opcode, int addr) 00281 { 00282 int r; 00283 00284 r = add_opcode(reg, opcode); 00285 if (r) return r; 00286 r = add_rel_addr(reg, addr); 00287 return r; 00288 } 00289 00290 static int 00291 add_bytes(regex_t* reg, UChar* bytes, OnigDistance len) 00292 { 00293 BBUF_ADD(reg, bytes, len); 00294 return 0; 00295 } 00296 00297 static int 00298 add_bitset(regex_t* reg, BitSetRef bs) 00299 { 00300 BBUF_ADD(reg, bs, SIZE_BITSET); 00301 return 0; 00302 } 00303 
00304 static int 00305 add_opcode_option(regex_t* reg, int opcode, OnigOptionType option) 00306 { 00307 int r; 00308 00309 r = add_opcode(reg, opcode); 00310 if (r) return r; 00311 r = add_option(reg, option); 00312 return r; 00313 } 00314 00315 static int compile_length_tree(Node* node, regex_t* reg); 00316 static int compile_tree(Node* node, regex_t* reg); 00317 00318 00319 #define IS_NEED_STR_LEN_OP_EXACT(op) \ 00320 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\ 00321 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC) 00322 00323 static int 00324 select_str_opcode(int mb_len, OnigDistance str_len, int ignore_case) 00325 { 00326 int op; 00327 00328 if (ignore_case) { 00329 switch (str_len) { 00330 case 1: op = OP_EXACT1_IC; break; 00331 default: op = OP_EXACTN_IC; break; 00332 } 00333 } 00334 else { 00335 switch (mb_len) { 00336 case 1: 00337 switch (str_len) { 00338 case 1: op = OP_EXACT1; break; 00339 case 2: op = OP_EXACT2; break; 00340 case 3: op = OP_EXACT3; break; 00341 case 4: op = OP_EXACT4; break; 00342 case 5: op = OP_EXACT5; break; 00343 default: op = OP_EXACTN; break; 00344 } 00345 break; 00346 00347 case 2: 00348 switch (str_len) { 00349 case 1: op = OP_EXACTMB2N1; break; 00350 case 2: op = OP_EXACTMB2N2; break; 00351 case 3: op = OP_EXACTMB2N3; break; 00352 default: op = OP_EXACTMB2N; break; 00353 } 00354 break; 00355 00356 case 3: 00357 op = OP_EXACTMB3N; 00358 break; 00359 00360 default: 00361 op = OP_EXACTMBN; 00362 break; 00363 } 00364 } 00365 return op; 00366 } 00367 00368 static int 00369 compile_tree_empty_check(Node* node, regex_t* reg, int empty_info) 00370 { 00371 int r; 00372 int saved_num_null_check = reg->num_null_check; 00373 00374 if (empty_info != 0) { 00375 r = add_opcode(reg, OP_NULL_CHECK_START); 00376 if (r) return r; 00377 r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */ 00378 if (r) return r; 00379 reg->num_null_check++; 00380 } 00381 00382 r = compile_tree(node, reg); 00383 if (r) return r; 
00384 00385 if (empty_info != 0) { 00386 if (empty_info == NQ_TARGET_IS_EMPTY) 00387 r = add_opcode(reg, OP_NULL_CHECK_END); 00388 else if (empty_info == NQ_TARGET_IS_EMPTY_MEM) 00389 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST); 00390 else if (empty_info == NQ_TARGET_IS_EMPTY_REC) 00391 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH); 00392 00393 if (r) return r; 00394 r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */ 00395 } 00396 return r; 00397 } 00398 00399 #ifdef USE_SUBEXP_CALL 00400 static int 00401 compile_call(CallNode* node, regex_t* reg) 00402 { 00403 int r; 00404 00405 r = add_opcode(reg, OP_CALL); 00406 if (r) return r; 00407 r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg), 00408 node->target); 00409 if (r) return r; 00410 r = add_abs_addr(reg, 0 /*dummy addr.*/); 00411 return r; 00412 } 00413 #endif 00414 00415 static int 00416 compile_tree_n_times(Node* node, int n, regex_t* reg) 00417 { 00418 int i, r; 00419 00420 for (i = 0; i < n; i++) { 00421 r = compile_tree(node, reg); 00422 if (r) return r; 00423 } 00424 return 0; 00425 } 00426 00427 static int 00428 add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance str_len, 00429 regex_t* reg ARG_UNUSED, int ignore_case) 00430 { 00431 int len; 00432 int op = select_str_opcode(mb_len, str_len, ignore_case); 00433 00434 len = SIZE_OPCODE; 00435 00436 if (op == OP_EXACTMBN) len += SIZE_LENGTH; 00437 if (IS_NEED_STR_LEN_OP_EXACT(op)) 00438 len += SIZE_LENGTH; 00439 00440 len += mb_len * (int )str_len; 00441 return len; 00442 } 00443 00444 static int 00445 add_compile_string(UChar* s, int mb_len, OnigDistance str_len, 00446 regex_t* reg, int ignore_case) 00447 { 00448 int op = select_str_opcode(mb_len, str_len, ignore_case); 00449 add_opcode(reg, op); 00450 00451 if (op == OP_EXACTMBN) 00452 add_length(reg, mb_len); 00453 00454 if (IS_NEED_STR_LEN_OP_EXACT(op)) { 00455 if (op == OP_EXACTN_IC) 00456 add_length(reg, mb_len * str_len); 00457 else 00458 
add_length(reg, str_len); 00459 } 00460 00461 add_bytes(reg, s, mb_len * str_len); 00462 return 0; 00463 } 00464 00465 00466 static int 00467 compile_length_string_node(Node* node, regex_t* reg) 00468 { 00469 int rlen, r, len, prev_len, slen, ambig; 00470 OnigEncoding enc = reg->enc; 00471 UChar *p, *prev; 00472 StrNode* sn; 00473 00474 sn = NSTR(node); 00475 if (sn->end <= sn->s) 00476 return 0; 00477 00478 ambig = NSTRING_IS_AMBIG(node); 00479 00480 p = prev = sn->s; 00481 prev_len = enclen(enc, p, sn->end); 00482 p += prev_len; 00483 slen = 1; 00484 rlen = 0; 00485 00486 for (; p < sn->end; ) { 00487 len = enclen(enc, p, sn->end); 00488 if (len == prev_len) { 00489 slen++; 00490 } 00491 else { 00492 r = add_compile_string_length(prev, prev_len, slen, reg, ambig); 00493 rlen += r; 00494 prev = p; 00495 slen = 1; 00496 prev_len = len; 00497 } 00498 p += len; 00499 } 00500 r = add_compile_string_length(prev, prev_len, slen, reg, ambig); 00501 rlen += r; 00502 return rlen; 00503 } 00504 00505 static int 00506 compile_length_string_raw_node(StrNode* sn, regex_t* reg) 00507 { 00508 if (sn->end <= sn->s) 00509 return 0; 00510 00511 return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0); 00512 } 00513 00514 static int 00515 compile_string_node(Node* node, regex_t* reg) 00516 { 00517 int r, len, prev_len, slen, ambig; 00518 OnigEncoding enc = reg->enc; 00519 UChar *p, *prev, *end; 00520 StrNode* sn; 00521 00522 sn = NSTR(node); 00523 if (sn->end <= sn->s) 00524 return 0; 00525 00526 end = sn->end; 00527 ambig = NSTRING_IS_AMBIG(node); 00528 00529 p = prev = sn->s; 00530 prev_len = enclen(enc, p, end); 00531 p += prev_len; 00532 slen = 1; 00533 00534 for (; p < end; ) { 00535 len = enclen(enc, p, end); 00536 if (len == prev_len) { 00537 slen++; 00538 } 00539 else { 00540 r = add_compile_string(prev, prev_len, slen, reg, ambig); 00541 if (r) return r; 00542 00543 prev = p; 00544 slen = 1; 00545 prev_len = len; 00546 } 00547 00548 p += len; 00549 } 
00550 return add_compile_string(prev, prev_len, slen, reg, ambig); 00551 } 00552 00553 static int 00554 compile_string_raw_node(StrNode* sn, regex_t* reg) 00555 { 00556 if (sn->end <= sn->s) 00557 return 0; 00558 00559 return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0); 00560 } 00561 00562 static int 00563 add_multi_byte_cclass(BBuf* mbuf, regex_t* reg) 00564 { 00565 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS 00566 add_length(reg, mbuf->used); 00567 return add_bytes(reg, mbuf->p, mbuf->used); 00568 #else 00569 int r, pad_size; 00570 UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH; 00571 00572 GET_ALIGNMENT_PAD_SIZE(p, pad_size); 00573 add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1)); 00574 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size); 00575 00576 r = add_bytes(reg, mbuf->p, mbuf->used); 00577 00578 /* padding for return value from compile_length_cclass_node() to be fix. */ 00579 pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size; 00580 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size); 00581 return r; 00582 #endif 00583 } 00584 00585 static int 00586 compile_length_cclass_node(CClassNode* cc, regex_t* reg) 00587 { 00588 int len; 00589 00590 if (IS_NCCLASS_SHARE(cc)) { 00591 len = SIZE_OPCODE + SIZE_POINTER; 00592 return len; 00593 } 00594 00595 if (IS_NULL(cc->mbuf)) { 00596 len = SIZE_OPCODE + SIZE_BITSET; 00597 } 00598 else { 00599 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) { 00600 len = SIZE_OPCODE; 00601 } 00602 else { 00603 len = SIZE_OPCODE + SIZE_BITSET; 00604 } 00605 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS 00606 len += SIZE_LENGTH + cc->mbuf->used; 00607 #else 00608 len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1); 00609 #endif 00610 } 00611 00612 return len; 00613 } 00614 00615 static int 00616 compile_cclass_node(CClassNode* cc, regex_t* reg) 00617 { 00618 int r; 00619 00620 if (IS_NCCLASS_SHARE(cc)) { 00621 add_opcode(reg, OP_CCLASS_NODE); 00622 r = add_pointer(reg, cc); 00623 return r; 
00624 } 00625 00626 if (IS_NULL(cc->mbuf)) { 00627 if (IS_NCCLASS_NOT(cc)) 00628 add_opcode(reg, OP_CCLASS_NOT); 00629 else 00630 add_opcode(reg, OP_CCLASS); 00631 00632 r = add_bitset(reg, cc->bs); 00633 } 00634 else { 00635 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) { 00636 if (IS_NCCLASS_NOT(cc)) 00637 add_opcode(reg, OP_CCLASS_MB_NOT); 00638 else 00639 add_opcode(reg, OP_CCLASS_MB); 00640 00641 r = add_multi_byte_cclass(cc->mbuf, reg); 00642 } 00643 else { 00644 if (IS_NCCLASS_NOT(cc)) 00645 add_opcode(reg, OP_CCLASS_MIX_NOT); 00646 else 00647 add_opcode(reg, OP_CCLASS_MIX); 00648 00649 r = add_bitset(reg, cc->bs); 00650 if (r) return r; 00651 r = add_multi_byte_cclass(cc->mbuf, reg); 00652 } 00653 } 00654 00655 return r; 00656 } 00657 00658 static int 00659 entry_repeat_range(regex_t* reg, int id, int lower, int upper) 00660 { 00661 #define REPEAT_RANGE_ALLOC 4 00662 00663 OnigRepeatRange* p; 00664 00665 if (reg->repeat_range_alloc == 0) { 00666 p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC); 00667 CHECK_NULL_RETURN_MEMERR(p); 00668 reg->repeat_range = p; 00669 reg->repeat_range_alloc = REPEAT_RANGE_ALLOC; 00670 } 00671 else if (reg->repeat_range_alloc <= id) { 00672 int n; 00673 n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC; 00674 p = (OnigRepeatRange* )xrealloc(reg->repeat_range, 00675 sizeof(OnigRepeatRange) * n); 00676 CHECK_NULL_RETURN_MEMERR(p); 00677 reg->repeat_range = p; 00678 reg->repeat_range_alloc = n; 00679 } 00680 else { 00681 p = reg->repeat_range; 00682 } 00683 00684 p[id].lower = lower; 00685 p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper); 00686 return 0; 00687 } 00688 00689 static int 00690 compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info, 00691 regex_t* reg) 00692 { 00693 int r; 00694 int num_repeat = reg->num_repeat; 00695 00696 r = add_opcode(reg, qn->greedy ? 
OP_REPEAT : OP_REPEAT_NG); 00697 if (r) return r; 00698 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */ 00699 reg->num_repeat++; 00700 if (r) return r; 00701 r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC); 00702 if (r) return r; 00703 00704 r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper); 00705 if (r) return r; 00706 00707 r = compile_tree_empty_check(qn->target, reg, empty_info); 00708 if (r) return r; 00709 00710 if ( 00711 #ifdef USE_SUBEXP_CALL 00712 reg->num_call > 0 || 00713 #endif 00714 IS_QUANTIFIER_IN_REPEAT(qn)) { 00715 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG); 00716 } 00717 else { 00718 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG); 00719 } 00720 if (r) return r; 00721 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */ 00722 return r; 00723 } 00724 00725 static int 00726 is_anychar_star_quantifier(QtfrNode* qn) 00727 { 00728 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) && 00729 NTYPE(qn->target) == NT_CANY) 00730 return 1; 00731 else 00732 return 0; 00733 } 00734 00735 #define QUANTIFIER_EXPAND_LIMIT_SIZE 50 00736 #define CKN_ON (ckn > 0) 00737 00738 #ifdef USE_COMBINATION_EXPLOSION_CHECK 00739 00740 static int 00741 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) 00742 { 00743 int len, mod_tlen, cklen; 00744 int ckn; 00745 int infinite = IS_REPEAT_INFINITE(qn->upper); 00746 int empty_info = qn->target_empty_info; 00747 int tlen = compile_length_tree(qn->target, reg); 00748 00749 if (tlen < 0) return tlen; 00750 00751 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0); 00752 00753 cklen = (CKN_ON ? 
SIZE_STATE_CHECK_NUM: 0); 00754 00755 /* anychar repeat */ 00756 if (NTYPE(qn->target) == NT_CANY) { 00757 if (qn->greedy && infinite) { 00758 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) 00759 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen; 00760 else 00761 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen; 00762 } 00763 } 00764 00765 if (empty_info != 0) 00766 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); 00767 else 00768 mod_tlen = tlen; 00769 00770 if (infinite && qn->lower <= 1) { 00771 if (qn->greedy) { 00772 if (qn->lower == 1) 00773 len = SIZE_OP_JUMP; 00774 else 00775 len = 0; 00776 00777 len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP; 00778 } 00779 else { 00780 if (qn->lower == 0) 00781 len = SIZE_OP_JUMP; 00782 else 00783 len = 0; 00784 00785 len += mod_tlen + SIZE_OP_PUSH + cklen; 00786 } 00787 } 00788 else if (qn->upper == 0) { 00789 if (qn->is_refered != 0) /* /(?<n>..){0}/ */ 00790 len = SIZE_OP_JUMP + tlen; 00791 else 00792 len = 0; 00793 } 00794 else if (qn->upper == 1 && qn->greedy) { 00795 if (qn->lower == 0) { 00796 if (CKN_ON) { 00797 len = SIZE_OP_STATE_CHECK_PUSH + tlen; 00798 } 00799 else { 00800 len = SIZE_OP_PUSH + tlen; 00801 } 00802 } 00803 else { 00804 len = tlen; 00805 } 00806 } 00807 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ 00808 len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen; 00809 } 00810 else { 00811 len = SIZE_OP_REPEAT_INC 00812 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM; 00813 if (CKN_ON) 00814 len += SIZE_OP_STATE_CHECK; 00815 } 00816 00817 return len; 00818 } 00819 00820 static int 00821 compile_quantifier_node(QtfrNode* qn, regex_t* reg) 00822 { 00823 int r, mod_tlen; 00824 int ckn; 00825 int infinite = IS_REPEAT_INFINITE(qn->upper); 00826 int empty_info = qn->target_empty_info; 00827 int tlen = compile_length_tree(qn->target, reg); 00828 00829 if (tlen < 0) return tlen; 00830 00831 ckn = ((reg->num_comb_exp_check > 0) ? 
qn->comb_exp_check_num : 0); 00832 00833 if (is_anychar_star_quantifier(qn)) { 00834 r = compile_tree_n_times(qn->target, qn->lower, reg); 00835 if (r) return r; 00836 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) { 00837 if (IS_MULTILINE(reg->options)) 00838 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); 00839 else 00840 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); 00841 if (r) return r; 00842 if (CKN_ON) { 00843 r = add_state_check_num(reg, ckn); 00844 if (r) return r; 00845 } 00846 00847 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); 00848 } 00849 else { 00850 if (IS_MULTILINE(reg->options)) { 00851 r = add_opcode(reg, (CKN_ON ? 00852 OP_STATE_CHECK_ANYCHAR_ML_STAR 00853 : OP_ANYCHAR_ML_STAR)); 00854 } 00855 else { 00856 r = add_opcode(reg, (CKN_ON ? 00857 OP_STATE_CHECK_ANYCHAR_STAR 00858 : OP_ANYCHAR_STAR)); 00859 } 00860 if (r) return r; 00861 if (CKN_ON) 00862 r = add_state_check_num(reg, ckn); 00863 00864 return r; 00865 } 00866 } 00867 00868 if (empty_info != 0) 00869 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); 00870 else 00871 mod_tlen = tlen; 00872 00873 if (infinite && qn->lower <= 1) { 00874 if (qn->greedy) { 00875 if (qn->lower == 1) { 00876 r = add_opcode_rel_addr(reg, OP_JUMP, 00877 (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)); 00878 if (r) return r; 00879 } 00880 00881 if (CKN_ON) { 00882 r = add_opcode(reg, OP_STATE_CHECK_PUSH); 00883 if (r) return r; 00884 r = add_state_check_num(reg, ckn); 00885 if (r) return r; 00886 r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP); 00887 } 00888 else { 00889 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP); 00890 } 00891 if (r) return r; 00892 r = compile_tree_empty_check(qn->target, reg, empty_info); 00893 if (r) return r; 00894 r = add_opcode_rel_addr(reg, OP_JUMP, 00895 -(mod_tlen + (int )SIZE_OP_JUMP 00896 + (int )(CKN_ON ? 
SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH))); 00897 } 00898 else { 00899 if (qn->lower == 0) { 00900 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen); 00901 if (r) return r; 00902 } 00903 r = compile_tree_empty_check(qn->target, reg, empty_info); 00904 if (r) return r; 00905 if (CKN_ON) { 00906 r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP); 00907 if (r) return r; 00908 r = add_state_check_num(reg, ckn); 00909 if (r) return r; 00910 r = add_rel_addr(reg, 00911 -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP)); 00912 } 00913 else 00914 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH)); 00915 } 00916 } 00917 else if (qn->upper == 0) { 00918 if (qn->is_refered != 0) { /* /(?<n>..){0}/ */ 00919 r = add_opcode_rel_addr(reg, OP_JUMP, tlen); 00920 if (r) return r; 00921 r = compile_tree(qn->target, reg); 00922 } 00923 else 00924 r = 0; 00925 } 00926 else if (qn->upper == 1 && qn->greedy) { 00927 if (qn->lower == 0) { 00928 if (CKN_ON) { 00929 r = add_opcode(reg, OP_STATE_CHECK_PUSH); 00930 if (r) return r; 00931 r = add_state_check_num(reg, ckn); 00932 if (r) return r; 00933 r = add_rel_addr(reg, tlen); 00934 } 00935 else { 00936 r = add_opcode_rel_addr(reg, OP_PUSH, tlen); 00937 } 00938 if (r) return r; 00939 } 00940 00941 r = compile_tree(qn->target, reg); 00942 } 00943 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' 
*/ 00944 if (CKN_ON) { 00945 r = add_opcode(reg, OP_STATE_CHECK_PUSH); 00946 if (r) return r; 00947 r = add_state_check_num(reg, ckn); 00948 if (r) return r; 00949 r = add_rel_addr(reg, SIZE_OP_JUMP); 00950 } 00951 else { 00952 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP); 00953 } 00954 00955 if (r) return r; 00956 r = add_opcode_rel_addr(reg, OP_JUMP, tlen); 00957 if (r) return r; 00958 r = compile_tree(qn->target, reg); 00959 } 00960 else { 00961 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg); 00962 if (CKN_ON) { 00963 if (r) return r; 00964 r = add_opcode(reg, OP_STATE_CHECK); 00965 if (r) return r; 00966 r = add_state_check_num(reg, ckn); 00967 } 00968 } 00969 return r; 00970 } 00971 00972 #else /* USE_COMBINATION_EXPLOSION_CHECK */ 00973 00974 static int 00975 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) 00976 { 00977 int len, mod_tlen; 00978 int infinite = IS_REPEAT_INFINITE(qn->upper); 00979 int empty_info = qn->target_empty_info; 00980 int tlen = compile_length_tree(qn->target, reg); 00981 00982 if (tlen < 0) return tlen; 00983 00984 /* anychar repeat */ 00985 if (NTYPE(qn->target) == NT_CANY) { 00986 if (qn->greedy && infinite) { 00987 if (IS_NOT_NULL(qn->next_head_exact)) 00988 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower; 00989 else 00990 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower; 00991 } 00992 } 00993 00994 if (empty_info != 0) 00995 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); 00996 else 00997 mod_tlen = tlen; 00998 00999 if (infinite && 01000 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { 01001 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { 01002 len = SIZE_OP_JUMP; 01003 } 01004 else { 01005 len = tlen * qn->lower; 01006 } 01007 01008 if (qn->greedy) { 01009 if (IS_NOT_NULL(qn->head_exact)) 01010 len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP; 01011 else if (IS_NOT_NULL(qn->next_head_exact)) 01012 len += 
SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP; 01013 else 01014 len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP; 01015 } 01016 else 01017 len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH; 01018 } 01019 else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */ 01020 len = SIZE_OP_JUMP + tlen; 01021 } 01022 else if (!infinite && qn->greedy && 01023 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper 01024 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { 01025 len = tlen * qn->lower; 01026 len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower); 01027 } 01028 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ 01029 len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen; 01030 } 01031 else { 01032 len = SIZE_OP_REPEAT_INC 01033 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM; 01034 } 01035 01036 return len; 01037 } 01038 01039 static int 01040 compile_quantifier_node(QtfrNode* qn, regex_t* reg) 01041 { 01042 int i, r, mod_tlen; 01043 int infinite = IS_REPEAT_INFINITE(qn->upper); 01044 int empty_info = qn->target_empty_info; 01045 int tlen = compile_length_tree(qn->target, reg); 01046 01047 if (tlen < 0) return tlen; 01048 01049 if (is_anychar_star_quantifier(qn)) { 01050 r = compile_tree_n_times(qn->target, qn->lower, reg); 01051 if (r) return r; 01052 if (IS_NOT_NULL(qn->next_head_exact)) { 01053 if (IS_MULTILINE(reg->options)) 01054 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); 01055 else 01056 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); 01057 if (r) return r; 01058 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); 01059 } 01060 else { 01061 if (IS_MULTILINE(reg->options)) 01062 return add_opcode(reg, OP_ANYCHAR_ML_STAR); 01063 else 01064 return add_opcode(reg, OP_ANYCHAR_STAR); 01065 } 01066 } 01067 01068 if (empty_info != 0) 01069 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); 01070 else 01071 mod_tlen = tlen; 01072 01073 if (infinite && 01074 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { 
01075 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { 01076 if (qn->greedy) { 01077 if (IS_NOT_NULL(qn->head_exact)) 01078 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1); 01079 else if (IS_NOT_NULL(qn->next_head_exact)) 01080 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT); 01081 else 01082 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH); 01083 } 01084 else { 01085 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP); 01086 } 01087 if (r) return r; 01088 } 01089 else { 01090 r = compile_tree_n_times(qn->target, qn->lower, reg); 01091 if (r) return r; 01092 } 01093 01094 if (qn->greedy) { 01095 if (IS_NOT_NULL(qn->head_exact)) { 01096 r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1, 01097 mod_tlen + SIZE_OP_JUMP); 01098 if (r) return r; 01099 add_bytes(reg, NSTR(qn->head_exact)->s, 1); 01100 r = compile_tree_empty_check(qn->target, reg, empty_info); 01101 if (r) return r; 01102 r = add_opcode_rel_addr(reg, OP_JUMP, 01103 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1)); 01104 } 01105 else if (IS_NOT_NULL(qn->next_head_exact)) { 01106 r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT, 01107 mod_tlen + SIZE_OP_JUMP); 01108 if (r) return r; 01109 add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); 01110 r = compile_tree_empty_check(qn->target, reg, empty_info); 01111 if (r) return r; 01112 r = add_opcode_rel_addr(reg, OP_JUMP, 01113 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT)); 01114 } 01115 else { 01116 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP); 01117 if (r) return r; 01118 r = compile_tree_empty_check(qn->target, reg, empty_info); 01119 if (r) return r; 01120 r = add_opcode_rel_addr(reg, OP_JUMP, 01121 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH)); 01122 } 01123 } 01124 else { 01125 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen); 01126 if (r) return r; 01127 r = compile_tree_empty_check(qn->target, reg, empty_info); 01128 if (r) return r; 
01129 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH)); 01130 } 01131 } 01132 else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */ 01133 r = add_opcode_rel_addr(reg, OP_JUMP, tlen); 01134 if (r) return r; 01135 r = compile_tree(qn->target, reg); 01136 } 01137 else if (!infinite && qn->greedy && 01138 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper 01139 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { 01140 int n = qn->upper - qn->lower; 01141 01142 r = compile_tree_n_times(qn->target, qn->lower, reg); 01143 if (r) return r; 01144 01145 for (i = 0; i < n; i++) { 01146 r = add_opcode_rel_addr(reg, OP_PUSH, 01147 (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH); 01148 if (r) return r; 01149 r = compile_tree(qn->target, reg); 01150 if (r) return r; 01151 } 01152 } 01153 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ 01154 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP); 01155 if (r) return r; 01156 r = add_opcode_rel_addr(reg, OP_JUMP, tlen); 01157 if (r) return r; 01158 r = compile_tree(qn->target, reg); 01159 } 01160 else { 01161 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg); 01162 } 01163 return r; 01164 } 01165 #endif /* USE_COMBINATION_EXPLOSION_CHECK */ 01166 01167 static int 01168 compile_length_option_node(EncloseNode* node, regex_t* reg) 01169 { 01170 int tlen; 01171 OnigOptionType prev = reg->options; 01172 01173 reg->options = node->option; 01174 tlen = compile_length_tree(node->target, reg); 01175 reg->options = prev; 01176 01177 if (tlen < 0) return tlen; 01178 01179 if (IS_DYNAMIC_OPTION(prev ^ node->option)) { 01180 return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL 01181 + tlen + SIZE_OP_SET_OPTION; 01182 } 01183 else 01184 return tlen; 01185 } 01186 01187 static int 01188 compile_option_node(EncloseNode* node, regex_t* reg) 01189 { 01190 int r; 01191 OnigOptionType prev = reg->options; 01192 01193 if (IS_DYNAMIC_OPTION(prev ^ node->option)) { 01194 r = 
add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option); 01195 if (r) return r; 01196 r = add_opcode_option(reg, OP_SET_OPTION, prev); 01197 if (r) return r; 01198 r = add_opcode(reg, OP_FAIL); 01199 if (r) return r; 01200 } 01201 01202 reg->options = node->option; 01203 r = compile_tree(node->target, reg); 01204 reg->options = prev; 01205 01206 if (IS_DYNAMIC_OPTION(prev ^ node->option)) { 01207 if (r) return r; 01208 r = add_opcode_option(reg, OP_SET_OPTION, prev); 01209 } 01210 return r; 01211 } 01212 01213 static int 01214 compile_length_enclose_node(EncloseNode* node, regex_t* reg) 01215 { 01216 int len; 01217 int tlen; 01218 01219 if (node->type == ENCLOSE_OPTION) 01220 return compile_length_option_node(node, reg); 01221 01222 if (node->target) { 01223 tlen = compile_length_tree(node->target, reg); 01224 if (tlen < 0) return tlen; 01225 } 01226 else 01227 tlen = 0; 01228 01229 switch (node->type) { 01230 case ENCLOSE_MEMORY: 01231 #ifdef USE_SUBEXP_CALL 01232 if (IS_ENCLOSE_CALLED(node)) { 01233 len = SIZE_OP_MEMORY_START_PUSH + tlen 01234 + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN; 01235 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) 01236 len += (IS_ENCLOSE_RECURSION(node) 01237 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); 01238 else 01239 len += (IS_ENCLOSE_RECURSION(node) 01240 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); 01241 } 01242 else 01243 #endif 01244 { 01245 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum)) 01246 len = SIZE_OP_MEMORY_START_PUSH; 01247 else 01248 len = SIZE_OP_MEMORY_START; 01249 01250 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum) 01251 ? 
SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END); 01252 } 01253 break; 01254 01255 case ENCLOSE_STOP_BACKTRACK: 01256 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) { 01257 QtfrNode* qn = NQTFR(node->target); 01258 tlen = compile_length_tree(qn->target, reg); 01259 if (tlen < 0) return tlen; 01260 01261 len = tlen * qn->lower 01262 + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP; 01263 } 01264 else { 01265 len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT; 01266 } 01267 break; 01268 01269 case ENCLOSE_CONDITION: 01270 len = SIZE_OP_CONDITION; 01271 if (NTYPE(node->target) == NT_ALT) { 01272 Node* x = node->target; 01273 01274 tlen = compile_length_tree(NCAR(x), reg); /* yes-node */ 01275 if (tlen < 0) return tlen; 01276 len += tlen + SIZE_OP_JUMP; 01277 if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG; 01278 x = NCDR(x); 01279 tlen = compile_length_tree(NCAR(x), reg); /* no-node */ 01280 if (tlen < 0) return tlen; 01281 len += tlen; 01282 if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN; 01283 } 01284 else { 01285 return ONIGERR_PARSER_BUG; 01286 } 01287 break; 01288 01289 default: 01290 return ONIGERR_TYPE_BUG; 01291 break; 01292 } 01293 01294 return len; 01295 } 01296 01297 static int get_char_length_tree(Node* node, regex_t* reg, int* len); 01298 01299 static int 01300 compile_enclose_node(EncloseNode* node, regex_t* reg) 01301 { 01302 int r, len; 01303 01304 if (node->type == ENCLOSE_OPTION) 01305 return compile_option_node(node, reg); 01306 01307 switch (node->type) { 01308 case ENCLOSE_MEMORY: 01309 #ifdef USE_SUBEXP_CALL 01310 if (IS_ENCLOSE_CALLED(node)) { 01311 r = add_opcode(reg, OP_CALL); 01312 if (r) return r; 01313 node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP; 01314 node->state |= NST_ADDR_FIXED; 01315 r = add_abs_addr(reg, (int )node->call_addr); 01316 if (r) return r; 01317 len = compile_length_tree(node->target, reg); 01318 len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN); 01319 if 
(BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) 01320 len += (IS_ENCLOSE_RECURSION(node) 01321 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); 01322 else 01323 len += (IS_ENCLOSE_RECURSION(node) 01324 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); 01325 01326 r = add_opcode_rel_addr(reg, OP_JUMP, len); 01327 if (r) return r; 01328 } 01329 #endif 01330 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum)) 01331 r = add_opcode(reg, OP_MEMORY_START_PUSH); 01332 else 01333 r = add_opcode(reg, OP_MEMORY_START); 01334 if (r) return r; 01335 r = add_mem_num(reg, node->regnum); 01336 if (r) return r; 01337 r = compile_tree(node->target, reg); 01338 if (r) return r; 01339 #ifdef USE_SUBEXP_CALL 01340 if (IS_ENCLOSE_CALLED(node)) { 01341 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) 01342 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node) 01343 ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH)); 01344 else 01345 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node) 01346 ? OP_MEMORY_END_REC : OP_MEMORY_END)); 01347 01348 if (r) return r; 01349 r = add_mem_num(reg, node->regnum); 01350 if (r) return r; 01351 r = add_opcode(reg, OP_RETURN); 01352 } 01353 else 01354 #endif 01355 { 01356 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) 01357 r = add_opcode(reg, OP_MEMORY_END_PUSH); 01358 else 01359 r = add_opcode(reg, OP_MEMORY_END); 01360 if (r) return r; 01361 r = add_mem_num(reg, node->regnum); 01362 } 01363 break; 01364 01365 case ENCLOSE_STOP_BACKTRACK: 01366 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) { 01367 QtfrNode* qn = NQTFR(node->target); 01368 r = compile_tree_n_times(qn->target, qn->lower, reg); 01369 if (r) return r; 01370 01371 len = compile_length_tree(qn->target, reg); 01372 if (len < 0) return len; 01373 01374 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP); 01375 if (r) return r; 01376 r = compile_tree(qn->target, reg); 01377 if (r) return r; 01378 r = add_opcode(reg, OP_POP); 01379 if (r) return r; 01380 r = add_opcode_rel_addr(reg, 
OP_JUMP, 01381 -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP)); 01382 } 01383 else { 01384 r = add_opcode(reg, OP_PUSH_STOP_BT); 01385 if (r) return r; 01386 r = compile_tree(node->target, reg); 01387 if (r) return r; 01388 r = add_opcode(reg, OP_POP_STOP_BT); 01389 } 01390 break; 01391 01392 case ENCLOSE_CONDITION: 01393 r = add_opcode(reg, OP_CONDITION); 01394 if (r) return r; 01395 r = add_mem_num(reg, node->regnum); 01396 if (r) return r; 01397 01398 if (NTYPE(node->target) == NT_ALT) { 01399 Node* x = node->target; 01400 int len2; 01401 01402 len = compile_length_tree(NCAR(x), reg); /* yes-node */ 01403 if (len < 0) return len; 01404 if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG; 01405 x = NCDR(x); 01406 len2 = compile_length_tree(NCAR(x), reg); /* no-node */ 01407 if (len2 < 0) return len2; 01408 if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN; 01409 01410 x = node->target; 01411 r = add_rel_addr(reg, len + SIZE_OP_JUMP); 01412 if (r) return r; 01413 r = compile_tree(NCAR(x), reg); /* yes-node */ 01414 if (r) return r; 01415 r = add_opcode_rel_addr(reg, OP_JUMP, len2); 01416 if (r) return r; 01417 x = NCDR(x); 01418 r = compile_tree(NCAR(x), reg); /* no-node */ 01419 } 01420 else { 01421 return ONIGERR_PARSER_BUG; 01422 } 01423 break; 01424 01425 default: 01426 return ONIGERR_TYPE_BUG; 01427 break; 01428 } 01429 01430 return r; 01431 } 01432 01433 static int 01434 compile_length_anchor_node(AnchorNode* node, regex_t* reg) 01435 { 01436 int len; 01437 int tlen = 0; 01438 01439 if (node->target) { 01440 tlen = compile_length_tree(node->target, reg); 01441 if (tlen < 0) return tlen; 01442 } 01443 01444 switch (node->type) { 01445 case ANCHOR_PREC_READ: 01446 len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS; 01447 break; 01448 case ANCHOR_PREC_READ_NOT: 01449 len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS; 01450 break; 01451 case ANCHOR_LOOK_BEHIND: 01452 len = SIZE_OP_LOOK_BEHIND + tlen; 01453 break; 01454 case 
ANCHOR_LOOK_BEHIND_NOT: 01455 len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT; 01456 break; 01457 01458 default: 01459 len = SIZE_OPCODE; 01460 break; 01461 } 01462 01463 return len; 01464 } 01465 01466 static int 01467 compile_anchor_node(AnchorNode* node, regex_t* reg) 01468 { 01469 int r, len; 01470 01471 switch (node->type) { 01472 case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break; 01473 case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break; 01474 case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break; 01475 case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break; 01476 case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break; 01477 case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break; 01478 01479 /* used for implicit anchor optimization: /.*a/ ==> /(?:^|\G).*a/ */ 01480 case ANCHOR_ANYCHAR_STAR: r = add_opcode(reg, OP_BEGIN_POS_OR_LINE); break; 01481 01482 case ANCHOR_WORD_BOUND: 01483 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BOUND); 01484 else r = add_opcode(reg, OP_WORD_BOUND); 01485 break; 01486 case ANCHOR_NOT_WORD_BOUND: 01487 if (node->ascii_range) r = add_opcode(reg, OP_NOT_ASCII_WORD_BOUND); 01488 else r = add_opcode(reg, OP_NOT_WORD_BOUND); 01489 break; 01490 #ifdef USE_WORD_BEGIN_END 01491 case ANCHOR_WORD_BEGIN: 01492 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BEGIN); 01493 else r = add_opcode(reg, OP_WORD_BEGIN); 01494 break; 01495 case ANCHOR_WORD_END: 01496 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_END); 01497 else r = add_opcode(reg, OP_WORD_END); 01498 break; 01499 #endif 01500 case ANCHOR_KEEP: r = add_opcode(reg, OP_KEEP); break; 01501 01502 case ANCHOR_PREC_READ: 01503 r = add_opcode(reg, OP_PUSH_POS); 01504 if (r) return r; 01505 r = compile_tree(node->target, reg); 01506 if (r) return r; 01507 r = add_opcode(reg, OP_POP_POS); 01508 break; 01509 01510 case ANCHOR_PREC_READ_NOT: 01511 len = 
compile_length_tree(node->target, reg); 01512 if (len < 0) return len; 01513 r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS); 01514 if (r) return r; 01515 r = compile_tree(node->target, reg); 01516 if (r) return r; 01517 r = add_opcode(reg, OP_FAIL_POS); 01518 break; 01519 01520 case ANCHOR_LOOK_BEHIND: 01521 { 01522 int n; 01523 r = add_opcode(reg, OP_LOOK_BEHIND); 01524 if (r) return r; 01525 if (node->char_len < 0) { 01526 r = get_char_length_tree(node->target, reg, &n); 01527 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; 01528 } 01529 else 01530 n = node->char_len; 01531 r = add_length(reg, n); 01532 if (r) return r; 01533 r = compile_tree(node->target, reg); 01534 } 01535 break; 01536 01537 case ANCHOR_LOOK_BEHIND_NOT: 01538 { 01539 int n; 01540 len = compile_length_tree(node->target, reg); 01541 r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT, 01542 len + SIZE_OP_FAIL_LOOK_BEHIND_NOT); 01543 if (r) return r; 01544 if (node->char_len < 0) { 01545 r = get_char_length_tree(node->target, reg, &n); 01546 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; 01547 } 01548 else 01549 n = node->char_len; 01550 r = add_length(reg, n); 01551 if (r) return r; 01552 r = compile_tree(node->target, reg); 01553 if (r) return r; 01554 r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT); 01555 } 01556 break; 01557 01558 default: 01559 return ONIGERR_TYPE_BUG; 01560 break; 01561 } 01562 01563 return r; 01564 } 01565 01566 static int 01567 compile_length_tree(Node* node, regex_t* reg) 01568 { 01569 int len, type, r; 01570 01571 type = NTYPE(node); 01572 switch (type) { 01573 case NT_LIST: 01574 len = 0; 01575 do { 01576 r = compile_length_tree(NCAR(node), reg); 01577 if (r < 0) return r; 01578 len += r; 01579 } while (IS_NOT_NULL(node = NCDR(node))); 01580 r = len; 01581 break; 01582 01583 case NT_ALT: 01584 { 01585 int n; 01586 01587 n = r = 0; 01588 do { 01589 r += compile_length_tree(NCAR(node), reg); 01590 n++; 01591 } while (IS_NOT_NULL(node = 
NCDR(node))); 01592 r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1); 01593 } 01594 break; 01595 01596 case NT_STR: 01597 if (NSTRING_IS_RAW(node)) 01598 r = compile_length_string_raw_node(NSTR(node), reg); 01599 else 01600 r = compile_length_string_node(node, reg); 01601 break; 01602 01603 case NT_CCLASS: 01604 r = compile_length_cclass_node(NCCLASS(node), reg); 01605 break; 01606 01607 case NT_CTYPE: 01608 case NT_CANY: 01609 r = SIZE_OPCODE; 01610 break; 01611 01612 case NT_BREF: 01613 { 01614 BRefNode* br = NBREF(node); 01615 01616 #ifdef USE_BACKREF_WITH_LEVEL 01617 if (IS_BACKREF_NEST_LEVEL(br)) { 01618 r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH + 01619 SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); 01620 } 01621 else 01622 #endif 01623 if (br->back_num == 1) { 01624 r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2) 01625 ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM)); 01626 } 01627 else { 01628 r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); 01629 } 01630 } 01631 break; 01632 01633 #ifdef USE_SUBEXP_CALL 01634 case NT_CALL: 01635 r = SIZE_OP_CALL; 01636 break; 01637 #endif 01638 01639 case NT_QTFR: 01640 r = compile_length_quantifier_node(NQTFR(node), reg); 01641 break; 01642 01643 case NT_ENCLOSE: 01644 r = compile_length_enclose_node(NENCLOSE(node), reg); 01645 break; 01646 01647 case NT_ANCHOR: 01648 r = compile_length_anchor_node(NANCHOR(node), reg); 01649 break; 01650 01651 default: 01652 return ONIGERR_TYPE_BUG; 01653 break; 01654 } 01655 01656 return r; 01657 } 01658 01659 static int 01660 compile_tree(Node* node, regex_t* reg) 01661 { 01662 int n, type, len, pos, r = 0; 01663 01664 type = NTYPE(node); 01665 switch (type) { 01666 case NT_LIST: 01667 do { 01668 r = compile_tree(NCAR(node), reg); 01669 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 01670 break; 01671 01672 case NT_ALT: 01673 { 01674 Node* x = node; 01675 len = 0; 01676 do { 01677 len += compile_length_tree(NCAR(x), reg); 01678 if (NCDR(x) != NULL) { 01679 len += 
SIZE_OP_PUSH + SIZE_OP_JUMP; 01680 } 01681 } while (IS_NOT_NULL(x = NCDR(x))); 01682 pos = reg->used + len; /* goal position */ 01683 01684 do { 01685 len = compile_length_tree(NCAR(node), reg); 01686 if (IS_NOT_NULL(NCDR(node))) { 01687 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP); 01688 if (r) break; 01689 } 01690 r = compile_tree(NCAR(node), reg); 01691 if (r) break; 01692 if (IS_NOT_NULL(NCDR(node))) { 01693 len = pos - (reg->used + SIZE_OP_JUMP); 01694 r = add_opcode_rel_addr(reg, OP_JUMP, len); 01695 if (r) break; 01696 } 01697 } while (IS_NOT_NULL(node = NCDR(node))); 01698 } 01699 break; 01700 01701 case NT_STR: 01702 if (NSTRING_IS_RAW(node)) 01703 r = compile_string_raw_node(NSTR(node), reg); 01704 else 01705 r = compile_string_node(node, reg); 01706 break; 01707 01708 case NT_CCLASS: 01709 r = compile_cclass_node(NCCLASS(node), reg); 01710 break; 01711 01712 case NT_CTYPE: 01713 { 01714 int op; 01715 01716 switch (NCTYPE(node)->ctype) { 01717 case ONIGENC_CTYPE_WORD: 01718 if (NCTYPE(node)->ascii_range != 0) { 01719 if (NCTYPE(node)->not != 0) op = OP_NOT_ASCII_WORD; 01720 else op = OP_ASCII_WORD; 01721 } 01722 else { 01723 if (NCTYPE(node)->not != 0) op = OP_NOT_WORD; 01724 else op = OP_WORD; 01725 } 01726 break; 01727 default: 01728 return ONIGERR_TYPE_BUG; 01729 break; 01730 } 01731 r = add_opcode(reg, op); 01732 } 01733 break; 01734 01735 case NT_CANY: 01736 if (IS_MULTILINE(reg->options)) 01737 r = add_opcode(reg, OP_ANYCHAR_ML); 01738 else 01739 r = add_opcode(reg, OP_ANYCHAR); 01740 break; 01741 01742 case NT_BREF: 01743 { 01744 BRefNode* br = NBREF(node); 01745 01746 #ifdef USE_BACKREF_WITH_LEVEL 01747 if (IS_BACKREF_NEST_LEVEL(br)) { 01748 r = add_opcode(reg, OP_BACKREF_WITH_LEVEL); 01749 if (r) return r; 01750 r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE)); 01751 if (r) return r; 01752 r = add_length(reg, br->nest_level); 01753 if (r) return r; 01754 01755 goto add_bacref_mems; 01756 } 01757 else 01758 #endif 01759 if 
(br->back_num == 1) { 01760 n = br->back_static[0]; 01761 if (IS_IGNORECASE(reg->options)) { 01762 r = add_opcode(reg, OP_BACKREFN_IC); 01763 if (r) return r; 01764 r = add_mem_num(reg, n); 01765 } 01766 else { 01767 switch (n) { 01768 case 1: r = add_opcode(reg, OP_BACKREF1); break; 01769 case 2: r = add_opcode(reg, OP_BACKREF2); break; 01770 default: 01771 r = add_opcode(reg, OP_BACKREFN); 01772 if (r) return r; 01773 r = add_mem_num(reg, n); 01774 break; 01775 } 01776 } 01777 } 01778 else { 01779 int i; 01780 int* p; 01781 01782 if (IS_IGNORECASE(reg->options)) { 01783 r = add_opcode(reg, OP_BACKREF_MULTI_IC); 01784 } 01785 else { 01786 r = add_opcode(reg, OP_BACKREF_MULTI); 01787 } 01788 if (r) return r; 01789 01790 #ifdef USE_BACKREF_WITH_LEVEL 01791 add_bacref_mems: 01792 #endif 01793 r = add_length(reg, br->back_num); 01794 if (r) return r; 01795 p = BACKREFS_P(br); 01796 for (i = br->back_num - 1; i >= 0; i--) { 01797 r = add_mem_num(reg, p[i]); 01798 if (r) return r; 01799 } 01800 } 01801 } 01802 break; 01803 01804 #ifdef USE_SUBEXP_CALL 01805 case NT_CALL: 01806 r = compile_call(NCALL(node), reg); 01807 break; 01808 #endif 01809 01810 case NT_QTFR: 01811 r = compile_quantifier_node(NQTFR(node), reg); 01812 break; 01813 01814 case NT_ENCLOSE: 01815 r = compile_enclose_node(NENCLOSE(node), reg); 01816 break; 01817 01818 case NT_ANCHOR: 01819 r = compile_anchor_node(NANCHOR(node), reg); 01820 break; 01821 01822 default: 01823 #ifdef ONIG_DEBUG 01824 fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node)); 01825 #endif 01826 break; 01827 } 01828 01829 return r; 01830 } 01831 01832 #ifdef USE_NAMED_GROUP 01833 01834 static int 01835 noname_disable_map(Node** plink, GroupNumRemap* map, int* counter) 01836 { 01837 int r = 0; 01838 Node* node = *plink; 01839 01840 switch (NTYPE(node)) { 01841 case NT_LIST: 01842 case NT_ALT: 01843 do { 01844 r = noname_disable_map(&(NCAR(node)), map, counter); 01845 } while (r == 0 && IS_NOT_NULL(node = 
NCDR(node))); 01846 break; 01847 01848 case NT_QTFR: 01849 { 01850 Node** ptarget = &(NQTFR(node)->target); 01851 Node* old = *ptarget; 01852 r = noname_disable_map(ptarget, map, counter); 01853 if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) { 01854 onig_reduce_nested_quantifier(node, *ptarget); 01855 } 01856 } 01857 break; 01858 01859 case NT_ENCLOSE: 01860 { 01861 EncloseNode* en = NENCLOSE(node); 01862 if (en->type == ENCLOSE_MEMORY) { 01863 if (IS_ENCLOSE_NAMED_GROUP(en)) { 01864 (*counter)++; 01865 map[en->regnum].new_val = *counter; 01866 en->regnum = *counter; 01867 r = noname_disable_map(&(en->target), map, counter); 01868 } 01869 else { 01870 *plink = en->target; 01871 en->target = NULL_NODE; 01872 onig_node_free(node); 01873 r = noname_disable_map(plink, map, counter); 01874 } 01875 } 01876 else 01877 r = noname_disable_map(&(en->target), map, counter); 01878 } 01879 break; 01880 01881 case NT_ANCHOR: 01882 { 01883 AnchorNode* an = NANCHOR(node); 01884 switch (an->type) { 01885 case ANCHOR_PREC_READ: 01886 case ANCHOR_PREC_READ_NOT: 01887 case ANCHOR_LOOK_BEHIND: 01888 case ANCHOR_LOOK_BEHIND_NOT: 01889 r = noname_disable_map(&(an->target), map, counter); 01890 break; 01891 } 01892 } 01893 break; 01894 01895 default: 01896 break; 01897 } 01898 01899 return r; 01900 } 01901 01902 static int 01903 renumber_node_backref(Node* node, GroupNumRemap* map) 01904 { 01905 int i, pos, n, old_num; 01906 int *backs; 01907 BRefNode* bn = NBREF(node); 01908 01909 if (! 
IS_BACKREF_NAME_REF(bn)) 01910 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; 01911 01912 old_num = bn->back_num; 01913 if (IS_NULL(bn->back_dynamic)) 01914 backs = bn->back_static; 01915 else 01916 backs = bn->back_dynamic; 01917 01918 for (i = 0, pos = 0; i < old_num; i++) { 01919 n = map[backs[i]].new_val; 01920 if (n > 0) { 01921 backs[pos] = n; 01922 pos++; 01923 } 01924 } 01925 01926 bn->back_num = pos; 01927 return 0; 01928 } 01929 01930 static int 01931 renumber_by_map(Node* node, GroupNumRemap* map) 01932 { 01933 int r = 0; 01934 01935 switch (NTYPE(node)) { 01936 case NT_LIST: 01937 case NT_ALT: 01938 do { 01939 r = renumber_by_map(NCAR(node), map); 01940 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 01941 break; 01942 case NT_QTFR: 01943 r = renumber_by_map(NQTFR(node)->target, map); 01944 break; 01945 case NT_ENCLOSE: 01946 { 01947 EncloseNode* en = NENCLOSE(node); 01948 if (en->type == ENCLOSE_CONDITION) 01949 en->regnum = map[en->regnum].new_val; 01950 r = renumber_by_map(en->target, map); 01951 } 01952 break; 01953 01954 case NT_BREF: 01955 r = renumber_node_backref(node, map); 01956 break; 01957 01958 case NT_ANCHOR: 01959 { 01960 AnchorNode* an = NANCHOR(node); 01961 switch (an->type) { 01962 case ANCHOR_PREC_READ: 01963 case ANCHOR_PREC_READ_NOT: 01964 case ANCHOR_LOOK_BEHIND: 01965 case ANCHOR_LOOK_BEHIND_NOT: 01966 r = renumber_by_map(an->target, map); 01967 break; 01968 } 01969 } 01970 break; 01971 01972 default: 01973 break; 01974 } 01975 01976 return r; 01977 } 01978 01979 static int 01980 numbered_ref_check(Node* node) 01981 { 01982 int r = 0; 01983 01984 switch (NTYPE(node)) { 01985 case NT_LIST: 01986 case NT_ALT: 01987 do { 01988 r = numbered_ref_check(NCAR(node)); 01989 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 01990 break; 01991 case NT_QTFR: 01992 r = numbered_ref_check(NQTFR(node)->target); 01993 break; 01994 case NT_ENCLOSE: 01995 r = numbered_ref_check(NENCLOSE(node)->target); 01996 break; 01997 01998 case 
NT_BREF: 01999 if (! IS_BACKREF_NAME_REF(NBREF(node))) 02000 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; 02001 break; 02002 02003 default: 02004 break; 02005 } 02006 02007 return r; 02008 } 02009 02010 static int 02011 disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env) 02012 { 02013 int r, i, pos, counter; 02014 BitStatusType loc; 02015 GroupNumRemap* map; 02016 02017 map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1)); 02018 CHECK_NULL_RETURN_MEMERR(map); 02019 for (i = 1; i <= env->num_mem; i++) { 02020 map[i].new_val = 0; 02021 } 02022 counter = 0; 02023 r = noname_disable_map(root, map, &counter); 02024 if (r != 0) return r; 02025 02026 r = renumber_by_map(*root, map); 02027 if (r != 0) return r; 02028 02029 for (i = 1, pos = 1; i <= env->num_mem; i++) { 02030 if (map[i].new_val > 0) { 02031 SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i]; 02032 pos++; 02033 } 02034 } 02035 02036 loc = env->capture_history; 02037 BIT_STATUS_CLEAR(env->capture_history); 02038 for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) { 02039 if (BIT_STATUS_AT(loc, i)) { 02040 BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val); 02041 } 02042 } 02043 02044 env->num_mem = env->num_named; 02045 reg->num_mem = env->num_named; 02046 02047 return onig_renumber_name_table(reg, map); 02048 } 02049 #endif /* USE_NAMED_GROUP */ 02050 02051 #ifdef USE_SUBEXP_CALL 02052 static int 02053 unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg) 02054 { 02055 int i, offset; 02056 EncloseNode* en; 02057 AbsAddrType addr; 02058 02059 for (i = 0; i < uslist->num; i++) { 02060 en = NENCLOSE(uslist->us[i].target); 02061 if (! 
IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG; 02062 addr = en->call_addr; 02063 offset = uslist->us[i].offset; 02064 02065 BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR); 02066 } 02067 return 0; 02068 } 02069 #endif 02070 02071 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT 02072 static int 02073 quantifiers_memory_node_info(Node* node) 02074 { 02075 int r = 0; 02076 02077 switch (NTYPE(node)) { 02078 case NT_LIST: 02079 case NT_ALT: 02080 { 02081 int v; 02082 do { 02083 v = quantifiers_memory_node_info(NCAR(node)); 02084 if (v > r) r = v; 02085 } while (v >= 0 && IS_NOT_NULL(node = NCDR(node))); 02086 } 02087 break; 02088 02089 #ifdef USE_SUBEXP_CALL 02090 case NT_CALL: 02091 if (IS_CALL_RECURSION(NCALL(node))) { 02092 return NQ_TARGET_IS_EMPTY_REC; /* tiny version */ 02093 } 02094 else 02095 r = quantifiers_memory_node_info(NCALL(node)->target); 02096 break; 02097 #endif 02098 02099 case NT_QTFR: 02100 { 02101 QtfrNode* qn = NQTFR(node); 02102 if (qn->upper != 0) { 02103 r = quantifiers_memory_node_info(qn->target); 02104 } 02105 } 02106 break; 02107 02108 case NT_ENCLOSE: 02109 { 02110 EncloseNode* en = NENCLOSE(node); 02111 switch (en->type) { 02112 case ENCLOSE_MEMORY: 02113 return NQ_TARGET_IS_EMPTY_MEM; 02114 break; 02115 02116 case ENCLOSE_OPTION: 02117 case ENCLOSE_STOP_BACKTRACK: 02118 case ENCLOSE_CONDITION: 02119 r = quantifiers_memory_node_info(en->target); 02120 break; 02121 default: 02122 break; 02123 } 02124 } 02125 break; 02126 02127 case NT_BREF: 02128 case NT_STR: 02129 case NT_CTYPE: 02130 case NT_CCLASS: 02131 case NT_CANY: 02132 case NT_ANCHOR: 02133 default: 02134 break; 02135 } 02136 02137 return r; 02138 } 02139 #endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */ 02140 02141 static int 02142 get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env) 02143 { 02144 OnigDistance tmin; 02145 int r = 0; 02146 02147 *min = 0; 02148 switch (NTYPE(node)) { 02149 case NT_BREF: 02150 { 02151 int i; 02152 int* backs; 
02153 Node** nodes = SCANENV_MEM_NODES(env); 02154 BRefNode* br = NBREF(node); 02155 if (br->state & NST_RECURSION) break; 02156 02157 backs = BACKREFS_P(br); 02158 if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF; 02159 r = get_min_match_length(nodes[backs[0]], min, env); 02160 if (r != 0) break; 02161 for (i = 1; i < br->back_num; i++) { 02162 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; 02163 r = get_min_match_length(nodes[backs[i]], &tmin, env); 02164 if (r != 0) break; 02165 if (*min > tmin) *min = tmin; 02166 } 02167 } 02168 break; 02169 02170 #ifdef USE_SUBEXP_CALL 02171 case NT_CALL: 02172 if (IS_CALL_RECURSION(NCALL(node))) { 02173 EncloseNode* en = NENCLOSE(NCALL(node)->target); 02174 if (IS_ENCLOSE_MIN_FIXED(en)) 02175 *min = en->min_len; 02176 } 02177 else 02178 r = get_min_match_length(NCALL(node)->target, min, env); 02179 break; 02180 #endif 02181 02182 case NT_LIST: 02183 do { 02184 r = get_min_match_length(NCAR(node), &tmin, env); 02185 if (r == 0) *min += tmin; 02186 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 02187 break; 02188 02189 case NT_ALT: 02190 { 02191 Node *x, *y; 02192 y = node; 02193 do { 02194 x = NCAR(y); 02195 r = get_min_match_length(x, &tmin, env); 02196 if (r != 0) break; 02197 if (y == node) *min = tmin; 02198 else if (*min > tmin) *min = tmin; 02199 } while (r == 0 && IS_NOT_NULL(y = NCDR(y))); 02200 } 02201 break; 02202 02203 case NT_STR: 02204 { 02205 StrNode* sn = NSTR(node); 02206 *min = sn->end - sn->s; 02207 } 02208 break; 02209 02210 case NT_CTYPE: 02211 *min = 1; 02212 break; 02213 02214 case NT_CCLASS: 02215 case NT_CANY: 02216 *min = 1; 02217 break; 02218 02219 case NT_QTFR: 02220 { 02221 QtfrNode* qn = NQTFR(node); 02222 02223 if (qn->lower > 0) { 02224 r = get_min_match_length(qn->target, min, env); 02225 if (r == 0) 02226 *min = distance_multiply(*min, qn->lower); 02227 } 02228 } 02229 break; 02230 02231 case NT_ENCLOSE: 02232 { 02233 EncloseNode* en = NENCLOSE(node); 02234 switch 
(en->type) { 02235 case ENCLOSE_MEMORY: 02236 #ifdef USE_SUBEXP_CALL 02237 if (IS_ENCLOSE_MIN_FIXED(en)) 02238 *min = en->min_len; 02239 else { 02240 r = get_min_match_length(en->target, min, env); 02241 if (r == 0) { 02242 en->min_len = *min; 02243 SET_ENCLOSE_STATUS(node, NST_MIN_FIXED); 02244 } 02245 } 02246 break; 02247 #endif 02248 case ENCLOSE_OPTION: 02249 case ENCLOSE_STOP_BACKTRACK: 02250 case ENCLOSE_CONDITION: 02251 r = get_min_match_length(en->target, min, env); 02252 break; 02253 } 02254 } 02255 break; 02256 02257 case NT_ANCHOR: 02258 default: 02259 break; 02260 } 02261 02262 return r; 02263 } 02264 02265 static int 02266 get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env) 02267 { 02268 OnigDistance tmax; 02269 int r = 0; 02270 02271 *max = 0; 02272 switch (NTYPE(node)) { 02273 case NT_LIST: 02274 do { 02275 r = get_max_match_length(NCAR(node), &tmax, env); 02276 if (r == 0) 02277 *max = distance_add(*max, tmax); 02278 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 02279 break; 02280 02281 case NT_ALT: 02282 do { 02283 r = get_max_match_length(NCAR(node), &tmax, env); 02284 if (r == 0 && *max < tmax) *max = tmax; 02285 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 02286 break; 02287 02288 case NT_STR: 02289 { 02290 StrNode* sn = NSTR(node); 02291 *max = sn->end - sn->s; 02292 } 02293 break; 02294 02295 case NT_CTYPE: 02296 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc); 02297 break; 02298 02299 case NT_CCLASS: 02300 case NT_CANY: 02301 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc); 02302 break; 02303 02304 case NT_BREF: 02305 { 02306 int i; 02307 int* backs; 02308 Node** nodes = SCANENV_MEM_NODES(env); 02309 BRefNode* br = NBREF(node); 02310 if (br->state & NST_RECURSION) { 02311 *max = ONIG_INFINITE_DISTANCE; 02312 break; 02313 } 02314 backs = BACKREFS_P(br); 02315 for (i = 0; i < br->back_num; i++) { 02316 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; 02317 r = get_max_match_length(nodes[backs[i]], &tmax, env); 02318 if 
(r != 0) break; 02319 if (*max < tmax) *max = tmax; 02320 } 02321 } 02322 break; 02323 02324 #ifdef USE_SUBEXP_CALL 02325 case NT_CALL: 02326 if (! IS_CALL_RECURSION(NCALL(node))) 02327 r = get_max_match_length(NCALL(node)->target, max, env); 02328 else 02329 *max = ONIG_INFINITE_DISTANCE; 02330 break; 02331 #endif 02332 02333 case NT_QTFR: 02334 { 02335 QtfrNode* qn = NQTFR(node); 02336 02337 if (qn->upper != 0) { 02338 r = get_max_match_length(qn->target, max, env); 02339 if (r == 0 && *max != 0) { 02340 if (! IS_REPEAT_INFINITE(qn->upper)) 02341 *max = distance_multiply(*max, qn->upper); 02342 else 02343 *max = ONIG_INFINITE_DISTANCE; 02344 } 02345 } 02346 } 02347 break; 02348 02349 case NT_ENCLOSE: 02350 { 02351 EncloseNode* en = NENCLOSE(node); 02352 switch (en->type) { 02353 case ENCLOSE_MEMORY: 02354 #ifdef USE_SUBEXP_CALL 02355 if (IS_ENCLOSE_MAX_FIXED(en)) 02356 *max = en->max_len; 02357 else { 02358 r = get_max_match_length(en->target, max, env); 02359 if (r == 0) { 02360 en->max_len = *max; 02361 SET_ENCLOSE_STATUS(node, NST_MAX_FIXED); 02362 } 02363 } 02364 break; 02365 #endif 02366 case ENCLOSE_OPTION: 02367 case ENCLOSE_STOP_BACKTRACK: 02368 case ENCLOSE_CONDITION: 02369 r = get_max_match_length(en->target, max, env); 02370 break; 02371 } 02372 } 02373 break; 02374 02375 case NT_ANCHOR: 02376 default: 02377 break; 02378 } 02379 02380 return r; 02381 } 02382 02383 #define GET_CHAR_LEN_VARLEN -1 02384 #define GET_CHAR_LEN_TOP_ALT_VARLEN -2 02385 02386 /* fixed size pattern node only */ 02387 static int 02388 get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) 02389 { 02390 int tlen; 02391 int r = 0; 02392 02393 level++; 02394 *len = 0; 02395 switch (NTYPE(node)) { 02396 case NT_LIST: 02397 do { 02398 r = get_char_length_tree1(NCAR(node), reg, &tlen, level); 02399 if (r == 0) 02400 *len = (int )distance_add(*len, tlen); 02401 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 02402 break; 02403 02404 case NT_ALT: 02405 { 02406 int 
tlen2; 02407 int varlen = 0; 02408 02409 r = get_char_length_tree1(NCAR(node), reg, &tlen, level); 02410 while (r == 0 && IS_NOT_NULL(node = NCDR(node))) { 02411 r = get_char_length_tree1(NCAR(node), reg, &tlen2, level); 02412 if (r == 0) { 02413 if (tlen != tlen2) 02414 varlen = 1; 02415 } 02416 } 02417 if (r == 0) { 02418 if (varlen != 0) { 02419 if (level == 1) 02420 r = GET_CHAR_LEN_TOP_ALT_VARLEN; 02421 else 02422 r = GET_CHAR_LEN_VARLEN; 02423 } 02424 else 02425 *len = tlen; 02426 } 02427 } 02428 break; 02429 02430 case NT_STR: 02431 { 02432 StrNode* sn = NSTR(node); 02433 UChar *s = sn->s; 02434 while (s < sn->end) { 02435 s += enclen(reg->enc, s, sn->end); 02436 (*len)++; 02437 } 02438 } 02439 break; 02440 02441 case NT_QTFR: 02442 { 02443 QtfrNode* qn = NQTFR(node); 02444 if (qn->lower == qn->upper) { 02445 r = get_char_length_tree1(qn->target, reg, &tlen, level); 02446 if (r == 0) 02447 *len = (int )distance_multiply(tlen, qn->lower); 02448 } 02449 else 02450 r = GET_CHAR_LEN_VARLEN; 02451 } 02452 break; 02453 02454 #ifdef USE_SUBEXP_CALL 02455 case NT_CALL: 02456 if (! 
IS_CALL_RECURSION(NCALL(node))) 02457 r = get_char_length_tree1(NCALL(node)->target, reg, len, level); 02458 else 02459 r = GET_CHAR_LEN_VARLEN; 02460 break; 02461 #endif 02462 02463 case NT_CTYPE: 02464 *len = 1; 02465 break; 02466 02467 case NT_CCLASS: 02468 case NT_CANY: 02469 *len = 1; 02470 break; 02471 02472 case NT_ENCLOSE: 02473 { 02474 EncloseNode* en = NENCLOSE(node); 02475 switch (en->type) { 02476 case ENCLOSE_MEMORY: 02477 #ifdef USE_SUBEXP_CALL 02478 if (IS_ENCLOSE_CLEN_FIXED(en)) 02479 *len = en->char_len; 02480 else { 02481 r = get_char_length_tree1(en->target, reg, len, level); 02482 if (r == 0) { 02483 en->char_len = *len; 02484 SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED); 02485 } 02486 } 02487 break; 02488 #endif 02489 case ENCLOSE_OPTION: 02490 case ENCLOSE_STOP_BACKTRACK: 02491 case ENCLOSE_CONDITION: 02492 r = get_char_length_tree1(en->target, reg, len, level); 02493 break; 02494 default: 02495 break; 02496 } 02497 } 02498 break; 02499 02500 case NT_ANCHOR: 02501 break; 02502 02503 default: 02504 r = GET_CHAR_LEN_VARLEN; 02505 break; 02506 } 02507 02508 return r; 02509 } 02510 02511 static int 02512 get_char_length_tree(Node* node, regex_t* reg, int* len) 02513 { 02514 return get_char_length_tree1(node, reg, len, 0); 02515 } 02516 02517 /* x is not included y ==> 1 : 0 */ 02518 static int 02519 is_not_included(Node* x, Node* y, regex_t* reg) 02520 { 02521 int i; 02522 OnigDistance len; 02523 OnigCodePoint code; 02524 UChar *p; 02525 int ytype; 02526 02527 retry: 02528 ytype = NTYPE(y); 02529 switch (NTYPE(x)) { 02530 case NT_CTYPE: 02531 { 02532 switch (ytype) { 02533 case NT_CTYPE: 02534 if (NCTYPE(y)->ctype == NCTYPE(x)->ctype && 02535 NCTYPE(y)->not != NCTYPE(x)->not && 02536 NCTYPE(y)->ascii_range == NCTYPE(x)->ascii_range) 02537 return 1; 02538 else 02539 return 0; 02540 break; 02541 02542 case NT_CCLASS: 02543 swap: 02544 { 02545 Node* tmp; 02546 tmp = x; x = y; y = tmp; 02547 goto retry; 02548 } 02549 break; 02550 02551 case NT_STR: 02552 
goto swap; 02553 break; 02554 02555 default: 02556 break; 02557 } 02558 } 02559 break; 02560 02561 case NT_CCLASS: 02562 { 02563 CClassNode* xc = NCCLASS(x); 02564 switch (ytype) { 02565 case NT_CTYPE: 02566 switch (NCTYPE(y)->ctype) { 02567 case ONIGENC_CTYPE_WORD: 02568 if (NCTYPE(y)->not == 0) { 02569 if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) { 02570 for (i = 0; i < SINGLE_BYTE_SIZE; i++) { 02571 if (BITSET_AT(xc->bs, i)) { 02572 if (NCTYPE(y)->ascii_range) { 02573 if (IS_CODE_SB_WORD(reg->enc, i)) return 0; 02574 } 02575 else { 02576 if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0; 02577 } 02578 } 02579 } 02580 return 1; 02581 } 02582 return 0; 02583 } 02584 else { 02585 for (i = 0; i < SINGLE_BYTE_SIZE; i++) { 02586 int is_word; 02587 if (NCTYPE(y)->ascii_range) 02588 is_word = IS_CODE_SB_WORD(reg->enc, i); 02589 else 02590 is_word = ONIGENC_IS_CODE_WORD(reg->enc, i); 02591 if (! is_word) { 02592 if (!IS_NCCLASS_NOT(xc)) { 02593 if (BITSET_AT(xc->bs, i)) 02594 return 0; 02595 } 02596 else { 02597 if (! 
BITSET_AT(xc->bs, i)) 02598 return 0; 02599 } 02600 } 02601 } 02602 return 1; 02603 } 02604 break; 02605 02606 default: 02607 break; 02608 } 02609 break; 02610 02611 case NT_CCLASS: 02612 { 02613 int v; 02614 CClassNode* yc = NCCLASS(y); 02615 02616 for (i = 0; i < SINGLE_BYTE_SIZE; i++) { 02617 v = BITSET_AT(xc->bs, i); 02618 if ((v != 0 && !IS_NCCLASS_NOT(xc)) || 02619 (v == 0 && IS_NCCLASS_NOT(xc))) { 02620 v = BITSET_AT(yc->bs, i); 02621 if ((v != 0 && !IS_NCCLASS_NOT(yc)) || 02622 (v == 0 && IS_NCCLASS_NOT(yc))) 02623 return 0; 02624 } 02625 } 02626 if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) || 02627 (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc))) 02628 return 1; 02629 return 0; 02630 } 02631 break; 02632 02633 case NT_STR: 02634 goto swap; 02635 break; 02636 02637 default: 02638 break; 02639 } 02640 } 02641 break; 02642 02643 case NT_STR: 02644 { 02645 StrNode* xs = NSTR(x); 02646 if (NSTRING_LEN(x) == 0) 02647 break; 02648 02649 switch (ytype) { 02650 case NT_CTYPE: 02651 switch (NCTYPE(y)->ctype) { 02652 case ONIGENC_CTYPE_WORD: 02653 if (NCTYPE(y)->ascii_range) { 02654 if (ONIGENC_IS_MBC_ASCII_WORD(reg->enc, xs->s, xs->end)) 02655 return NCTYPE(y)->not; 02656 else 02657 return !(NCTYPE(y)->not); 02658 } 02659 else { 02660 if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end)) 02661 return NCTYPE(y)->not; 02662 else 02663 return !(NCTYPE(y)->not); 02664 } 02665 break; 02666 default: 02667 break; 02668 } 02669 break; 02670 02671 case NT_CCLASS: 02672 { 02673 CClassNode* cc = NCCLASS(y); 02674 02675 code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s, 02676 xs->s + ONIGENC_MBC_MAXLEN(reg->enc)); 02677 return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 
0 : 1); 02678 } 02679 break; 02680 02681 case NT_STR: 02682 { 02683 UChar *q; 02684 StrNode* ys = NSTR(y); 02685 len = NSTRING_LEN(x); 02686 if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y); 02687 if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) { 02688 /* tiny version */ 02689 return 0; 02690 } 02691 else { 02692 for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i < len; i++, p++, q++) { 02693 if (*p != *q) return 1; 02694 } 02695 } 02696 } 02697 break; 02698 02699 default: 02700 break; 02701 } 02702 } 02703 break; 02704 02705 default: 02706 break; 02707 } 02708 02709 return 0; 02710 } 02711 02712 static Node* 02713 get_head_value_node(Node* node, int exact, regex_t* reg) 02714 { 02715 Node* n = NULL_NODE; 02716 02717 switch (NTYPE(node)) { 02718 case NT_BREF: 02719 case NT_ALT: 02720 case NT_CANY: 02721 #ifdef USE_SUBEXP_CALL 02722 case NT_CALL: 02723 #endif 02724 break; 02725 02726 case NT_CTYPE: 02727 case NT_CCLASS: 02728 if (exact == 0) { 02729 n = node; 02730 } 02731 break; 02732 02733 case NT_LIST: 02734 n = get_head_value_node(NCAR(node), exact, reg); 02735 break; 02736 02737 case NT_STR: 02738 { 02739 StrNode* sn = NSTR(node); 02740 02741 if (sn->end <= sn->s) 02742 break; 02743 02744 if (exact != 0 && 02745 !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) { 02746 } 02747 else { 02748 n = node; 02749 } 02750 } 02751 break; 02752 02753 case NT_QTFR: 02754 { 02755 QtfrNode* qn = NQTFR(node); 02756 if (qn->lower > 0) { 02757 if (IS_NOT_NULL(qn->head_exact)) 02758 n = qn->head_exact; 02759 else 02760 n = get_head_value_node(qn->target, exact, reg); 02761 } 02762 } 02763 break; 02764 02765 case NT_ENCLOSE: 02766 { 02767 EncloseNode* en = NENCLOSE(node); 02768 switch (en->type) { 02769 case ENCLOSE_OPTION: 02770 { 02771 OnigOptionType options = reg->options; 02772 02773 reg->options = NENCLOSE(node)->option; 02774 n = get_head_value_node(NENCLOSE(node)->target, exact, reg); 02775 reg->options = options; 02776 } 02777 break; 02778 02779 case ENCLOSE_MEMORY: 
02780 case ENCLOSE_STOP_BACKTRACK: 02781 case ENCLOSE_CONDITION: 02782 n = get_head_value_node(en->target, exact, reg); 02783 break; 02784 } 02785 } 02786 break; 02787 02788 case NT_ANCHOR: 02789 if (NANCHOR(node)->type == ANCHOR_PREC_READ) 02790 n = get_head_value_node(NANCHOR(node)->target, exact, reg); 02791 break; 02792 02793 default: 02794 break; 02795 } 02796 02797 return n; 02798 } 02799 02800 static int 02801 check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask) 02802 { 02803 int type, r = 0; 02804 02805 type = NTYPE(node); 02806 if ((NTYPE2BIT(type) & type_mask) == 0) 02807 return 1; 02808 02809 switch (type) { 02810 case NT_LIST: 02811 case NT_ALT: 02812 do { 02813 r = check_type_tree(NCAR(node), type_mask, enclose_mask, 02814 anchor_mask); 02815 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 02816 break; 02817 02818 case NT_QTFR: 02819 r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask, 02820 anchor_mask); 02821 break; 02822 02823 case NT_ENCLOSE: 02824 { 02825 EncloseNode* en = NENCLOSE(node); 02826 if ((en->type & enclose_mask) == 0) 02827 return 1; 02828 02829 r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask); 02830 } 02831 break; 02832 02833 case NT_ANCHOR: 02834 type = NANCHOR(node)->type; 02835 if ((type & anchor_mask) == 0) 02836 return 1; 02837 02838 if (NANCHOR(node)->target) 02839 r = check_type_tree(NANCHOR(node)->target, 02840 type_mask, enclose_mask, anchor_mask); 02841 break; 02842 02843 default: 02844 break; 02845 } 02846 return r; 02847 } 02848 02849 #ifdef USE_SUBEXP_CALL 02850 02851 #define RECURSION_EXIST 1 02852 #define RECURSION_INFINITE 2 02853 02854 static int 02855 subexp_inf_recursive_check(Node* node, ScanEnv* env, int head) 02856 { 02857 int type; 02858 int r = 0; 02859 02860 type = NTYPE(node); 02861 switch (type) { 02862 case NT_LIST: 02863 { 02864 Node *x; 02865 OnigDistance min; 02866 int ret; 02867 02868 x = node; 02869 do { 02870 ret = 
#ifdef USE_SUBEXP_CALL

#define RECURSION_EXIST       1
#define RECURSION_INFINITE    2

/* Look for a path from node back to the group currently marked NST_MARK1.
 *
 * head is non-zero while nothing has been consumed on the current path; it
 * is cleared after a list element whose minimum match length is not 0.
 *
 * Returns 0 (no recursion found), RECURSION_EXIST (the marked group is
 * reached after something was consumed), RECURSION_INFINITE (it is reached
 * while head is still set) or a negative error code.  Groups being visited
 * are marked NST_MARK2 so that they are not entered twice.
 */
static int
subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
{
  int r = 0;
  int ret;

  switch (NTYPE(node)) {
  case NT_LIST:
    {
      Node* item;
      OnigDistance min;

      for (item = node; IS_NOT_NULL(item); item = NCDR(item)) {
        ret = subexp_inf_recursive_check(NCAR(item), env, head);
        if (ret < 0 || ret == RECURSION_INFINITE) return ret;
        r |= ret;
        if (head) {
          ret = get_min_match_length(NCAR(item), &min, env);
          if (ret != 0) return ret;
          if (min != 0) head = 0;
        }
      }
    }
    break;

  case NT_ALT:
    /* the result is RECURSION_EXIST only if every branch reports it */
    r = RECURSION_EXIST;
    for (; IS_NOT_NULL(node); node = NCDR(node)) {
      ret = subexp_inf_recursive_check(NCAR(node), env, head);
      if (ret < 0 || ret == RECURSION_INFINITE) return ret;
      r &= ret;
    }
    break;

  case NT_QTFR:
    r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
    /* a repeat that may run zero times can skip the recursion */
    if (r == RECURSION_EXIST && NQTFR(node)->lower == 0)
      r = 0;
    break;

  case NT_ANCHOR:
    switch (NANCHOR(node)->type) {
    case ANCHOR_PREC_READ:
    case ANCHOR_PREC_READ_NOT:
    case ANCHOR_LOOK_BEHIND:
    case ANCHOR_LOOK_BEHIND_NOT:
      r = subexp_inf_recursive_check(NANCHOR(node)->target, env, head);
      break;
    default:
      break;
    }
    break;

  case NT_CALL:
    r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
    break;

  case NT_ENCLOSE:
    if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
      return 0;
    if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
      return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);

    SET_ENCLOSE_STATUS(node, NST_MARK2);
    r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
    CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
    break;

  default:
    break;
  }

  return r;
}

#endif /* USE_SUBEXP_CALL */
#ifdef USE_SUBEXP_CALL

/* Walk the whole tree and test every group flagged as recursive with
 * subexp_inf_recursive_check().
 *
 * Returns 0 on success, ONIGERR_NEVER_ENDING_RECURSION when the check of a
 * recursive group yields a positive result, or a non-zero value propagated
 * from a sub-tree.
 */
static int
subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
{
  int r = 0;

  switch (NTYPE(node)) {
  case NT_LIST:
  case NT_ALT:
    for (; IS_NOT_NULL(node); node = NCDR(node)) {
      r = subexp_inf_recursive_check_trav(NCAR(node), env);
      if (r != 0)
        break;
    }
    break;

  case NT_QTFR:
    r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
    break;

  case NT_ANCHOR:
    switch (NANCHOR(node)->type) {
    case ANCHOR_PREC_READ:
    case ANCHOR_PREC_READ_NOT:
    case ANCHOR_LOOK_BEHIND:
    case ANCHOR_LOOK_BEHIND_NOT:
      r = subexp_inf_recursive_check_trav(NANCHOR(node)->target, env);
      break;
    default:
      break;
    }
    break;

  case NT_ENCLOSE:
    {
      EncloseNode* group = NENCLOSE(node);

      if (IS_ENCLOSE_RECURSION(group)) {
        /* mark the group under test so the check can recognise it */
        SET_ENCLOSE_STATUS(node, NST_MARK1);
        r = subexp_inf_recursive_check(group->target, env, 1);
        if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
        CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
      }
      r = subexp_inf_recursive_check_trav(group->target, env);
    }
    break;

  default:
    break;
  }

  return r;
}

#endif /* USE_SUBEXP_CALL */
#ifdef USE_SUBEXP_CALL

/* Report whether the group currently marked NST_MARK1 can be reached again
 * from node.
 *
 * Returns non-zero when such a path exists and 0 otherwise.  Every call
 * node through which the marked group is reached gets its recursion flag
 * set (SET_CALL_RECURSION).  Groups being visited are marked NST_MARK2 so
 * that they are not entered twice.
 */
static int
subexp_recursive_check(Node* node)
{
  int found = 0;

  switch (NTYPE(node)) {
  case NT_LIST:
  case NT_ALT:
    /* every element is visited so that all call nodes get flagged */
    for (; IS_NOT_NULL(node); node = NCDR(node))
      found |= subexp_recursive_check(NCAR(node));
    break;

  case NT_QTFR:
    found = subexp_recursive_check(NQTFR(node)->target);
    break;

  case NT_ANCHOR:
    switch (NANCHOR(node)->type) {
    case ANCHOR_PREC_READ:
    case ANCHOR_PREC_READ_NOT:
    case ANCHOR_LOOK_BEHIND:
    case ANCHOR_LOOK_BEHIND_NOT:
      found = subexp_recursive_check(NANCHOR(node)->target);
      break;
    default:
      break;
    }
    break;

  case NT_CALL:
    found = subexp_recursive_check(NCALL(node)->target);
    if (found != 0) SET_CALL_RECURSION(node);
    break;

  case NT_ENCLOSE:
    if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
      return 0;
    if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
      return 1; /* recursion */

    SET_ENCLOSE_STATUS(node, NST_MARK2);
    found = subexp_recursive_check(NENCLOSE(node)->target);
    CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
    break;

  default:
    break;
  }

  return found;
}

#endif /* USE_SUBEXP_CALL */
#ifdef USE_SUBEXP_CALL

/* Walk the whole tree, flag called groups that are recursive
 * (NST_RECURSION) and note repeats with upper == 0 that contain a called
 * group (is_refered).
 *
 * Returns FOUND_CALLED_NODE when a called group was found below node,
 * 0 when none was found, or a negative value propagated from a list or
 * alternation element.
 */
static int
subexp_recursive_check_trav(Node* node, ScanEnv* env)
{
#define FOUND_CALLED_NODE  1

  int r = 0;

  switch (NTYPE(node)) {
  case NT_LIST:
  case NT_ALT:
    for (; IS_NOT_NULL(node); node = NCDR(node)) {
      int ret = subexp_recursive_check_trav(NCAR(node), env);

      if (ret == FOUND_CALLED_NODE)
        r = FOUND_CALLED_NODE;
      else if (ret < 0)
        return ret;
    }
    break;

  case NT_QTFR:
    {
      QtfrNode* qtfr = NQTFR(node);

      r = subexp_recursive_check_trav(qtfr->target, env);
      if (qtfr->upper == 0 && r == FOUND_CALLED_NODE)
        qtfr->is_refered = 1;
    }
    break;

  case NT_ANCHOR:
    switch (NANCHOR(node)->type) {
    case ANCHOR_PREC_READ:
    case ANCHOR_PREC_READ_NOT:
    case ANCHOR_LOOK_BEHIND:
    case ANCHOR_LOOK_BEHIND_NOT:
      r = subexp_recursive_check_trav(NANCHOR(node)->target, env);
      break;
    default:
      break;
    }
    break;

  case NT_ENCLOSE:
    {
      EncloseNode* group = NENCLOSE(node);

      /* a called group not yet known to be recursive is tested once */
      if (! IS_ENCLOSE_RECURSION(group) && IS_ENCLOSE_CALLED(group)) {
        SET_ENCLOSE_STATUS(node, NST_MARK1);
        r = subexp_recursive_check(group->target);
        if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
        CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
      }

      r = subexp_recursive_check_trav(group->target, env);
      if (IS_ENCLOSE_CALLED(group))
        r |= FOUND_CALLED_NODE;
    }
    break;

  default:
    break;
  }

  return r;
}

#endif /* USE_SUBEXP_CALL */
#ifdef USE_SUBEXP_CALL

/* Resolve every subexp call node below node to the group it refers to.
 *
 * For each call the target group is looked up (by number, or by name when
 * USE_NAMED_GROUP is enabled), marked NST_CALLED, its bit is switched on in
 * env->bt_mem_start and env->unset_addr_list is recorded in the call node.
 *
 * Returns 0 on success or one of the ONIGERR_* codes visible below; for
 * undefined or multiply defined references the offending name is stored in
 * env through onig_scan_env_set_error_string().
 */
static int
setup_subexp_call(Node* node, ScanEnv* env)
{
  int r = 0;

  switch (NTYPE(node)) {
  case NT_LIST:
  case NT_ALT:
    for (; IS_NOT_NULL(node); node = NCDR(node)) {
      r = setup_subexp_call(NCAR(node), env);
      if (r != 0)
        break;
    }
    break;

  case NT_QTFR:
    r = setup_subexp_call(NQTFR(node)->target, env);
    break;

  case NT_ENCLOSE:
    r = setup_subexp_call(NENCLOSE(node)->target, env);
    break;

  case NT_CALL:
    {
      CallNode* call = NCALL(node);
      Node** groups = SCANENV_MEM_NODES(env);

      /* step 1: validate or determine call->group_num */
      if (call->group_num != 0) {
        int gnum = call->group_num;

#ifdef USE_NAMED_GROUP
        if (env->num_named > 0 &&
            IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
            !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
          return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
        }
#endif
        if (gnum > env->num_mem) {
          onig_scan_env_set_error_string(env,
                   ONIGERR_UNDEFINED_GROUP_REFERENCE, call->name, call->name_end);
          return ONIGERR_UNDEFINED_GROUP_REFERENCE;
        }
      }
#ifdef USE_NAMED_GROUP
#ifdef USE_PERL_SUBEXP_CALL
      else if (call->name == call->name_end) {
        /* group number 0 with an empty name: attach group 0 as it is */
      }
#endif
      else {
        int *refs;
        int n = onig_name_to_group_numbers(env->reg, call->name,
                                           call->name_end, &refs);

        if (n <= 0) {
          onig_scan_env_set_error_string(env,
                   ONIGERR_UNDEFINED_NAME_REFERENCE, call->name, call->name_end);
          return ONIGERR_UNDEFINED_NAME_REFERENCE;
        }
        if (n > 1 &&
            ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL)) {
          onig_scan_env_set_error_string(env,
            ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, call->name, call->name_end);
          return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
        }
        call->group_num = refs[0];
      }
#else
      else {
        /* nothing to resolve without named group support */
        break;
      }
#endif

      /* step 2: attach the target group to the call node */
      call->target = groups[call->group_num];
      if (IS_NULL(call->target)) {
        onig_scan_env_set_error_string(env,
                 ONIGERR_UNDEFINED_NAME_REFERENCE, call->name, call->name_end);
        return ONIGERR_UNDEFINED_NAME_REFERENCE;
      }
      SET_ENCLOSE_STATUS(call->target, NST_CALLED);
      BIT_STATUS_ON_AT(env->bt_mem_start, call->group_num);
      call->unset_addr_list = env->unset_addr_list;
    }
    break;

  case NT_ANCHOR:
    switch (NANCHOR(node)->type) {
    case ANCHOR_PREC_READ:
    case ANCHOR_PREC_READ_NOT:
    case ANCHOR_LOOK_BEHIND:
    case ANCHOR_LOOK_BEHIND_NOT:
      r = setup_subexp_call(NANCHOR(node)->target, env);
      break;
    default:
      break;
    }
    break;

  default:
    break;
  }

  return r;
}

#endif /* USE_SUBEXP_CALL */
03234 (?<=A|B) ==> (?<=A)|(?<=B) 03235 (?<!A|B) ==> (?<!A)(?<!B) 03236 */ 03237 static int 03238 divide_look_behind_alternatives(Node* node) 03239 { 03240 Node *head, *np, *insert_node; 03241 AnchorNode* an = NANCHOR(node); 03242 int anc_type = an->type; 03243 03244 head = an->target; 03245 np = NCAR(head); 03246 swap_node(node, head); 03247 NCAR(node) = head; 03248 NANCHOR(head)->target = np; 03249 03250 np = node; 03251 while ((np = NCDR(np)) != NULL_NODE) { 03252 insert_node = onig_node_new_anchor(anc_type); 03253 CHECK_NULL_RETURN_MEMERR(insert_node); 03254 NANCHOR(insert_node)->target = NCAR(np); 03255 NCAR(np) = insert_node; 03256 } 03257 03258 if (anc_type == ANCHOR_LOOK_BEHIND_NOT) { 03259 np = node; 03260 do { 03261 SET_NTYPE(np, NT_LIST); /* alt -> list */ 03262 } while ((np = NCDR(np)) != NULL_NODE); 03263 } 03264 return 0; 03265 } 03266 03267 static int 03268 setup_look_behind(Node* node, regex_t* reg, ScanEnv* env) 03269 { 03270 int r, len; 03271 AnchorNode* an = NANCHOR(node); 03272 03273 r = get_char_length_tree(an->target, reg, &len); 03274 if (r == 0) 03275 an->char_len = len; 03276 else if (r == GET_CHAR_LEN_VARLEN) 03277 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN; 03278 else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) { 03279 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND)) 03280 r = divide_look_behind_alternatives(node); 03281 else 03282 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN; 03283 } 03284 03285 return r; 03286 } 03287 03288 static int 03289 next_setup(Node* node, Node* next_node, int in_root, regex_t* reg) 03290 { 03291 int type; 03292 03293 retry: 03294 type = NTYPE(node); 03295 if (type == NT_QTFR) { 03296 QtfrNode* qn = NQTFR(node); 03297 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) { 03298 #ifdef USE_QTFR_PEEK_NEXT 03299 Node* n = get_head_value_node(next_node, 1, reg); 03300 /* '\0': for UTF-16BE etc... 
*/ 03301 if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') { 03302 qn->next_head_exact = n; 03303 } 03304 #endif 03305 /* automatic possessivation a*b ==> (?>a*)b */ 03306 if (qn->lower <= 1) { 03307 int ttype = NTYPE(qn->target); 03308 if (IS_NODE_TYPE_SIMPLE(ttype)) { 03309 Node *x, *y; 03310 x = get_head_value_node(qn->target, 0, reg); 03311 if (IS_NOT_NULL(x)) { 03312 y = get_head_value_node(next_node, 0, reg); 03313 if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) { 03314 Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK); 03315 CHECK_NULL_RETURN_MEMERR(en); 03316 SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT); 03317 swap_node(node, en); 03318 NENCLOSE(node)->target = en; 03319 } 03320 } 03321 } 03322 } 03323 03324 #ifndef ONIG_DONT_OPTIMIZE 03325 if (NTYPE(node) == NT_QTFR && /* the type may be changed by above block */ 03326 in_root && /* qn->lower == 0 && */ 03327 NTYPE(qn->target) == NT_CANY && 03328 ! IS_MULTILINE(reg->options)) { 03329 /* implicit anchor: /.*a/ ==> /(?:^|\G).*a/ */ 03330 Node *np; 03331 np = onig_node_new_list(NULL_NODE, NULL_NODE); 03332 CHECK_NULL_RETURN_MEMERR(np); 03333 swap_node(node, np); 03334 NCDR(node) = onig_node_new_list(np, NULL_NODE); 03335 if (IS_NULL(NCDR(node))) { 03336 onig_node_free(np); 03337 return ONIGERR_MEMORY; 03338 } 03339 np = onig_node_new_anchor(ANCHOR_ANYCHAR_STAR); /* (?:^|\G) */ 03340 CHECK_NULL_RETURN_MEMERR(np); 03341 NCAR(node) = np; 03342 } 03343 #endif 03344 } 03345 } 03346 else if (type == NT_ENCLOSE) { 03347 EncloseNode* en = NENCLOSE(node); 03348 in_root = 0; 03349 if (en->type == ENCLOSE_MEMORY) { 03350 node = en->target; 03351 goto retry; 03352 } 03353 } 03354 return 0; 03355 } 03356 03357 03358 static int 03359 update_string_node_case_fold(regex_t* reg, Node *node) 03360 { 03361 UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; 03362 UChar *sbuf, *ebuf, *sp; 03363 int r, i, len; 03364 OnigDistance sbuf_size; 03365 StrNode* sn = NSTR(node); 03366 03367 end = sn->end; 03368 sbuf_size = (end - 
sn->s) * 2; 03369 sbuf = (UChar* )xmalloc(sbuf_size); 03370 CHECK_NULL_RETURN_MEMERR(sbuf); 03371 ebuf = sbuf + sbuf_size; 03372 03373 sp = sbuf; 03374 p = sn->s; 03375 while (p < end) { 03376 len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf); 03377 for (i = 0; i < len; i++) { 03378 if (sp >= ebuf) { 03379 UChar* p = (UChar* )xrealloc(sbuf, sbuf_size * 2); 03380 if (IS_NULL(p)) { 03381 xfree(sbuf); 03382 return ONIGERR_MEMORY; 03383 } 03384 sbuf = p; 03385 sp = sbuf + sbuf_size; 03386 sbuf_size *= 2; 03387 ebuf = sbuf + sbuf_size; 03388 } 03389 03390 *sp++ = buf[i]; 03391 } 03392 } 03393 03394 r = onig_node_str_set(node, sbuf, sp); 03395 if (r != 0) { 03396 xfree(sbuf); 03397 return r; 03398 } 03399 03400 xfree(sbuf); 03401 return 0; 03402 } 03403 03404 static int 03405 expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, 03406 regex_t* reg) 03407 { 03408 int r; 03409 Node *node; 03410 03411 node = onig_node_new_str(s, end); 03412 if (IS_NULL(node)) return ONIGERR_MEMORY; 03413 03414 r = update_string_node_case_fold(reg, node); 03415 if (r != 0) { 03416 onig_node_free(node); 03417 return r; 03418 } 03419 03420 NSTRING_SET_AMBIG(node); 03421 NSTRING_SET_DONT_GET_OPT_INFO(node); 03422 *rnode = node; 03423 return 0; 03424 } 03425 03426 static int 03427 expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], 03428 UChar *p, int slen, UChar *end, 03429 regex_t* reg, Node **rnode) 03430 { 03431 int r, i, j, len, varlen, varclen; 03432 Node *anode, *var_anode, *snode, *xnode, *an; 03433 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; 03434 03435 *rnode = var_anode = NULL_NODE; 03436 03437 varlen = 0; 03438 varclen = 0; 03439 for (i = 0; i < item_num; i++) { 03440 if (items[i].byte_len != slen) { 03441 varlen = 1; 03442 break; 03443 } 03444 if (items[i].code_len != 1) { 03445 varclen = 1; 03446 } 03447 } 03448 03449 if (varlen != 0) { 03450 *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE); 03451 if 
(IS_NULL(var_anode)) return ONIGERR_MEMORY; 03452 03453 xnode = onig_node_new_list(NULL, NULL); 03454 if (IS_NULL(xnode)) goto mem_err; 03455 NCAR(var_anode) = xnode; 03456 03457 anode = onig_node_new_alt(NULL_NODE, NULL_NODE); 03458 if (IS_NULL(anode)) goto mem_err; 03459 NCAR(xnode) = anode; 03460 } 03461 else { 03462 *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE); 03463 if (IS_NULL(anode)) return ONIGERR_MEMORY; 03464 } 03465 03466 snode = onig_node_new_str(p, p + slen); 03467 if (IS_NULL(snode)) goto mem_err; 03468 03469 NCAR(anode) = snode; 03470 03471 for (i = 0; i < item_num; i++) { 03472 snode = onig_node_new_str(NULL, NULL); 03473 if (IS_NULL(snode)) goto mem_err; 03474 03475 for (j = 0; j < items[i].code_len; j++) { 03476 len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf); 03477 if (len < 0) { 03478 r = len; 03479 goto mem_err2; 03480 } 03481 03482 r = onig_node_str_cat(snode, buf, buf + len); 03483 if (r != 0) goto mem_err2; 03484 } 03485 03486 an = onig_node_new_alt(NULL_NODE, NULL_NODE); 03487 if (IS_NULL(an)) { 03488 goto mem_err2; 03489 } 03490 03491 if (items[i].byte_len != slen) { 03492 Node *rem; 03493 UChar *q = p + items[i].byte_len; 03494 03495 if (q < end) { 03496 r = expand_case_fold_make_rem_string(&rem, q, end, reg); 03497 if (r != 0) { 03498 onig_node_free(an); 03499 goto mem_err2; 03500 } 03501 03502 xnode = onig_node_list_add(NULL_NODE, snode); 03503 if (IS_NULL(xnode)) { 03504 onig_node_free(an); 03505 onig_node_free(rem); 03506 goto mem_err2; 03507 } 03508 if (IS_NULL(onig_node_list_add(xnode, rem))) { 03509 onig_node_free(an); 03510 onig_node_free(xnode); 03511 onig_node_free(rem); 03512 goto mem_err; 03513 } 03514 03515 NCAR(an) = xnode; 03516 } 03517 else { 03518 NCAR(an) = snode; 03519 } 03520 03521 NCDR(var_anode) = an; 03522 var_anode = an; 03523 } 03524 else { 03525 NCAR(an) = snode; 03526 NCDR(anode) = an; 03527 anode = an; 03528 } 03529 } 03530 03531 if (varclen && !varlen) 03532 return 2; 03533 return 
varlen; 03534 03535 mem_err2: 03536 onig_node_free(snode); 03537 03538 mem_err: 03539 onig_node_free(*rnode); 03540 03541 return ONIGERR_MEMORY; 03542 } 03543 03544 static int 03545 expand_case_fold_string(Node* node, regex_t* reg) 03546 { 03547 #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8 03548 03549 int r, n, len, alt_num; 03550 int varlen = 0; 03551 UChar *start, *end, *p; 03552 Node *top_root, *root, *snode, *prev_node; 03553 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; 03554 StrNode* sn = NSTR(node); 03555 03556 if (NSTRING_IS_AMBIG(node)) return 0; 03557 03558 start = sn->s; 03559 end = sn->end; 03560 if (start >= end) return 0; 03561 03562 r = 0; 03563 top_root = root = prev_node = snode = NULL_NODE; 03564 alt_num = 1; 03565 p = start; 03566 while (p < end) { 03567 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag, 03568 p, end, items); 03569 if (n < 0) { 03570 r = n; 03571 goto err; 03572 } 03573 03574 len = enclen(reg->enc, p, end); 03575 03576 if (n == 0) { 03577 if (IS_NULL(snode)) { 03578 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) { 03579 top_root = root = onig_node_list_add(NULL_NODE, prev_node); 03580 if (IS_NULL(root)) { 03581 onig_node_free(prev_node); 03582 goto mem_err; 03583 } 03584 } 03585 03586 prev_node = snode = onig_node_new_str(NULL, NULL); 03587 if (IS_NULL(snode)) goto mem_err; 03588 if (IS_NOT_NULL(root)) { 03589 if (IS_NULL(onig_node_list_add(root, snode))) { 03590 onig_node_free(snode); 03591 goto mem_err; 03592 } 03593 } 03594 } 03595 03596 r = onig_node_str_cat(snode, p, p + len); 03597 if (r != 0) goto err; 03598 } 03599 else { 03600 alt_num *= (n + 1); 03601 if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break; 03602 03603 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) { 03604 top_root = root = onig_node_list_add(NULL_NODE, prev_node); 03605 if (IS_NULL(root)) { 03606 onig_node_free(prev_node); 03607 goto mem_err; 03608 } 03609 } 03610 03611 r = expand_case_fold_string_alt(n, items, 
p, len, end, reg, &prev_node); 03612 if (r < 0) goto mem_err; 03613 if (r > 0) varlen = 1; 03614 if (r == 1) { 03615 if (IS_NULL(root)) { 03616 top_root = prev_node; 03617 } 03618 else { 03619 if (IS_NULL(onig_node_list_add(root, prev_node))) { 03620 onig_node_free(prev_node); 03621 goto mem_err; 03622 } 03623 } 03624 03625 root = NCAR(prev_node); 03626 } 03627 else { /* r == 0 || r == 2 */ 03628 if (IS_NOT_NULL(root)) { 03629 if (IS_NULL(onig_node_list_add(root, prev_node))) { 03630 onig_node_free(prev_node); 03631 goto mem_err; 03632 } 03633 } 03634 } 03635 03636 snode = NULL_NODE; 03637 } 03638 03639 p += len; 03640 } 03641 03642 if (p < end) { 03643 Node *srem; 03644 03645 r = expand_case_fold_make_rem_string(&srem, p, end, reg); 03646 if (r != 0) goto mem_err; 03647 03648 if (IS_NOT_NULL(prev_node) && IS_NULL(root)) { 03649 top_root = root = onig_node_list_add(NULL_NODE, prev_node); 03650 if (IS_NULL(root)) { 03651 onig_node_free(srem); 03652 onig_node_free(prev_node); 03653 goto mem_err; 03654 } 03655 } 03656 03657 if (IS_NULL(root)) { 03658 prev_node = srem; 03659 } 03660 else { 03661 if (IS_NULL(onig_node_list_add(root, srem))) { 03662 onig_node_free(srem); 03663 goto mem_err; 03664 } 03665 } 03666 } 03667 03668 /* ending */ 03669 top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node); 03670 if (!varlen) { 03671 /* When all expanded strings are same length, case-insensitive 03672 BM search will be used. 
#ifdef USE_COMBINATION_EXPLOSION_CHECK

#define CEC_THRES_NUM_BIG_REPEAT         512
#define CEC_INFINITE_NUM          0x7fffffff

#define CEC_IN_INFINITE_REPEAT    (1<<0)
#define CEC_IN_FINITE_REPEAT      (1<<1)
#define CEC_CONT_BIG_REPEAT       (1<<2)

/* Assign combination explosion check numbers to the repeats below node.
 *
 * state is a bit set of CEC_* flags describing the repeats that enclose or
 * precede node.  A repeat is given the next number from
 * env->num_comb_exp_check when it is variable inside an unbounded repeat,
 * or when it is a big repeat that follows another big repeat.  Repeats
 * inside a finite repeat are marked with -1.
 *
 * Returns the state to hand to the following list element, or a negative
 * value propagated unchanged from a sub-tree.
 */
static int
setup_comb_exp_check(Node* node, int state, ScanEnv* env)
{
  int r = state;

  switch (NTYPE(node)) {
  case NT_LIST:
    /* each element receives the state produced by its predecessor */
    do {
      r = setup_comb_exp_check(NCAR(node), r, env);
    } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
    break;

  case NT_ALT:
    {
      int ret;

      /* every branch starts from the incoming state; results are merged */
      do {
        ret = setup_comb_exp_check(NCAR(node), state, env);
        if (ret < 0)
          return ret;   /* keep the error code free of state bits */
        r |= ret;
      } while (IS_NOT_NULL(node = NCDR(node)));
    }
    break;

  case NT_QTFR:
    {
      int child_state = state;
      int add_state = 0;
      QtfrNode* qn = NQTFR(node);
      Node* target = qn->target;
      int var_num;

      if (! IS_REPEAT_INFINITE(qn->upper)) {
        if (qn->upper > 1) {
          /* {0,1}, {1,1} are allowed */
          child_state |= CEC_IN_FINITE_REPEAT;

          /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
          if (env->backrefed_mem == 0 &&
              NTYPE(qn->target) == NT_ENCLOSE) {
            EncloseNode* en = NENCLOSE(qn->target);

            if (en->type == ENCLOSE_MEMORY &&
                NTYPE(en->target) == NT_QTFR) {
              QtfrNode* q = NQTFR(en->target);

              if (IS_REPEAT_INFINITE(q->upper)
                  && q->greedy == qn->greedy) {
                qn->upper = (qn->lower == 0 ? 1 : qn->lower);
                if (qn->upper == 1)
                  child_state = state;
              }
            }
          }
        }
      }

      if (state & CEC_IN_FINITE_REPEAT) {
        qn->comb_exp_check_num = -1;
      }
      else {
        if (IS_REPEAT_INFINITE(qn->upper)) {
          var_num = CEC_INFINITE_NUM;
          child_state |= CEC_IN_INFINITE_REPEAT;
        }
        else {
          var_num = qn->upper - qn->lower;
        }

        if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
          add_state |= CEC_CONT_BIG_REPEAT;

        if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
            ((state & CEC_CONT_BIG_REPEAT) != 0 &&
             var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
          if (qn->comb_exp_check_num == 0) {
            env->num_comb_exp_check++;
            qn->comb_exp_check_num = env->num_comb_exp_check;
            if (env->curr_max_regnum > env->comb_exp_max_regnum)
              env->comb_exp_max_regnum = env->curr_max_regnum;
          }
        }
      }

      r = setup_comb_exp_check(target, child_state, env);
      if (r >= 0)
        r |= add_state;   /* never mix state bits into an error code */
    }
    break;

  case NT_ENCLOSE:
    {
      EncloseNode* en = NENCLOSE(node);

      /* remember the highest capture group number seen so far */
      if (en->type == ENCLOSE_MEMORY &&
          env->curr_max_regnum < en->regnum)
        env->curr_max_regnum = en->regnum;

      r = setup_comb_exp_check(en->target, state, env);
    }
    break;

#ifdef USE_SUBEXP_CALL
  case NT_CALL:
    if (IS_CALL_RECURSION(NCALL(node)))
      env->has_recursion = 1;
    else
      r = setup_comb_exp_check(NCALL(node)->target, state, env);
    break;
#endif

  default:
    break;
  }

  return r;
}
#endif
state, env); 03812 break; 03813 } 03814 } 03815 break; 03816 03817 #ifdef USE_SUBEXP_CALL 03818 case NT_CALL: 03819 if (IS_CALL_RECURSION(NCALL(node))) 03820 env->has_recursion = 1; 03821 else 03822 r = setup_comb_exp_check(NCALL(node)->target, state, env); 03823 break; 03824 #endif 03825 03826 default: 03827 break; 03828 } 03829 03830 return r; 03831 } 03832 #endif 03833 03834 #define IN_ALT (1<<0) 03835 #define IN_NOT (1<<1) 03836 #define IN_REPEAT (1<<2) 03837 #define IN_VAR_REPEAT (1<<3) 03838 #define IN_ROOT (1<<4) 03839 03840 /* setup_tree does the following work. 03841 1. check empty loop. (set qn->target_empty_info) 03842 2. expand ignore-case in char class. 03843 3. set memory status bit flags. (reg->mem_stats) 03844 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact]. 03845 5. find invalid patterns in look-behind. 03846 6. expand repeated string. 03847 */ 03848 static int 03849 setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) 03850 { 03851 int type; 03852 int r = 0; 03853 int in_root = state & IN_ROOT; 03854 03855 state &= ~IN_ROOT; 03856 restart: 03857 type = NTYPE(node); 03858 switch (type) { 03859 case NT_LIST: 03860 { 03861 Node* prev = NULL_NODE; 03862 int prev_in_root = 0; 03863 state |= in_root; 03864 do { 03865 r = setup_tree(NCAR(node), reg, state, env); 03866 if (IS_NOT_NULL(prev) && r == 0) { 03867 r = next_setup(prev, NCAR(node), prev_in_root, reg); 03868 } 03869 prev = NCAR(node); 03870 prev_in_root = state & IN_ROOT; 03871 state &= ~IN_ROOT; 03872 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 03873 } 03874 break; 03875 03876 case NT_ALT: 03877 do { 03878 r = setup_tree(NCAR(node), reg, (state | IN_ALT), env); 03879 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 03880 break; 03881 03882 case NT_CCLASS: 03883 break; 03884 03885 case NT_STR: 03886 if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) { 03887 r = expand_case_fold_string(node, reg); 03888 } 03889 break; 03890 03891 case NT_CTYPE: 
03892 case NT_CANY: 03893 break; 03894 03895 #ifdef USE_SUBEXP_CALL 03896 case NT_CALL: 03897 break; 03898 #endif 03899 03900 case NT_BREF: 03901 { 03902 int i; 03903 int* p; 03904 Node** nodes = SCANENV_MEM_NODES(env); 03905 BRefNode* br = NBREF(node); 03906 p = BACKREFS_P(br); 03907 for (i = 0; i < br->back_num; i++) { 03908 if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; 03909 BIT_STATUS_ON_AT(env->backrefed_mem, p[i]); 03910 BIT_STATUS_ON_AT(env->bt_mem_start, p[i]); 03911 #ifdef USE_BACKREF_WITH_LEVEL 03912 if (IS_BACKREF_NEST_LEVEL(br)) { 03913 BIT_STATUS_ON_AT(env->bt_mem_end, p[i]); 03914 } 03915 #endif 03916 SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED); 03917 } 03918 } 03919 break; 03920 03921 case NT_QTFR: 03922 { 03923 OnigDistance d; 03924 QtfrNode* qn = NQTFR(node); 03925 Node* target = qn->target; 03926 03927 if ((state & IN_REPEAT) != 0) { 03928 qn->state |= NST_IN_REPEAT; 03929 } 03930 03931 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) { 03932 r = get_min_match_length(target, &d, env); 03933 if (r) break; 03934 if (d == 0) { 03935 qn->target_empty_info = NQ_TARGET_IS_EMPTY; 03936 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT 03937 r = quantifiers_memory_node_info(target); 03938 if (r < 0) break; 03939 if (r > 0) { 03940 qn->target_empty_info = r; 03941 } 03942 #endif 03943 #if 0 03944 r = get_max_match_length(target, &d, env); 03945 if (r == 0 && d == 0) { 03946 /* ()* ==> ()?, ()+ ==> () */ 03947 qn->upper = 1; 03948 if (qn->lower > 1) qn->lower = 1; 03949 if (NTYPE(target) == NT_STR) { 03950 qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */ 03951 } 03952 } 03953 #endif 03954 } 03955 } 03956 03957 state |= IN_REPEAT; 03958 if (qn->lower != qn->upper) 03959 state |= IN_VAR_REPEAT; 03960 r = setup_tree(target, reg, state, env); 03961 if (r) break; 03962 03963 /* expand string */ 03964 #define EXPAND_STRING_MAX_LENGTH 100 03965 if (NTYPE(target) == NT_STR) { 03966 if (qn->lower > 1) { 03967 int i, n = qn->lower; 03968 
OnigDistance len = NSTRING_LEN(target); 03969 StrNode* sn = NSTR(target); 03970 Node* np; 03971 03972 np = onig_node_new_str(sn->s, sn->end); 03973 if (IS_NULL(np)) return ONIGERR_MEMORY; 03974 NSTR(np)->flag = sn->flag; 03975 03976 for (i = 1; i < n && (i+1) * len <= EXPAND_STRING_MAX_LENGTH; i++) { 03977 r = onig_node_str_cat(np, sn->s, sn->end); 03978 if (r) { 03979 onig_node_free(np); 03980 return r; 03981 } 03982 } 03983 if (i < qn->upper || IS_REPEAT_INFINITE(qn->upper)) { 03984 Node *np1, *np2; 03985 03986 qn->lower -= i; 03987 if (! IS_REPEAT_INFINITE(qn->upper)) 03988 qn->upper -= i; 03989 03990 np1 = onig_node_new_list(np, NULL); 03991 if (IS_NULL(np1)) { 03992 onig_node_free(np); 03993 return ONIGERR_MEMORY; 03994 } 03995 swap_node(np1, node); 03996 np2 = onig_node_list_add(node, np1); 03997 if (IS_NULL(np2)) { 03998 onig_node_free(np1); 03999 return ONIGERR_MEMORY; 04000 } 04001 } 04002 else { 04003 swap_node(np, node); 04004 onig_node_free(np); 04005 } 04006 break; /* break case NT_QTFR: */ 04007 } 04008 } 04009 04010 #ifdef USE_OP_PUSH_OR_JUMP_EXACT 04011 if (qn->greedy && (qn->target_empty_info != 0)) { 04012 if (NTYPE(target) == NT_QTFR) { 04013 QtfrNode* tqn = NQTFR(target); 04014 if (IS_NOT_NULL(tqn->head_exact)) { 04015 qn->head_exact = tqn->head_exact; 04016 tqn->head_exact = NULL; 04017 } 04018 } 04019 else { 04020 qn->head_exact = get_head_value_node(qn->target, 1, reg); 04021 } 04022 } 04023 #endif 04024 } 04025 break; 04026 04027 case NT_ENCLOSE: 04028 { 04029 EncloseNode* en = NENCLOSE(node); 04030 04031 switch (en->type) { 04032 case ENCLOSE_OPTION: 04033 { 04034 OnigOptionType options = reg->options; 04035 state |= in_root; 04036 reg->options = NENCLOSE(node)->option; 04037 r = setup_tree(NENCLOSE(node)->target, reg, state, env); 04038 reg->options = options; 04039 } 04040 break; 04041 04042 case ENCLOSE_MEMORY: 04043 if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) { 04044 BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum); 04045 /* 
SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */ 04046 } 04047 r = setup_tree(en->target, reg, state, env); 04048 break; 04049 04050 case ENCLOSE_STOP_BACKTRACK: 04051 { 04052 Node* target = en->target; 04053 r = setup_tree(target, reg, state, env); 04054 if (NTYPE(target) == NT_QTFR) { 04055 QtfrNode* tqn = NQTFR(target); 04056 if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 && 04057 tqn->greedy != 0) { /* (?>a*), a*+ etc... */ 04058 int qtype = NTYPE(tqn->target); 04059 if (IS_NODE_TYPE_SIMPLE(qtype)) 04060 SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT); 04061 } 04062 } 04063 } 04064 break; 04065 04066 case ENCLOSE_CONDITION: 04067 #ifdef USE_NAMED_GROUP 04068 if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node)) && 04069 env->num_named > 0 && 04070 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && 04071 !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) { 04072 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; 04073 } 04074 #endif 04075 r = setup_tree(NENCLOSE(node)->target, reg, state, env); 04076 break; 04077 } 04078 } 04079 break; 04080 04081 case NT_ANCHOR: 04082 { 04083 AnchorNode* an = NANCHOR(node); 04084 04085 switch (an->type) { 04086 case ANCHOR_PREC_READ: 04087 r = setup_tree(an->target, reg, state, env); 04088 break; 04089 case ANCHOR_PREC_READ_NOT: 04090 r = setup_tree(an->target, reg, (state | IN_NOT), env); 04091 break; 04092 04093 /* allowed node types in look-behind */ 04094 #define ALLOWED_TYPE_IN_LB \ 04095 ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \ 04096 BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL ) 04097 04098 #define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY | ENCLOSE_OPTION ) 04099 #define ALLOWED_ENCLOSE_IN_LB_NOT ENCLOSE_OPTION 04100 04101 #define ALLOWED_ANCHOR_IN_LB \ 04102 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \ 04103 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \ 04104 ANCHOR_WORD_BOUND | 
ANCHOR_NOT_WORD_BOUND | \ 04105 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END ) 04106 #define ALLOWED_ANCHOR_IN_LB_NOT \ 04107 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \ 04108 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \ 04109 ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \ 04110 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END ) 04111 04112 case ANCHOR_LOOK_BEHIND: 04113 { 04114 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB, 04115 ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB); 04116 if (r < 0) return r; 04117 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; 04118 r = setup_look_behind(node, reg, env); 04119 if (r != 0) return r; 04120 if (NTYPE(node) != NT_ANCHOR) goto restart; 04121 r = setup_tree(an->target, reg, state, env); 04122 } 04123 break; 04124 04125 case ANCHOR_LOOK_BEHIND_NOT: 04126 { 04127 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB, 04128 ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT); 04129 if (r < 0) return r; 04130 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; 04131 r = setup_look_behind(node, reg, env); 04132 if (r != 0) return r; 04133 if (NTYPE(node) != NT_ANCHOR) goto restart; 04134 r = setup_tree(an->target, reg, (state | IN_NOT), env); 04135 } 04136 break; 04137 } 04138 } 04139 break; 04140 04141 default: 04142 break; 04143 } 04144 04145 return r; 04146 } 04147 04148 #ifndef USE_SUNDAY_QUICK_SEARCH 04149 /* set skip map for Boyer-Moore search */ 04150 static int 04151 set_bm_skip(UChar* s, UChar* end, regex_t* reg, 04152 UChar skip[], int** int_skip, int ignore_case) 04153 { 04154 OnigDistance i, len; 04155 int clen, flen, n, j, k; 04156 UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN]; 04157 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; 04158 OnigEncoding enc = reg->enc; 04159 04160 len = end - s; 04161 if (len < ONIG_CHAR_TABLE_SIZE) { 04162 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )len; 04163 04164 n = 0; 
04165 for (i = 0; i < len - 1; i += clen) { 04166 p = s + i; 04167 if (ignore_case) 04168 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, 04169 p, end, items); 04170 clen = enclen(enc, p, end); 04171 04172 for (j = 0; j < n; j++) { 04173 if ((items[j].code_len != 1) || (items[j].byte_len != clen)) 04174 return 1; /* different length isn't supported. */ 04175 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]); 04176 if (flen != clen) 04177 return 1; /* different length isn't supported. */ 04178 } 04179 for (j = 0; j < clen; j++) { 04180 skip[s[i + j]] = (UChar )(len - 1 - i - j); 04181 for (k = 0; k < n; k++) { 04182 skip[buf[k][j]] = (UChar )(len - 1 - i - j); 04183 } 04184 } 04185 } 04186 } 04187 else { 04188 if (IS_NULL(*int_skip)) { 04189 *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); 04190 if (IS_NULL(*int_skip)) return ONIGERR_MEMORY; 04191 } 04192 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )len; 04193 04194 n = 0; 04195 for (i = 0; i < len - 1; i += clen) { 04196 p = s + i; 04197 if (ignore_case) 04198 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, 04199 p, end, items); 04200 clen = enclen(enc, p, end); 04201 04202 for (j = 0; j < n; j++) { 04203 if ((items[j].code_len != 1) || (items[j].byte_len != clen)) 04204 return 1; /* different length isn't supported. */ 04205 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]); 04206 if (flen != clen) 04207 return 1; /* different length isn't supported. 
*/ 04208 } 04209 for (j = 0; j < clen; j++) { 04210 (*int_skip)[s[i + j]] = (int )(len - 1 - i - j); 04211 for (k = 0; k < n; k++) { 04212 (*int_skip)[buf[k][j]] = (int )(len - 1 - i - j); 04213 } 04214 } 04215 } 04216 } 04217 return 0; 04218 } 04219 04220 #else /* USE_SUNDAY_QUICK_SEARCH */ 04221 04222 /* set skip map for Sunday's quick search */ 04223 static int 04224 set_bm_skip(UChar* s, UChar* end, regex_t* reg, 04225 UChar skip[], int** int_skip, int ignore_case) 04226 { 04227 OnigDistance i, len; 04228 int clen, flen, n, j, k; 04229 UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN]; 04230 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; 04231 OnigEncoding enc = reg->enc; 04232 04233 len = end - s; 04234 if (len < ONIG_CHAR_TABLE_SIZE) { 04235 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )(len + 1); 04236 04237 n = 0; 04238 for (i = 0; i < len; i += clen) { 04239 p = s + i; 04240 if (ignore_case) 04241 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, 04242 p, end, items); 04243 clen = enclen(enc, p, end); 04244 04245 for (j = 0; j < n; j++) { 04246 if ((items[j].code_len != 1) || (items[j].byte_len != clen)) 04247 return 1; /* different length isn't supported. */ 04248 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]); 04249 if (flen != clen) 04250 return 1; /* different length isn't supported. 
*/ 04251 } 04252 for (j = 0; j < clen; j++) { 04253 skip[s[i + j]] = (UChar )(len - i - j); 04254 for (k = 0; k < n; k++) { 04255 skip[buf[k][j]] = (UChar )(len - i - j); 04256 } 04257 } 04258 } 04259 } 04260 else { 04261 if (IS_NULL(*int_skip)) { 04262 *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); 04263 if (IS_NULL(*int_skip)) return ONIGERR_MEMORY; 04264 } 04265 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )(len + 1); 04266 04267 n = 0; 04268 for (i = 0; i < len; i += clen) { 04269 p = s + i; 04270 if (ignore_case) 04271 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, 04272 p, end, items); 04273 clen = enclen(enc, p, end); 04274 04275 for (j = 0; j < n; j++) { 04276 if ((items[j].code_len != 1) || (items[j].byte_len != clen)) 04277 return 1; /* different length isn't supported. */ 04278 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]); 04279 if (flen != clen) 04280 return 1; /* different length isn't supported. */ 04281 } 04282 for (j = 0; j < clen; j++) { 04283 (*int_skip)[s[i + j]] = (int )(len - i - j); 04284 for (k = 0; k < n; k++) { 04285 (*int_skip)[buf[k][j]] = (int )(len - i - j); 04286 } 04287 } 04288 } 04289 } 04290 return 0; 04291 } 04292 #endif /* USE_SUNDAY_QUICK_SEARCH */ 04293 04294 #define OPT_EXACT_MAXLEN 24 04295 04296 typedef struct { 04297 OnigDistance min; /* min byte length */ 04298 OnigDistance max; /* max byte length */ 04299 } MinMaxLen; 04300 04301 typedef struct { 04302 MinMaxLen mmd; 04303 OnigEncoding enc; 04304 OnigOptionType options; 04305 OnigCaseFoldType case_fold_flag; 04306 ScanEnv* scan_env; 04307 } OptEnv; 04308 04309 typedef struct { 04310 int left_anchor; 04311 int right_anchor; 04312 } OptAncInfo; 04313 04314 typedef struct { 04315 MinMaxLen mmd; /* info position */ 04316 OptAncInfo anc; 04317 04318 int reach_end; 04319 int ignore_case; /* -1: unset, 0: case sensitive, 1: ignore case */ 04320 int len; 04321 UChar s[OPT_EXACT_MAXLEN]; 04322 } OptExactInfo; 04323 
04324 typedef struct { 04325 MinMaxLen mmd; /* info position */ 04326 OptAncInfo anc; 04327 04328 int value; /* weighted value */ 04329 UChar map[ONIG_CHAR_TABLE_SIZE]; 04330 } OptMapInfo; 04331 04332 typedef struct { 04333 MinMaxLen len; 04334 04335 OptAncInfo anc; 04336 OptExactInfo exb; /* boundary */ 04337 OptExactInfo exm; /* middle */ 04338 OptExactInfo expr; /* prec read (?=...) */ 04339 04340 OptMapInfo map; /* boundary */ 04341 } NodeOptInfo; 04342 04343 04344 static int 04345 map_position_value(OnigEncoding enc, int i) 04346 { 04347 static const short int ByteValTable[] = { 04348 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1, 04349 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 04350 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 04351 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 04352 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 04353 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5, 04354 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 04355 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1 04356 }; 04357 04358 if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) { 04359 if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1) 04360 return 20; 04361 else 04362 return (int )ByteValTable[i]; 04363 } 04364 else 04365 return 4; /* Take it easy. 
*/ 04366 } 04367 04368 static int 04369 distance_value(MinMaxLen* mm) 04370 { 04371 /* 1000 / (min-max-dist + 1) */ 04372 static const short int dist_vals[] = { 04373 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100, 04374 91, 83, 77, 71, 67, 63, 59, 56, 53, 50, 04375 48, 45, 43, 42, 40, 38, 37, 36, 34, 33, 04376 32, 31, 30, 29, 29, 28, 27, 26, 26, 25, 04377 24, 24, 23, 23, 22, 22, 21, 21, 20, 20, 04378 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, 04379 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, 04380 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 04381 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, 04382 11, 11, 11, 11, 11, 10, 10, 10, 10, 10 04383 }; 04384 04385 OnigDistance d; 04386 04387 if (mm->max == ONIG_INFINITE_DISTANCE) return 0; 04388 04389 d = mm->max - mm->min; 04390 if (d < sizeof(dist_vals)/sizeof(dist_vals[0])) 04391 /* return dist_vals[d] * 16 / (mm->min + 12); */ 04392 return (int )dist_vals[d]; 04393 else 04394 return 1; 04395 } 04396 04397 static int 04398 comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2) 04399 { 04400 if (v2 <= 0) return -1; 04401 if (v1 <= 0) return 1; 04402 04403 v1 *= distance_value(d1); 04404 v2 *= distance_value(d2); 04405 04406 if (v2 > v1) return 1; 04407 if (v2 < v1) return -1; 04408 04409 if (d2->min < d1->min) return 1; 04410 if (d2->min > d1->min) return -1; 04411 return 0; 04412 } 04413 04414 static int 04415 is_equal_mml(MinMaxLen* a, MinMaxLen* b) 04416 { 04417 return (a->min == b->min && a->max == b->max) ? 
1 : 0; 04418 } 04419 04420 04421 static void 04422 set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max) 04423 { 04424 mml->min = min; 04425 mml->max = max; 04426 } 04427 04428 static void 04429 clear_mml(MinMaxLen* mml) 04430 { 04431 mml->min = mml->max = 0; 04432 } 04433 04434 static void 04435 copy_mml(MinMaxLen* to, MinMaxLen* from) 04436 { 04437 to->min = from->min; 04438 to->max = from->max; 04439 } 04440 04441 static void 04442 add_mml(MinMaxLen* to, MinMaxLen* from) 04443 { 04444 to->min = distance_add(to->min, from->min); 04445 to->max = distance_add(to->max, from->max); 04446 } 04447 04448 #if 0 04449 static void 04450 add_len_mml(MinMaxLen* to, OnigDistance len) 04451 { 04452 to->min = distance_add(to->min, len); 04453 to->max = distance_add(to->max, len); 04454 } 04455 #endif 04456 04457 static void 04458 alt_merge_mml(MinMaxLen* to, MinMaxLen* from) 04459 { 04460 if (to->min > from->min) to->min = from->min; 04461 if (to->max < from->max) to->max = from->max; 04462 } 04463 04464 static void 04465 copy_opt_env(OptEnv* to, OptEnv* from) 04466 { 04467 *to = *from; 04468 } 04469 04470 static void 04471 clear_opt_anc_info(OptAncInfo* anc) 04472 { 04473 anc->left_anchor = 0; 04474 anc->right_anchor = 0; 04475 } 04476 04477 static void 04478 copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from) 04479 { 04480 *to = *from; 04481 } 04482 04483 static void 04484 concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right, 04485 OnigDistance left_len, OnigDistance right_len) 04486 { 04487 clear_opt_anc_info(to); 04488 04489 to->left_anchor = left->left_anchor; 04490 if (left_len == 0) { 04491 to->left_anchor |= right->left_anchor; 04492 } 04493 04494 to->right_anchor = right->right_anchor; 04495 if (right_len == 0) { 04496 to->right_anchor |= left->right_anchor; 04497 } 04498 } 04499 04500 static int 04501 is_left_anchor(int anc) 04502 { 04503 if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF || 04504 anc == ANCHOR_END_LINE || anc == 
ANCHOR_PREC_READ || 04505 anc == ANCHOR_PREC_READ_NOT) 04506 return 0; 04507 04508 return 1; 04509 } 04510 04511 static int 04512 is_set_opt_anc_info(OptAncInfo* to, int anc) 04513 { 04514 if ((to->left_anchor & anc) != 0) return 1; 04515 04516 return ((to->right_anchor & anc) != 0 ? 1 : 0); 04517 } 04518 04519 static void 04520 add_opt_anc_info(OptAncInfo* to, int anc) 04521 { 04522 if (is_left_anchor(anc)) 04523 to->left_anchor |= anc; 04524 else 04525 to->right_anchor |= anc; 04526 } 04527 04528 static void 04529 remove_opt_anc_info(OptAncInfo* to, int anc) 04530 { 04531 if (is_left_anchor(anc)) 04532 to->left_anchor &= ~anc; 04533 else 04534 to->right_anchor &= ~anc; 04535 } 04536 04537 static void 04538 alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add) 04539 { 04540 to->left_anchor &= add->left_anchor; 04541 to->right_anchor &= add->right_anchor; 04542 } 04543 04544 static int 04545 is_full_opt_exact_info(OptExactInfo* ex) 04546 { 04547 return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0); 04548 } 04549 04550 static void 04551 clear_opt_exact_info(OptExactInfo* ex) 04552 { 04553 clear_mml(&ex->mmd); 04554 clear_opt_anc_info(&ex->anc); 04555 ex->reach_end = 0; 04556 ex->ignore_case = -1; /* unset */ 04557 ex->len = 0; 04558 ex->s[0] = '\0'; 04559 } 04560 04561 static void 04562 copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from) 04563 { 04564 *to = *from; 04565 } 04566 04567 static void 04568 concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc) 04569 { 04570 int i, j, len; 04571 UChar *p, *end; 04572 OptAncInfo tanc; 04573 04574 if (to->ignore_case < 0) 04575 to->ignore_case = add->ignore_case; 04576 else if (to->ignore_case != add->ignore_case) 04577 return ; /* avoid */ 04578 04579 p = add->s; 04580 end = p + add->len; 04581 for (i = to->len; p < end; ) { 04582 len = enclen(enc, p, end); 04583 if (i + len > OPT_EXACT_MAXLEN) break; 04584 for (j = 0; j < len && p < end; j++) 04585 to->s[i++] = *p++; 04586 } 04587 04588 to->len = i; 
04589 to->reach_end = (p == end ? add->reach_end : 0); 04590 04591 concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1); 04592 if (! to->reach_end) tanc.right_anchor = 0; 04593 copy_opt_anc_info(&to->anc, &tanc); 04594 } 04595 04596 static void 04597 concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end, 04598 int raw ARG_UNUSED, OnigEncoding enc) 04599 { 04600 int i, j, len; 04601 UChar *p; 04602 04603 for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) { 04604 len = enclen(enc, p, end); 04605 if (i + len > OPT_EXACT_MAXLEN) break; 04606 for (j = 0; j < len && p < end; j++) 04607 to->s[i++] = *p++; 04608 } 04609 04610 to->len = i; 04611 } 04612 04613 static void 04614 alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env) 04615 { 04616 int i, j, len; 04617 04618 if (add->len == 0 || to->len == 0) { 04619 clear_opt_exact_info(to); 04620 return ; 04621 } 04622 04623 if (! is_equal_mml(&to->mmd, &add->mmd)) { 04624 clear_opt_exact_info(to); 04625 return ; 04626 } 04627 04628 for (i = 0; i < to->len && i < add->len; ) { 04629 if (to->s[i] != add->s[i]) break; 04630 len = enclen(env->enc, to->s + i, to->s + to->len); 04631 04632 for (j = 1; j < len; j++) { 04633 if (to->s[i+j] != add->s[i+j]) break; 04634 } 04635 if (j < len) break; 04636 i += len; 04637 } 04638 04639 if (! add->reach_end || i < add->len || i < to->len) { 04640 to->reach_end = 0; 04641 } 04642 to->len = i; 04643 if (to->ignore_case < 0) 04644 to->ignore_case = add->ignore_case; 04645 else if (add->ignore_case >= 0) 04646 to->ignore_case |= add->ignore_case; 04647 04648 alt_merge_opt_anc_info(&to->anc, &add->anc); 04649 if (! 
to->reach_end) to->anc.right_anchor = 0; 04650 } 04651 04652 static void 04653 select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt) 04654 { 04655 int v1, v2; 04656 04657 v1 = now->len; 04658 v2 = alt->len; 04659 04660 if (v2 == 0) { 04661 return ; 04662 } 04663 else if (v1 == 0) { 04664 copy_opt_exact_info(now, alt); 04665 return ; 04666 } 04667 else if (v1 <= 2 && v2 <= 2) { 04668 /* ByteValTable[x] is big value --> low price */ 04669 v2 = map_position_value(enc, now->s[0]); 04670 v1 = map_position_value(enc, alt->s[0]); 04671 04672 if (now->len > 1) v1 += 5; 04673 if (alt->len > 1) v2 += 5; 04674 } 04675 04676 if (now->ignore_case <= 0) v1 *= 2; 04677 if (alt->ignore_case <= 0) v2 *= 2; 04678 04679 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0) 04680 copy_opt_exact_info(now, alt); 04681 } 04682 04683 static void 04684 clear_opt_map_info(OptMapInfo* map) 04685 { 04686 static const OptMapInfo clean_info = { 04687 {0, 0}, {0, 0}, 0, 04688 { 04689 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04690 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04691 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04692 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04693 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04694 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04695 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04696 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04697 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04698 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04699 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04700 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04701 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04702 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04703 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04704 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 04705 } 04706 }; 04707 04708 xmemcpy(map, &clean_info, sizeof(OptMapInfo)); 04709 } 04710 04711 static void 04712 copy_opt_map_info(OptMapInfo* 
to, OptMapInfo* from) 04713 { 04714 *to = *from; 04715 } 04716 04717 static void 04718 add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc) 04719 { 04720 if (map->map[c] == 0) { 04721 map->map[c] = 1; 04722 map->value += map_position_value(enc, c); 04723 } 04724 } 04725 04726 static int 04727 add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end, 04728 OnigEncoding enc, OnigCaseFoldType case_fold_flag) 04729 { 04730 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; 04731 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; 04732 int i, n; 04733 04734 add_char_opt_map_info(map, p[0], enc); 04735 04736 case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag); 04737 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items); 04738 if (n < 0) return n; 04739 04740 for (i = 0; i < n; i++) { 04741 ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf); 04742 add_char_opt_map_info(map, buf[0], enc); 04743 } 04744 04745 return 0; 04746 } 04747 04748 static void 04749 select_opt_map_info(OptMapInfo* now, OptMapInfo* alt) 04750 { 04751 const int z = 1<<15; /* 32768: something big value */ 04752 04753 int v1, v2; 04754 04755 if (alt->value == 0) return ; 04756 if (now->value == 0) { 04757 copy_opt_map_info(now, alt); 04758 return ; 04759 } 04760 04761 v1 = z / now->value; 04762 v2 = z / alt->value; 04763 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0) 04764 copy_opt_map_info(now, alt); 04765 } 04766 04767 static int 04768 comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m) 04769 { 04770 #define COMP_EM_BASE 20 04771 int ve, vm; 04772 04773 if (m->value <= 0) return -1; 04774 04775 ve = COMP_EM_BASE * e->len * (e->ignore_case > 0 ? 1 : 2); 04776 vm = COMP_EM_BASE * 5 * 2 / m->value; 04777 return comp_distance_value(&e->mmd, &m->mmd, ve, vm); 04778 } 04779 04780 static void 04781 alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add) 04782 { 04783 int i, val; 04784 04785 /* if (! 
is_equal_mml(&to->mmd, &add->mmd)) return ; */ 04786 if (to->value == 0) return ; 04787 if (add->value == 0 || to->mmd.max < add->mmd.min) { 04788 clear_opt_map_info(to); 04789 return ; 04790 } 04791 04792 alt_merge_mml(&to->mmd, &add->mmd); 04793 04794 val = 0; 04795 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) { 04796 if (add->map[i]) 04797 to->map[i] = 1; 04798 04799 if (to->map[i]) 04800 val += map_position_value(enc, i); 04801 } 04802 to->value = val; 04803 04804 alt_merge_opt_anc_info(&to->anc, &add->anc); 04805 } 04806 04807 static void 04808 set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd) 04809 { 04810 copy_mml(&(opt->exb.mmd), mmd); 04811 copy_mml(&(opt->expr.mmd), mmd); 04812 copy_mml(&(opt->map.mmd), mmd); 04813 } 04814 04815 static void 04816 clear_node_opt_info(NodeOptInfo* opt) 04817 { 04818 clear_mml(&opt->len); 04819 clear_opt_anc_info(&opt->anc); 04820 clear_opt_exact_info(&opt->exb); 04821 clear_opt_exact_info(&opt->exm); 04822 clear_opt_exact_info(&opt->expr); 04823 clear_opt_map_info(&opt->map); 04824 } 04825 04826 static void 04827 copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from) 04828 { 04829 *to = *from; 04830 } 04831 04832 static void 04833 concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add) 04834 { 04835 int exb_reach, exm_reach; 04836 OptAncInfo tanc; 04837 04838 concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max); 04839 copy_opt_anc_info(&to->anc, &tanc); 04840 04841 if (add->exb.len > 0 && to->len.max == 0) { 04842 concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc, 04843 to->len.max, add->len.max); 04844 copy_opt_anc_info(&add->exb.anc, &tanc); 04845 } 04846 04847 if (add->map.value > 0 && to->len.max == 0) { 04848 if (add->map.mmd.max == 0) 04849 add->map.anc.left_anchor |= to->anc.left_anchor; 04850 } 04851 04852 exb_reach = to->exb.reach_end; 04853 exm_reach = to->exm.reach_end; 04854 04855 if (add->len.max != 0) 04856 to->exb.reach_end = to->exm.reach_end = 0; 04857 
04858 if (add->exb.len > 0) { 04859 if (exb_reach) { 04860 concat_opt_exact_info(&to->exb, &add->exb, enc); 04861 clear_opt_exact_info(&add->exb); 04862 } 04863 else if (exm_reach) { 04864 concat_opt_exact_info(&to->exm, &add->exb, enc); 04865 clear_opt_exact_info(&add->exb); 04866 } 04867 } 04868 select_opt_exact_info(enc, &to->exm, &add->exb); 04869 select_opt_exact_info(enc, &to->exm, &add->exm); 04870 04871 if (to->expr.len > 0) { 04872 if (add->len.max > 0) { 04873 if (to->expr.len > (int )add->len.max) 04874 to->expr.len = (int )add->len.max; 04875 04876 if (to->expr.mmd.max == 0) 04877 select_opt_exact_info(enc, &to->exb, &to->expr); 04878 else 04879 select_opt_exact_info(enc, &to->exm, &to->expr); 04880 } 04881 } 04882 else if (add->expr.len > 0) { 04883 copy_opt_exact_info(&to->expr, &add->expr); 04884 } 04885 04886 select_opt_map_info(&to->map, &add->map); 04887 04888 add_mml(&to->len, &add->len); 04889 } 04890 04891 static void 04892 alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env) 04893 { 04894 alt_merge_opt_anc_info (&to->anc, &add->anc); 04895 alt_merge_opt_exact_info(&to->exb, &add->exb, env); 04896 alt_merge_opt_exact_info(&to->exm, &add->exm, env); 04897 alt_merge_opt_exact_info(&to->expr, &add->expr, env); 04898 alt_merge_opt_map_info(env->enc, &to->map, &add->map); 04899 04900 alt_merge_mml(&to->len, &add->len); 04901 } 04902 04903 04904 #define MAX_NODE_OPT_INFO_REF_COUNT 5 04905 04906 static int 04907 optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) 04908 { 04909 int type; 04910 int r = 0; 04911 04912 clear_node_opt_info(opt); 04913 set_bound_node_opt_info(opt, &env->mmd); 04914 04915 type = NTYPE(node); 04916 switch (type) { 04917 case NT_LIST: 04918 { 04919 OptEnv nenv; 04920 NodeOptInfo nopt; 04921 Node* nd = node; 04922 04923 copy_opt_env(&nenv, env); 04924 do { 04925 r = optimize_node_left(NCAR(nd), &nopt, &nenv); 04926 if (r == 0) { 04927 add_mml(&nenv.mmd, &nopt.len); 04928 
concat_left_node_opt_info(env->enc, opt, &nopt); 04929 } 04930 } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd))); 04931 } 04932 break; 04933 04934 case NT_ALT: 04935 { 04936 NodeOptInfo nopt; 04937 Node* nd = node; 04938 04939 do { 04940 r = optimize_node_left(NCAR(nd), &nopt, env); 04941 if (r == 0) { 04942 if (nd == node) copy_node_opt_info(opt, &nopt); 04943 else alt_merge_node_opt_info(opt, &nopt, env); 04944 } 04945 } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd))); 04946 } 04947 break; 04948 04949 case NT_STR: 04950 { 04951 StrNode* sn = NSTR(node); 04952 OnigDistance slen = sn->end - sn->s; 04953 int is_raw = NSTRING_IS_RAW(node); 04954 04955 if (! NSTRING_IS_AMBIG(node)) { 04956 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, 04957 is_raw, env->enc); 04958 opt->exb.ignore_case = 0; 04959 if (slen > 0) { 04960 add_char_opt_map_info(&opt->map, *(sn->s), env->enc); 04961 } 04962 set_mml(&opt->len, slen, slen); 04963 } 04964 else { 04965 OnigDistance max; 04966 04967 if (NSTRING_IS_DONT_GET_OPT_INFO(node)) { 04968 int n = onigenc_strlen(env->enc, sn->s, sn->end); 04969 max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n; 04970 } 04971 else { 04972 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, 04973 is_raw, env->enc); 04974 opt->exb.ignore_case = 1; 04975 04976 if (slen > 0) { 04977 r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end, 04978 env->enc, env->case_fold_flag); 04979 if (r != 0) break; 04980 } 04981 04982 max = slen; 04983 } 04984 04985 set_mml(&opt->len, slen, max); 04986 } 04987 04988 if ((OnigDistance )opt->exb.len == slen) 04989 opt->exb.reach_end = 1; 04990 } 04991 break; 04992 04993 case NT_CCLASS: 04994 { 04995 int i, z; 04996 CClassNode* cc = NCCLASS(node); 04997 04998 /* no need to check ignore case. 
(set in setup_tree()) */ 04999 05000 if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) { 05001 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc); 05002 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc); 05003 05004 set_mml(&opt->len, min, max); 05005 } 05006 else { 05007 for (i = 0; i < SINGLE_BYTE_SIZE; i++) { 05008 z = BITSET_AT(cc->bs, i); 05009 if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) { 05010 add_char_opt_map_info(&opt->map, (UChar )i, env->enc); 05011 } 05012 } 05013 set_mml(&opt->len, 1, 1); 05014 } 05015 } 05016 break; 05017 05018 case NT_CTYPE: 05019 { 05020 int i, min, max; 05021 int maxcode; 05022 05023 max = ONIGENC_MBC_MAXLEN_DIST(env->enc); 05024 05025 if (max == 1) { 05026 min = 1; 05027 05028 maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE; 05029 switch (NCTYPE(node)->ctype) { 05030 case ONIGENC_CTYPE_WORD: 05031 if (NCTYPE(node)->not != 0) { 05032 for (i = 0; i < SINGLE_BYTE_SIZE; i++) { 05033 if (! ONIGENC_IS_CODE_WORD(env->enc, i) || i >= maxcode) { 05034 add_char_opt_map_info(&opt->map, (UChar )i, env->enc); 05035 } 05036 } 05037 } 05038 else { 05039 for (i = 0; i < maxcode; i++) { 05040 if (ONIGENC_IS_CODE_WORD(env->enc, i)) { 05041 add_char_opt_map_info(&opt->map, (UChar )i, env->enc); 05042 } 05043 } 05044 } 05045 break; 05046 } 05047 } 05048 else { 05049 min = ONIGENC_MBC_MINLEN(env->enc); 05050 } 05051 set_mml(&opt->len, min, max); 05052 } 05053 break; 05054 05055 case NT_CANY: 05056 { 05057 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc); 05058 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc); 05059 set_mml(&opt->len, min, max); 05060 } 05061 break; 05062 05063 case NT_ANCHOR: 05064 switch (NANCHOR(node)->type) { 05065 case ANCHOR_BEGIN_BUF: 05066 case ANCHOR_BEGIN_POSITION: 05067 case ANCHOR_BEGIN_LINE: 05068 case ANCHOR_END_BUF: 05069 case ANCHOR_SEMI_END_BUF: 05070 case ANCHOR_END_LINE: 05071 case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */ 05072 add_opt_anc_info(&opt->anc, NANCHOR(node)->type); 
05073 break; 05074 05075 case ANCHOR_PREC_READ: 05076 { 05077 NodeOptInfo nopt; 05078 05079 r = optimize_node_left(NANCHOR(node)->target, &nopt, env); 05080 if (r == 0) { 05081 if (nopt.exb.len > 0) 05082 copy_opt_exact_info(&opt->expr, &nopt.exb); 05083 else if (nopt.exm.len > 0) 05084 copy_opt_exact_info(&opt->expr, &nopt.exm); 05085 05086 opt->expr.reach_end = 0; 05087 05088 if (nopt.map.value > 0) 05089 copy_opt_map_info(&opt->map, &nopt.map); 05090 } 05091 } 05092 break; 05093 05094 case ANCHOR_PREC_READ_NOT: 05095 case ANCHOR_LOOK_BEHIND_NOT: 05096 break; 05097 } 05098 break; 05099 05100 case NT_BREF: 05101 { 05102 int i; 05103 int* backs; 05104 OnigDistance min, max, tmin, tmax; 05105 Node** nodes = SCANENV_MEM_NODES(env->scan_env); 05106 BRefNode* br = NBREF(node); 05107 05108 if (br->state & NST_RECURSION) { 05109 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); 05110 break; 05111 } 05112 backs = BACKREFS_P(br); 05113 r = get_min_match_length(nodes[backs[0]], &min, env->scan_env); 05114 if (r != 0) break; 05115 r = get_max_match_length(nodes[backs[0]], &max, env->scan_env); 05116 if (r != 0) break; 05117 for (i = 1; i < br->back_num; i++) { 05118 r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env); 05119 if (r != 0) break; 05120 r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env); 05121 if (r != 0) break; 05122 if (min > tmin) min = tmin; 05123 if (max < tmax) max = tmax; 05124 } 05125 if (r == 0) set_mml(&opt->len, min, max); 05126 } 05127 break; 05128 05129 #ifdef USE_SUBEXP_CALL 05130 case NT_CALL: 05131 if (IS_CALL_RECURSION(NCALL(node))) 05132 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); 05133 else { 05134 OnigOptionType save = env->options; 05135 env->options = NENCLOSE(NCALL(node)->target)->option; 05136 r = optimize_node_left(NCALL(node)->target, opt, env); 05137 env->options = save; 05138 } 05139 break; 05140 #endif 05141 05142 case NT_QTFR: 05143 { 05144 int i; 05145 OnigDistance min, max; 05146 NodeOptInfo nopt; 05147 
QtfrNode* qn = NQTFR(node); 05148 05149 r = optimize_node_left(qn->target, &nopt, env); 05150 if (r) break; 05151 05152 if (/*qn->lower == 0 &&*/ IS_REPEAT_INFINITE(qn->upper)) { 05153 if (env->mmd.max == 0 && 05154 NTYPE(qn->target) == NT_CANY && qn->greedy) { 05155 if (IS_MULTILINE(env->options)) 05156 /* implicit anchor: /.*a/ ==> /\A.*a/ */ 05157 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML); 05158 else 05159 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR); 05160 } 05161 } 05162 else { 05163 if (qn->lower > 0) { 05164 copy_node_opt_info(opt, &nopt); 05165 if (nopt.exb.len > 0) { 05166 if (nopt.exb.reach_end) { 05167 for (i = 2; i <= qn->lower && 05168 ! is_full_opt_exact_info(&opt->exb); i++) { 05169 concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc); 05170 } 05171 if (i < qn->lower) { 05172 opt->exb.reach_end = 0; 05173 } 05174 } 05175 } 05176 05177 if (qn->lower != qn->upper) { 05178 opt->exb.reach_end = 0; 05179 opt->exm.reach_end = 0; 05180 } 05181 if (qn->lower > 1) 05182 opt->exm.reach_end = 0; 05183 } 05184 } 05185 05186 min = distance_multiply(nopt.len.min, qn->lower); 05187 if (IS_REPEAT_INFINITE(qn->upper)) 05188 max = (nopt.len.max > 0 ? 
ONIG_INFINITE_DISTANCE : 0); 05189 else 05190 max = distance_multiply(nopt.len.max, qn->upper); 05191 05192 set_mml(&opt->len, min, max); 05193 } 05194 break; 05195 05196 case NT_ENCLOSE: 05197 { 05198 EncloseNode* en = NENCLOSE(node); 05199 05200 switch (en->type) { 05201 case ENCLOSE_OPTION: 05202 { 05203 OnigOptionType save = env->options; 05204 05205 env->options = en->option; 05206 r = optimize_node_left(en->target, opt, env); 05207 env->options = save; 05208 } 05209 break; 05210 05211 case ENCLOSE_MEMORY: 05212 #ifdef USE_SUBEXP_CALL 05213 en->opt_count++; 05214 if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) { 05215 OnigDistance min, max; 05216 05217 min = 0; 05218 max = ONIG_INFINITE_DISTANCE; 05219 if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len; 05220 if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len; 05221 set_mml(&opt->len, min, max); 05222 } 05223 else 05224 #endif 05225 { 05226 r = optimize_node_left(en->target, opt, env); 05227 05228 if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) { 05229 if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum)) 05230 remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK); 05231 } 05232 } 05233 break; 05234 05235 case ENCLOSE_STOP_BACKTRACK: 05236 case ENCLOSE_CONDITION: 05237 r = optimize_node_left(en->target, opt, env); 05238 break; 05239 } 05240 } 05241 break; 05242 05243 default: 05244 #ifdef ONIG_DEBUG 05245 if (!onig_is_prelude()) fprintf(stderr, "optimize_node_left: undefined node type %d\n", 05246 NTYPE(node)); 05247 #endif 05248 r = ONIGERR_TYPE_BUG; 05249 break; 05250 } 05251 05252 return r; 05253 } 05254 05255 static int 05256 set_optimize_exact_info(regex_t* reg, OptExactInfo* e) 05257 { 05258 int r; 05259 int allow_reverse; 05260 05261 if (e->len == 0) return 0; 05262 05263 reg->exact = (UChar* )xmalloc(e->len); 05264 CHECK_NULL_RETURN_MEMERR(reg->exact); 05265 xmemcpy(reg->exact, e->s, e->len); 05266 reg->exact_end = reg->exact + e->len; 05267 05268 allow_reverse = 05269 
ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end); 05270 05271 if (e->ignore_case > 0) { 05272 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { 05273 r = set_bm_skip(reg->exact, reg->exact_end, reg, 05274 reg->map, &(reg->int_map), 1); 05275 if (r == 0) { 05276 reg->optimize = (allow_reverse != 0 05277 ? ONIG_OPTIMIZE_EXACT_BM_IC : ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC); 05278 } 05279 else { 05280 reg->optimize = ONIG_OPTIMIZE_EXACT_IC; 05281 } 05282 } 05283 else { 05284 reg->optimize = ONIG_OPTIMIZE_EXACT_IC; 05285 } 05286 } 05287 else { 05288 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { 05289 r = set_bm_skip(reg->exact, reg->exact_end, reg, 05290 reg->map, &(reg->int_map), 0); 05291 if (r) return r; 05292 05293 reg->optimize = (allow_reverse != 0 05294 ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV); 05295 } 05296 else { 05297 reg->optimize = ONIG_OPTIMIZE_EXACT; 05298 } 05299 } 05300 05301 reg->dmin = e->mmd.min; 05302 reg->dmax = e->mmd.max; 05303 05304 if (reg->dmin != ONIG_INFINITE_DISTANCE) { 05305 reg->threshold_len = (int )(reg->dmin + (reg->exact_end - reg->exact)); 05306 } 05307 05308 return 0; 05309 } 05310 05311 static void 05312 set_optimize_map_info(regex_t* reg, OptMapInfo* m) 05313 { 05314 int i; 05315 05316 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) 05317 reg->map[i] = m->map[i]; 05318 05319 reg->optimize = ONIG_OPTIMIZE_MAP; 05320 reg->dmin = m->mmd.min; 05321 reg->dmax = m->mmd.max; 05322 05323 if (reg->dmin != ONIG_INFINITE_DISTANCE) { 05324 reg->threshold_len = (int )(reg->dmin + 1); 05325 } 05326 } 05327 05328 static void 05329 set_sub_anchor(regex_t* reg, OptAncInfo* anc) 05330 { 05331 reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE; 05332 reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE; 05333 } 05334 05335 #ifdef ONIG_DEBUG 05336 static void print_optimize_info(FILE* f, regex_t* reg); 05337 #endif 05338 05339 static int 05340 set_optimize_info_from_tree(Node* node, regex_t* reg, 
ScanEnv* scan_env) 05341 { 05342 05343 int r; 05344 NodeOptInfo opt; 05345 OptEnv env; 05346 05347 env.enc = reg->enc; 05348 env.options = reg->options; 05349 env.case_fold_flag = reg->case_fold_flag; 05350 env.scan_env = scan_env; 05351 clear_mml(&env.mmd); 05352 05353 r = optimize_node_left(node, &opt, &env); 05354 if (r) return r; 05355 05356 reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF | 05357 ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML | 05358 ANCHOR_LOOK_BEHIND); 05359 05360 reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF); 05361 05362 if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) { 05363 reg->anchor_dmin = opt.len.min; 05364 reg->anchor_dmax = opt.len.max; 05365 } 05366 05367 if (opt.exb.len > 0 || opt.exm.len > 0) { 05368 select_opt_exact_info(reg->enc, &opt.exb, &opt.exm); 05369 if (opt.map.value > 0 && 05370 comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) { 05371 goto set_map; 05372 } 05373 else { 05374 r = set_optimize_exact_info(reg, &opt.exb); 05375 set_sub_anchor(reg, &opt.exb.anc); 05376 } 05377 } 05378 else if (opt.map.value > 0) { 05379 set_map: 05380 set_optimize_map_info(reg, &opt.map); 05381 set_sub_anchor(reg, &opt.map.anc); 05382 } 05383 else { 05384 reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE; 05385 if (opt.len.max == 0) 05386 reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE; 05387 } 05388 05389 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH) 05390 if (!onig_is_prelude()) print_optimize_info(stderr, reg); 05391 #endif 05392 return r; 05393 } 05394 05395 static void 05396 clear_optimize_info(regex_t* reg) 05397 { 05398 reg->optimize = ONIG_OPTIMIZE_NONE; 05399 reg->anchor = 0; 05400 reg->anchor_dmin = 0; 05401 reg->anchor_dmax = 0; 05402 reg->sub_anchor = 0; 05403 reg->exact_end = (UChar* )NULL; 05404 reg->threshold_len = 0; 05405 if (IS_NOT_NULL(reg->exact)) { 05406 xfree(reg->exact); 05407 reg->exact = (UChar* )NULL; 05408 } 
05409 } 05410 05411 #ifdef ONIG_DEBUG 05412 05413 static void print_enc_string(FILE* fp, OnigEncoding enc, 05414 const UChar *s, const UChar *end) 05415 { 05416 fprintf(fp, "\nPATTERN: /"); 05417 05418 if (ONIGENC_MBC_MINLEN(enc) > 1) { 05419 const UChar *p; 05420 OnigCodePoint code; 05421 05422 p = s; 05423 while (p < end) { 05424 code = ONIGENC_MBC_TO_CODE(enc, p, end); 05425 if (code >= 0x80) { 05426 fprintf(fp, " 0x%04x ", (int )code); 05427 } 05428 else { 05429 fputc((int )code, fp); 05430 } 05431 05432 p += enclen(enc, p, end); 05433 } 05434 } 05435 else { 05436 while (s < end) { 05437 fputc((int )*s, fp); 05438 s++; 05439 } 05440 } 05441 05442 fprintf(fp, "/ (%s)\n", enc->name); 05443 } 05444 05445 static void 05446 print_distance_range(FILE* f, OnigDistance a, OnigDistance b) 05447 { 05448 if (a == ONIG_INFINITE_DISTANCE) 05449 fputs("inf", f); 05450 else 05451 fprintf(f, "(%"PRIuSIZE")", a); 05452 05453 fputs("-", f); 05454 05455 if (b == ONIG_INFINITE_DISTANCE) 05456 fputs("inf", f); 05457 else 05458 fprintf(f, "(%"PRIuSIZE")", b); 05459 } 05460 05461 static void 05462 print_anchor(FILE* f, int anchor) 05463 { 05464 int q = 0; 05465 05466 fprintf(f, "["); 05467 05468 if (anchor & ANCHOR_BEGIN_BUF) { 05469 fprintf(f, "begin-buf"); 05470 q = 1; 05471 } 05472 if (anchor & ANCHOR_BEGIN_LINE) { 05473 if (q) fprintf(f, ", "); 05474 q = 1; 05475 fprintf(f, "begin-line"); 05476 } 05477 if (anchor & ANCHOR_BEGIN_POSITION) { 05478 if (q) fprintf(f, ", "); 05479 q = 1; 05480 fprintf(f, "begin-pos"); 05481 } 05482 if (anchor & ANCHOR_END_BUF) { 05483 if (q) fprintf(f, ", "); 05484 q = 1; 05485 fprintf(f, "end-buf"); 05486 } 05487 if (anchor & ANCHOR_SEMI_END_BUF) { 05488 if (q) fprintf(f, ", "); 05489 q = 1; 05490 fprintf(f, "semi-end-buf"); 05491 } 05492 if (anchor & ANCHOR_END_LINE) { 05493 if (q) fprintf(f, ", "); 05494 q = 1; 05495 fprintf(f, "end-line"); 05496 } 05497 if (anchor & ANCHOR_ANYCHAR_STAR) { 05498 if (q) fprintf(f, ", "); 05499 q = 1; 05500 
fprintf(f, "anychar-star"); 05501 } 05502 if (anchor & ANCHOR_ANYCHAR_STAR_ML) { 05503 if (q) fprintf(f, ", "); 05504 fprintf(f, "anychar-star-ml"); 05505 } 05506 05507 fprintf(f, "]"); 05508 } 05509 05510 static void 05511 print_optimize_info(FILE* f, regex_t* reg) 05512 { 05513 static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV", 05514 "EXACT_IC", "MAP", 05515 "EXACT_BM_IC", "EXACT_BM_NOT_REV_IC" }; 05516 05517 fprintf(f, "optimize: %s\n", on[reg->optimize]); 05518 fprintf(f, " anchor: "); print_anchor(f, reg->anchor); 05519 if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0) 05520 print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax); 05521 fprintf(f, "\n"); 05522 05523 if (reg->optimize) { 05524 fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor); 05525 fprintf(f, "\n"); 05526 } 05527 fprintf(f, "\n"); 05528 05529 if (reg->exact) { 05530 UChar *p; 05531 fprintf(f, "exact: ["); 05532 for (p = reg->exact; p < reg->exact_end; p++) { 05533 fputc(*p, f); 05534 } 05535 fprintf(f, "]: length: %ld\n", (reg->exact_end - reg->exact)); 05536 } 05537 else if (reg->optimize & ONIG_OPTIMIZE_MAP) { 05538 int c, i, n = 0; 05539 05540 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) 05541 if (reg->map[i]) n++; 05542 05543 fprintf(f, "map: n=%d\n", n); 05544 if (n > 0) { 05545 c = 0; 05546 fputc('[', f); 05547 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) { 05548 if (reg->map[i] != 0) { 05549 if (c > 0) fputs(", ", f); 05550 c++; 05551 if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 && 05552 ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i)) 05553 fputc(i, f); 05554 else 05555 fprintf(f, "%d", i); 05556 } 05557 } 05558 fprintf(f, "]\n"); 05559 } 05560 } 05561 } 05562 #endif /* ONIG_DEBUG */ 05563 05564 05565 extern void 05566 onig_free_body(regex_t* reg) 05567 { 05568 if (IS_NOT_NULL(reg)) { 05569 if (IS_NOT_NULL(reg->p)) xfree(reg->p); 05570 if (IS_NOT_NULL(reg->exact)) xfree(reg->exact); 05571 if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map); 05572 if 
(IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward); 05573 if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range); 05574 if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain); 05575 05576 #ifdef USE_NAMED_GROUP 05577 onig_names_free(reg); 05578 #endif 05579 } 05580 } 05581 05582 extern void 05583 onig_free(regex_t* reg) 05584 { 05585 if (IS_NOT_NULL(reg)) { 05586 onig_free_body(reg); 05587 xfree(reg); 05588 } 05589 } 05590 05591 size_t 05592 onig_memsize(const regex_t *reg) 05593 { 05594 size_t size = sizeof(regex_t); 05595 if (IS_NULL(reg)) return 0; 05596 if (IS_NOT_NULL(reg->p)) size += reg->alloc; 05597 if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact; 05598 if (IS_NOT_NULL(reg->int_map)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE; 05599 if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE; 05600 if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc * sizeof(OnigRepeatRange); 05601 if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain); 05602 05603 return size; 05604 } 05605 05606 size_t 05607 onig_region_memsize(const OnigRegion *regs) 05608 { 05609 size_t size = sizeof(*regs); 05610 if (IS_NULL(regs)) return 0; 05611 size += regs->allocated * (sizeof(*regs->beg) + sizeof(*regs->end)); 05612 return size; 05613 } 05614 05615 #define REGEX_TRANSFER(to,from) do {\ 05616 (to)->state = ONIG_STATE_MODIFY;\ 05617 onig_free_body(to);\ 05618 xmemcpy(to, from, sizeof(regex_t));\ 05619 xfree(from);\ 05620 } while (0) 05621 05622 extern void 05623 onig_transfer(regex_t* to, regex_t* from) 05624 { 05625 THREAD_ATOMIC_START; 05626 REGEX_TRANSFER(to, from); 05627 THREAD_ATOMIC_END; 05628 } 05629 05630 #define REGEX_CHAIN_HEAD(reg) do {\ 05631 while (IS_NOT_NULL((reg)->chain)) {\ 05632 (reg) = (reg)->chain;\ 05633 }\ 05634 } while (0) 05635 05636 extern void 05637 onig_chain_link_add(regex_t* to, regex_t* add) 05638 { 05639 THREAD_ATOMIC_START; 05640 REGEX_CHAIN_HEAD(to); 05641 to->chain = 
add; 05642 THREAD_ATOMIC_END; 05643 } 05644 05645 extern void 05646 onig_chain_reduce(regex_t* reg) 05647 { 05648 regex_t *head, *prev; 05649 05650 prev = reg; 05651 head = prev->chain; 05652 if (IS_NOT_NULL(head)) { 05653 reg->state = ONIG_STATE_MODIFY; 05654 while (IS_NOT_NULL(head->chain)) { 05655 prev = head; 05656 head = head->chain; 05657 } 05658 prev->chain = (regex_t* )NULL; 05659 REGEX_TRANSFER(reg, head); 05660 } 05661 } 05662 05663 #ifdef ONIG_DEBUG 05664 static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg)); 05665 #endif 05666 #ifdef ONIG_DEBUG_PARSE_TREE 05667 static void print_tree P_((FILE* f, Node* node)); 05668 #endif 05669 05670 extern int 05671 onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, 05672 OnigErrorInfo* einfo, const char *sourcefile, int sourceline) 05673 { 05674 #define COMPILE_INIT_SIZE 20 05675 05676 int r; 05677 OnigDistance init_size; 05678 Node* root; 05679 ScanEnv scan_env = {0}; 05680 #ifdef USE_SUBEXP_CALL 05681 UnsetAddrList uslist; 05682 #endif 05683 05684 if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL; 05685 05686 scan_env.sourcefile = sourcefile; 05687 scan_env.sourceline = sourceline; 05688 reg->state = ONIG_STATE_COMPILING; 05689 05690 #ifdef ONIG_DEBUG 05691 if (!onig_is_prelude()) print_enc_string(stderr, reg->enc, pattern, pattern_end); 05692 #endif 05693 05694 if (reg->alloc == 0) { 05695 init_size = (pattern_end - pattern) * 2; 05696 if (init_size <= 0) init_size = COMPILE_INIT_SIZE; 05697 r = BBUF_INIT(reg, init_size); 05698 if (r != 0) goto end; 05699 } 05700 else 05701 reg->used = 0; 05702 05703 reg->num_mem = 0; 05704 reg->num_repeat = 0; 05705 reg->num_null_check = 0; 05706 reg->repeat_range_alloc = 0; 05707 reg->repeat_range = (OnigRepeatRange* )NULL; 05708 #ifdef USE_COMBINATION_EXPLOSION_CHECK 05709 reg->num_comb_exp_check = 0; 05710 #endif 05711 05712 r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env); 05713 if (r != 0) goto err; 05714 05715 
#ifdef ONIG_DEBUG_PARSE_TREE 05716 # if 0 05717 fprintf(stderr, "ORIGINAL PARSE TREE:\n"); 05718 if (!onig_is_prelude()) { 05719 print_tree(stderr, root); 05720 } 05721 # endif 05722 #endif 05723 05724 #ifdef USE_NAMED_GROUP 05725 /* mixed use named group and no-named group */ 05726 if (scan_env.num_named > 0 && 05727 IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && 05728 !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) { 05729 if (scan_env.num_named != scan_env.num_mem) 05730 r = disable_noname_group_capture(&root, reg, &scan_env); 05731 else 05732 r = numbered_ref_check(root); 05733 05734 if (r != 0) goto err; 05735 } 05736 #endif 05737 05738 #ifdef USE_SUBEXP_CALL 05739 if (scan_env.num_call > 0) { 05740 r = unset_addr_list_init(&uslist, scan_env.num_call); 05741 if (r != 0) goto err; 05742 scan_env.unset_addr_list = &uslist; 05743 r = setup_subexp_call(root, &scan_env); 05744 if (r != 0) goto err_unset; 05745 r = subexp_recursive_check_trav(root, &scan_env); 05746 if (r < 0) goto err_unset; 05747 r = subexp_inf_recursive_check_trav(root, &scan_env); 05748 if (r != 0) goto err_unset; 05749 05750 reg->num_call = scan_env.num_call; 05751 } 05752 else 05753 reg->num_call = 0; 05754 #endif 05755 05756 r = setup_tree(root, reg, IN_ROOT, &scan_env); 05757 if (r != 0) goto err_unset; 05758 05759 #ifdef ONIG_DEBUG_PARSE_TREE 05760 if (!onig_is_prelude()) print_tree(stderr, root); 05761 #endif 05762 05763 reg->capture_history = scan_env.capture_history; 05764 reg->bt_mem_start = scan_env.bt_mem_start; 05765 reg->bt_mem_start |= reg->capture_history; 05766 if (IS_FIND_CONDITION(reg->options)) 05767 BIT_STATUS_ON_ALL(reg->bt_mem_end); 05768 else { 05769 reg->bt_mem_end = scan_env.bt_mem_end; 05770 reg->bt_mem_end |= reg->capture_history; 05771 } 05772 05773 #ifdef USE_COMBINATION_EXPLOSION_CHECK 05774 if (scan_env.backrefed_mem == 0 05775 #ifdef USE_SUBEXP_CALL 05776 || scan_env.num_call == 0 05777 #endif 05778 ) { 05779 
setup_comb_exp_check(root, 0, &scan_env); 05780 #ifdef USE_SUBEXP_CALL 05781 if (scan_env.has_recursion != 0) { 05782 scan_env.num_comb_exp_check = 0; 05783 } 05784 else 05785 #endif 05786 if (scan_env.comb_exp_max_regnum > 0) { 05787 int i; 05788 for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) { 05789 if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) { 05790 scan_env.num_comb_exp_check = 0; 05791 break; 05792 } 05793 } 05794 } 05795 } 05796 05797 reg->num_comb_exp_check = scan_env.num_comb_exp_check; 05798 #endif 05799 05800 clear_optimize_info(reg); 05801 #ifndef ONIG_DONT_OPTIMIZE 05802 r = set_optimize_info_from_tree(root, reg, &scan_env); 05803 if (r != 0) goto err_unset; 05804 #endif 05805 05806 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) { 05807 xfree(scan_env.mem_nodes_dynamic); 05808 scan_env.mem_nodes_dynamic = (Node** )NULL; 05809 } 05810 05811 r = compile_tree(root, reg); 05812 if (r == 0) { 05813 r = add_opcode(reg, OP_END); 05814 #ifdef USE_SUBEXP_CALL 05815 if (scan_env.num_call > 0) { 05816 r = unset_addr_list_fix(&uslist, reg); 05817 unset_addr_list_end(&uslist); 05818 if (r) goto err; 05819 } 05820 #endif 05821 05822 if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0)) 05823 reg->stack_pop_level = STACK_POP_LEVEL_ALL; 05824 else { 05825 if (reg->bt_mem_start != 0) 05826 reg->stack_pop_level = STACK_POP_LEVEL_MEM_START; 05827 else 05828 reg->stack_pop_level = STACK_POP_LEVEL_FREE; 05829 } 05830 } 05831 #ifdef USE_SUBEXP_CALL 05832 else if (scan_env.num_call > 0) { 05833 unset_addr_list_end(&uslist); 05834 } 05835 #endif 05836 onig_node_free(root); 05837 05838 #ifdef ONIG_DEBUG_COMPILE 05839 #ifdef USE_NAMED_GROUP 05840 if (!onig_is_prelude()) onig_print_names(stderr, reg); 05841 #endif 05842 if (!onig_is_prelude()) print_compiled_byte_code_list(stderr, reg); 05843 #endif 05844 05845 end: 05846 reg->state = ONIG_STATE_NORMAL; 05847 return r; 05848 05849 err_unset: 05850 #ifdef USE_SUBEXP_CALL 05851 if (scan_env.num_call > 0) { 05852 
unset_addr_list_end(&uslist); 05853 } 05854 #endif 05855 err: 05856 if (IS_NOT_NULL(scan_env.error)) { 05857 if (IS_NOT_NULL(einfo)) { 05858 einfo->enc = scan_env.enc; 05859 einfo->par = scan_env.error; 05860 einfo->par_end = scan_env.error_end; 05861 } 05862 } 05863 05864 onig_node_free(root); 05865 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) 05866 xfree(scan_env.mem_nodes_dynamic); 05867 return r; 05868 } 05869 05870 #ifdef USE_RECOMPILE_API 05871 extern int 05872 onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, 05873 OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, 05874 OnigErrorInfo* einfo) 05875 { 05876 int r; 05877 regex_t *new_reg; 05878 05879 r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo); 05880 if (r) return r; 05881 if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) { 05882 onig_transfer(reg, new_reg); 05883 } 05884 else { 05885 onig_chain_link_add(reg, new_reg); 05886 } 05887 return 0; 05888 } 05889 #endif 05890 05891 static int onig_inited = 0; 05892 05893 extern int 05894 onig_reg_init(regex_t* reg, OnigOptionType option, 05895 OnigCaseFoldType case_fold_flag, 05896 OnigEncoding enc, const OnigSyntaxType* syntax) 05897 { 05898 if (! 
onig_inited) 05899 onig_init(); 05900 05901 if (IS_NULL(reg)) 05902 return ONIGERR_INVALID_ARGUMENT; 05903 05904 if (ONIGENC_IS_UNDEF(enc)) 05905 return ONIGERR_DEFAULT_ENCODING_IS_NOT_SET; 05906 05907 if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) 05908 == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) { 05909 return ONIGERR_INVALID_COMBINATION_OF_OPTIONS; 05910 } 05911 05912 (reg)->state = ONIG_STATE_MODIFY; 05913 05914 if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) { 05915 option |= syntax->options; 05916 option &= ~ONIG_OPTION_SINGLELINE; 05917 } 05918 else 05919 option |= syntax->options; 05920 05921 (reg)->enc = enc; 05922 (reg)->options = option; 05923 (reg)->syntax = syntax; 05924 (reg)->optimize = 0; 05925 (reg)->exact = (UChar* )NULL; 05926 (reg)->int_map = (int* )NULL; 05927 (reg)->int_map_backward = (int* )NULL; 05928 (reg)->chain = (regex_t* )NULL; 05929 05930 (reg)->p = (UChar* )NULL; 05931 (reg)->alloc = 0; 05932 (reg)->used = 0; 05933 (reg)->name_table = (void* )NULL; 05934 05935 (reg)->case_fold_flag = case_fold_flag; 05936 return 0; 05937 } 05938 05939 extern int 05940 onig_new_without_alloc(regex_t* reg, const UChar* pattern, 05941 const UChar* pattern_end, OnigOptionType option, OnigEncoding enc, 05942 OnigSyntaxType* syntax, OnigErrorInfo* einfo) 05943 { 05944 int r; 05945 05946 r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax); 05947 if (r) return r; 05948 05949 r = onig_compile(reg, pattern, pattern_end, einfo, NULL, 0); 05950 return r; 05951 } 05952 05953 extern int 05954 onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end, 05955 OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax, 05956 OnigErrorInfo* einfo) 05957 { 05958 int r; 05959 05960 *reg = (regex_t* )xmalloc(sizeof(regex_t)); 05961 if (IS_NULL(*reg)) return ONIGERR_MEMORY; 05962 05963 r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax); 05964 if (r) goto err; 
05965 05966 r = onig_compile(*reg, pattern, pattern_end, einfo, NULL, 0); 05967 if (r) { 05968 err: 05969 onig_free(*reg); 05970 *reg = NULL; 05971 } 05972 return r; 05973 } 05974 05975 05976 extern int 05977 onig_init(void) 05978 { 05979 if (onig_inited != 0) 05980 return 0; 05981 05982 THREAD_SYSTEM_INIT; 05983 THREAD_ATOMIC_START; 05984 05985 onig_inited = 1; 05986 05987 onigenc_init(); 05988 /* onigenc_set_default_caseconv_table((UChar* )0); */ 05989 05990 #ifdef ONIG_DEBUG_STATISTICS 05991 onig_statistics_init(); 05992 #endif 05993 05994 THREAD_ATOMIC_END; 05995 return 0; 05996 } 05997 05998 05999 extern int 06000 onig_end(void) 06001 { 06002 THREAD_ATOMIC_START; 06003 06004 #ifdef ONIG_DEBUG_STATISTICS 06005 if (!onig_is_prelude()) onig_print_statistics(stderr); 06006 #endif 06007 06008 #ifdef USE_SHARED_CCLASS_TABLE 06009 onig_free_shared_cclass_table(); 06010 #endif 06011 06012 #ifdef USE_PARSE_TREE_NODE_RECYCLE 06013 onig_free_node_list(); 06014 #endif 06015 06016 onig_inited = 0; 06017 06018 THREAD_ATOMIC_END; 06019 THREAD_SYSTEM_END; 06020 return 0; 06021 } 06022 06023 extern int 06024 onig_is_in_code_range(const UChar* p, OnigCodePoint code) 06025 { 06026 OnigCodePoint n, *data; 06027 OnigCodePoint low, high, x; 06028 06029 GET_CODE_POINT(n, p); 06030 data = (OnigCodePoint* )p; 06031 data++; 06032 06033 for (low = 0, high = n; low < high; ) { 06034 x = (low + high) >> 1; 06035 if (code > data[x * 2 + 1]) 06036 low = x + 1; 06037 else 06038 high = x; 06039 } 06040 06041 return ((low < n && code >= data[low * 2]) ? 1 : 0); 06042 } 06043 06044 extern int 06045 onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc) 06046 { 06047 int found; 06048 06049 if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) { 06050 if (IS_NULL(cc->mbuf)) { 06051 found = 0; 06052 } 06053 else { 06054 found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0); 06055 } 06056 } 06057 else { 06058 found = (BITSET_AT(cc->bs, code) == 0 ? 
0 : 1); 06059 } 06060 06061 if (IS_NCCLASS_NOT(cc)) 06062 return !found; 06063 else 06064 return found; 06065 } 06066 06067 extern int 06068 onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc) 06069 { 06070 int len; 06071 06072 if (ONIGENC_MBC_MINLEN(enc) > 1) { 06073 len = 2; 06074 } 06075 else { 06076 len = ONIGENC_CODE_TO_MBCLEN(enc, code); 06077 } 06078 return onig_is_code_in_cc_len(len, code, cc); 06079 } 06080 06081 06082 #ifdef ONIG_DEBUG 06083 06084 /* arguments type */ 06085 #define ARG_SPECIAL -1 06086 #define ARG_NON 0 06087 #define ARG_RELADDR 1 06088 #define ARG_ABSADDR 2 06089 #define ARG_LENGTH 3 06090 #define ARG_MEMNUM 4 06091 #define ARG_OPTION 5 06092 #define ARG_STATE_CHECK 6 06093 06094 OnigOpInfoType OnigOpInfo[] = { 06095 { OP_FINISH, "finish", ARG_NON }, 06096 { OP_END, "end", ARG_NON }, 06097 { OP_EXACT1, "exact1", ARG_SPECIAL }, 06098 { OP_EXACT2, "exact2", ARG_SPECIAL }, 06099 { OP_EXACT3, "exact3", ARG_SPECIAL }, 06100 { OP_EXACT4, "exact4", ARG_SPECIAL }, 06101 { OP_EXACT5, "exact5", ARG_SPECIAL }, 06102 { OP_EXACTN, "exactn", ARG_SPECIAL }, 06103 { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL }, 06104 { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL }, 06105 { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL }, 06106 { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL }, 06107 { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL }, 06108 { OP_EXACTMBN, "exactmbn", ARG_SPECIAL }, 06109 { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL }, 06110 { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL }, 06111 { OP_CCLASS, "cclass", ARG_SPECIAL }, 06112 { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL }, 06113 { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL }, 06114 { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL }, 06115 { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL }, 06116 { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL }, 06117 { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL }, 06118 { OP_ANYCHAR, "anychar", ARG_NON }, 06119 { OP_ANYCHAR_ML, "anychar-ml", ARG_NON }, 06120 { 
OP_ANYCHAR_STAR, "anychar*", ARG_NON }, 06121 { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON }, 06122 { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL }, 06123 { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL }, 06124 { OP_WORD, "word", ARG_NON }, 06125 { OP_NOT_WORD, "not-word", ARG_NON }, 06126 { OP_WORD_BOUND, "word-bound", ARG_NON }, 06127 { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON }, 06128 { OP_WORD_BEGIN, "word-begin", ARG_NON }, 06129 { OP_WORD_END, "word-end", ARG_NON }, 06130 { OP_ASCII_WORD, "ascii-word", ARG_NON }, 06131 { OP_NOT_ASCII_WORD, "not-ascii-word", ARG_NON }, 06132 { OP_ASCII_WORD_BOUND, "ascii-word-bound", ARG_NON }, 06133 { OP_NOT_ASCII_WORD_BOUND,"not-ascii-word-bound", ARG_NON }, 06134 { OP_ASCII_WORD_BEGIN, "ascii-word-begin", ARG_NON }, 06135 { OP_ASCII_WORD_END, "ascii-word-end", ARG_NON }, 06136 { OP_BEGIN_BUF, "begin-buf", ARG_NON }, 06137 { OP_END_BUF, "end-buf", ARG_NON }, 06138 { OP_BEGIN_LINE, "begin-line", ARG_NON }, 06139 { OP_END_LINE, "end-line", ARG_NON }, 06140 { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON }, 06141 { OP_BEGIN_POSITION, "begin-position", ARG_NON }, 06142 { OP_BEGIN_POS_OR_LINE, "begin-pos-or-line", ARG_NON }, 06143 { OP_BACKREF1, "backref1", ARG_NON }, 06144 { OP_BACKREF2, "backref2", ARG_NON }, 06145 { OP_BACKREFN, "backrefn", ARG_MEMNUM }, 06146 { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL }, 06147 { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL }, 06148 { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL }, 06149 { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL }, 06150 { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM }, 06151 { OP_MEMORY_START, "mem-start", ARG_MEMNUM }, 06152 { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM }, 06153 { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM }, 06154 { OP_MEMORY_END, "mem-end", ARG_MEMNUM }, 06155 { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM }, 06156 { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION }, 
06157 { OP_SET_OPTION, "set-option", ARG_OPTION }, 06158 { OP_KEEP, "keep", ARG_NON }, 06159 { OP_FAIL, "fail", ARG_NON }, 06160 { OP_JUMP, "jump", ARG_RELADDR }, 06161 { OP_PUSH, "push", ARG_RELADDR }, 06162 { OP_POP, "pop", ARG_NON }, 06163 { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL }, 06164 { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL }, 06165 { OP_REPEAT, "repeat", ARG_SPECIAL }, 06166 { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL }, 06167 { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM }, 06168 { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM }, 06169 { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM }, 06170 { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM }, 06171 { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM }, 06172 { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM }, 06173 { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM }, 06174 { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM }, 06175 { OP_PUSH_POS, "push-pos", ARG_NON }, 06176 { OP_POP_POS, "pop-pos", ARG_NON }, 06177 { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR }, 06178 { OP_FAIL_POS, "fail-pos", ARG_NON }, 06179 { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON }, 06180 { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON }, 06181 { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL }, 06182 { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL }, 06183 { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON }, 06184 { OP_CALL, "call", ARG_ABSADDR }, 06185 { OP_RETURN, "return", ARG_NON }, 06186 { OP_CONDITION, "condition", ARG_SPECIAL }, 06187 { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL }, 06188 { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL }, 06189 { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK }, 06190 { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK }, 06191 { OP_STATE_CHECK_ANYCHAR_ML_STAR, 06192 "state-check-anychar-ml*", ARG_STATE_CHECK }, 06193 { -1, "", ARG_NON } 06194 }; 06195 06196 
static const char* 06197 op2name(int opcode) 06198 { 06199 int i; 06200 06201 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { 06202 if (opcode == OnigOpInfo[i].opcode) 06203 return OnigOpInfo[i].name; 06204 } 06205 return ""; 06206 } 06207 06208 static int 06209 op2arg_type(int opcode) 06210 { 06211 int i; 06212 06213 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { 06214 if (opcode == OnigOpInfo[i].opcode) 06215 return OnigOpInfo[i].arg_type; 06216 } 06217 return ARG_SPECIAL; 06218 } 06219 06220 static void 06221 Indent(FILE* f, int indent) 06222 { 06223 int i; 06224 for (i = 0; i < indent; i++) putc(' ', f); 06225 } 06226 06227 static void 06228 p_string(FILE* f, int len, UChar* s) 06229 { 06230 fputs(":", f); 06231 while (len-- > 0) { fputc(*s++, f); } 06232 } 06233 06234 static void 06235 p_len_string(FILE* f, LengthType len, int mb_len, UChar* s) 06236 { 06237 int x = len * mb_len; 06238 06239 fprintf(f, ":%d:", len); 06240 while (x-- > 0) { fputc(*s++, f); } 06241 } 06242 06243 extern void 06244 onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp, 06245 OnigEncoding enc) 06246 { 06247 int i, n, arg_type; 06248 RelAddrType addr; 06249 LengthType len; 06250 MemNumType mem; 06251 StateCheckNumType scn; 06252 OnigCodePoint code; 06253 UChar *q; 06254 06255 fprintf(f, "[%s", op2name(*bp)); 06256 arg_type = op2arg_type(*bp); 06257 if (arg_type != ARG_SPECIAL) { 06258 bp++; 06259 switch (arg_type) { 06260 case ARG_NON: 06261 break; 06262 case ARG_RELADDR: 06263 GET_RELADDR_INC(addr, bp); 06264 fprintf(f, ":(%d)", addr); 06265 break; 06266 case ARG_ABSADDR: 06267 GET_ABSADDR_INC(addr, bp); 06268 fprintf(f, ":(%d)", addr); 06269 break; 06270 case ARG_LENGTH: 06271 GET_LENGTH_INC(len, bp); 06272 fprintf(f, ":%d", len); 06273 break; 06274 case ARG_MEMNUM: 06275 mem = *((MemNumType* )bp); 06276 bp += SIZE_MEMNUM; 06277 fprintf(f, ":%d", mem); 06278 break; 06279 case ARG_OPTION: 06280 { 06281 OnigOptionType option = *((OnigOptionType* )bp); 06282 bp 
+= SIZE_OPTION; 06283 fprintf(f, ":%d", option); 06284 } 06285 break; 06286 06287 case ARG_STATE_CHECK: 06288 scn = *((StateCheckNumType* )bp); 06289 bp += SIZE_STATE_CHECK_NUM; 06290 fprintf(f, ":%d", scn); 06291 break; 06292 } 06293 } 06294 else { 06295 switch (*bp++) { 06296 case OP_EXACT1: 06297 case OP_ANYCHAR_STAR_PEEK_NEXT: 06298 case OP_ANYCHAR_ML_STAR_PEEK_NEXT: 06299 p_string(f, 1, bp++); break; 06300 case OP_EXACT2: 06301 p_string(f, 2, bp); bp += 2; break; 06302 case OP_EXACT3: 06303 p_string(f, 3, bp); bp += 3; break; 06304 case OP_EXACT4: 06305 p_string(f, 4, bp); bp += 4; break; 06306 case OP_EXACT5: 06307 p_string(f, 5, bp); bp += 5; break; 06308 case OP_EXACTN: 06309 GET_LENGTH_INC(len, bp); 06310 p_len_string(f, len, 1, bp); 06311 bp += len; 06312 break; 06313 06314 case OP_EXACTMB2N1: 06315 p_string(f, 2, bp); bp += 2; break; 06316 case OP_EXACTMB2N2: 06317 p_string(f, 4, bp); bp += 4; break; 06318 case OP_EXACTMB2N3: 06319 p_string(f, 6, bp); bp += 6; break; 06320 case OP_EXACTMB2N: 06321 GET_LENGTH_INC(len, bp); 06322 p_len_string(f, len, 2, bp); 06323 bp += len * 2; 06324 break; 06325 case OP_EXACTMB3N: 06326 GET_LENGTH_INC(len, bp); 06327 p_len_string(f, len, 3, bp); 06328 bp += len * 3; 06329 break; 06330 case OP_EXACTMBN: 06331 { 06332 int mb_len; 06333 06334 GET_LENGTH_INC(mb_len, bp); 06335 GET_LENGTH_INC(len, bp); 06336 fprintf(f, ":%d:%d:", mb_len, len); 06337 n = len * mb_len; 06338 while (n-- > 0) { fputc(*bp++, f); } 06339 } 06340 break; 06341 06342 case OP_EXACT1_IC: 06343 len = enclen(enc, bp, bpend); 06344 p_string(f, len, bp); 06345 bp += len; 06346 break; 06347 case OP_EXACTN_IC: 06348 GET_LENGTH_INC(len, bp); 06349 p_len_string(f, len, 1, bp); 06350 bp += len; 06351 break; 06352 06353 case OP_CCLASS: 06354 n = bitset_on_num((BitSetRef )bp); 06355 bp += SIZE_BITSET; 06356 fprintf(f, ":%d", n); 06357 break; 06358 06359 case OP_CCLASS_NOT: 06360 n = bitset_on_num((BitSetRef )bp); 06361 bp += SIZE_BITSET; 06362 fprintf(f, ":%d", 
n); 06363 break; 06364 06365 case OP_CCLASS_MB: 06366 case OP_CCLASS_MB_NOT: 06367 GET_LENGTH_INC(len, bp); 06368 q = bp; 06369 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS 06370 ALIGNMENT_RIGHT(q); 06371 #endif 06372 GET_CODE_POINT(code, q); 06373 bp += len; 06374 fprintf(f, ":%d:%d", (int )code, len); 06375 break; 06376 06377 case OP_CCLASS_MIX: 06378 case OP_CCLASS_MIX_NOT: 06379 n = bitset_on_num((BitSetRef )bp); 06380 bp += SIZE_BITSET; 06381 GET_LENGTH_INC(len, bp); 06382 q = bp; 06383 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS 06384 ALIGNMENT_RIGHT(q); 06385 #endif 06386 GET_CODE_POINT(code, q); 06387 bp += len; 06388 fprintf(f, ":%d:%d:%d", n, (int )code, len); 06389 break; 06390 06391 case OP_CCLASS_NODE: 06392 { 06393 CClassNode *cc; 06394 06395 GET_POINTER_INC(cc, bp); 06396 n = bitset_on_num(cc->bs); 06397 fprintf(f, ":%"PRIuPTR":%d", (uintptr_t)cc, n); 06398 } 06399 break; 06400 06401 case OP_BACKREFN_IC: 06402 mem = *((MemNumType* )bp); 06403 bp += SIZE_MEMNUM; 06404 fprintf(f, ":%d", mem); 06405 break; 06406 06407 case OP_BACKREF_MULTI_IC: 06408 case OP_BACKREF_MULTI: 06409 fputs(" ", f); 06410 GET_LENGTH_INC(len, bp); 06411 for (i = 0; i < len; i++) { 06412 GET_MEMNUM_INC(mem, bp); 06413 if (i > 0) fputs(", ", f); 06414 fprintf(f, "%d", mem); 06415 } 06416 break; 06417 06418 case OP_BACKREF_WITH_LEVEL: 06419 { 06420 OnigOptionType option; 06421 LengthType level; 06422 06423 GET_OPTION_INC(option, bp); 06424 fprintf(f, ":%d", option); 06425 GET_LENGTH_INC(level, bp); 06426 fprintf(f, ":%d", level); 06427 06428 fputs(" ", f); 06429 GET_LENGTH_INC(len, bp); 06430 for (i = 0; i < len; i++) { 06431 GET_MEMNUM_INC(mem, bp); 06432 if (i > 0) fputs(", ", f); 06433 fprintf(f, "%d", mem); 06434 } 06435 } 06436 break; 06437 06438 case OP_REPEAT: 06439 case OP_REPEAT_NG: 06440 { 06441 mem = *((MemNumType* )bp); 06442 bp += SIZE_MEMNUM; 06443 addr = *((RelAddrType* )bp); 06444 bp += SIZE_RELADDR; 06445 fprintf(f, ":%d:%d", mem, addr); 06446 } 06447 break; 06448 06449 case 
OP_PUSH_OR_JUMP_EXACT1: 06450 case OP_PUSH_IF_PEEK_NEXT: 06451 addr = *((RelAddrType* )bp); 06452 bp += SIZE_RELADDR; 06453 fprintf(f, ":(%d)", addr); 06454 p_string(f, 1, bp); 06455 bp += 1; 06456 break; 06457 06458 case OP_LOOK_BEHIND: 06459 GET_LENGTH_INC(len, bp); 06460 fprintf(f, ":%d", len); 06461 break; 06462 06463 case OP_PUSH_LOOK_BEHIND_NOT: 06464 GET_RELADDR_INC(addr, bp); 06465 GET_LENGTH_INC(len, bp); 06466 fprintf(f, ":%d:(%d)", len, addr); 06467 break; 06468 06469 case OP_STATE_CHECK_PUSH: 06470 case OP_STATE_CHECK_PUSH_OR_JUMP: 06471 scn = *((StateCheckNumType* )bp); 06472 bp += SIZE_STATE_CHECK_NUM; 06473 addr = *((RelAddrType* )bp); 06474 bp += SIZE_RELADDR; 06475 fprintf(f, ":%d:(%d)", scn, addr); 06476 break; 06477 06478 case OP_CONDITION: 06479 GET_MEMNUM_INC(mem, bp); 06480 GET_RELADDR_INC(addr, bp); 06481 fprintf(f, ":%d:(%d)", mem, addr); 06482 break; 06483 06484 default: 06485 fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n", 06486 *--bp); 06487 } 06488 } 06489 fputs("]", f); 06490 if (nextp) *nextp = bp; 06491 } 06492 06493 static void 06494 print_compiled_byte_code_list(FILE* f, regex_t* reg) 06495 { 06496 int ncode; 06497 UChar* bp = reg->p; 06498 UChar* end = reg->p + reg->used; 06499 06500 fprintf(f, "code length: %d", reg->used); 06501 06502 ncode = -1; 06503 while (bp < end) { 06504 ncode++; 06505 if (ncode % 5 == 0) 06506 fprintf(f, "\n%ld:", bp - reg->p); 06507 else 06508 fprintf(f, " %ld:", bp - reg->p); 06509 onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc); 06510 } 06511 06512 fprintf(f, "\n"); 06513 } 06514 06515 static void 06516 print_indent_tree(FILE* f, Node* node, int indent) 06517 { 06518 int i, type, container_p = 0; 06519 int add = 3; 06520 UChar* p; 06521 06522 Indent(f, indent); 06523 if (IS_NULL(node)) { 06524 fprintf(f, "ERROR: null node!!!\n"); 06525 exit (0); 06526 } 06527 06528 type = NTYPE(node); 06529 switch (type) { 06530 case NT_LIST: 06531 case NT_ALT: 06532 if (NTYPE(node) == 
NT_LIST) 06533 fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t)node); 06534 else 06535 fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t)node); 06536 06537 print_indent_tree(f, NCAR(node), indent + add); 06538 while (IS_NOT_NULL(node = NCDR(node))) { 06539 if (NTYPE(node) != type) { 06540 fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node)); 06541 exit(0); 06542 } 06543 print_indent_tree(f, NCAR(node), indent + add); 06544 } 06545 break; 06546 06547 case NT_STR: 06548 fprintf(f, "<string%s:%"PRIxPTR">", 06549 (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t)node); 06550 for (p = NSTR(node)->s; p < NSTR(node)->end; p++) { 06551 if (*p >= 0x20 && *p < 0x7f) 06552 fputc(*p, f); 06553 else { 06554 fprintf(f, " 0x%02x", *p); 06555 } 06556 } 06557 break; 06558 06559 case NT_CCLASS: 06560 fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t)node); 06561 if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f); 06562 if (NCCLASS(node)->mbuf) { 06563 BBuf* bbuf = NCCLASS(node)->mbuf; 06564 for (i = 0; i < (int )bbuf->used; i++) { 06565 if (i > 0) fprintf(f, ","); 06566 fprintf(f, "%0x", bbuf->p[i]); 06567 } 06568 } 06569 break; 06570 06571 case NT_CTYPE: 06572 fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t)node); 06573 switch (NCTYPE(node)->ctype) { 06574 case ONIGENC_CTYPE_WORD: 06575 if (NCTYPE(node)->not != 0) 06576 fputs("not word", f); 06577 else 06578 fputs("word", f); 06579 break; 06580 06581 default: 06582 fprintf(f, "ERROR: undefined ctype.\n"); 06583 exit(0); 06584 } 06585 break; 06586 06587 case NT_CANY: 06588 fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t)node); 06589 break; 06590 06591 case NT_ANCHOR: 06592 fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t)node); 06593 switch (NANCHOR(node)->type) { 06594 case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break; 06595 case ANCHOR_END_BUF: fputs("end buf", f); break; 06596 case ANCHOR_BEGIN_LINE: fputs("begin line", f); break; 06597 case ANCHOR_END_LINE: fputs("end line", f); break; 06598 case ANCHOR_SEMI_END_BUF: fputs("semi end buf", 
f); break; 06599 case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break; 06600 case ANCHOR_ANYCHAR_STAR: fputs("begin position/line", f); break; 06601 06602 case ANCHOR_WORD_BOUND: fputs("word bound", f); break; 06603 case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break; 06604 #ifdef USE_WORD_BEGIN_END 06605 case ANCHOR_WORD_BEGIN: fputs("word begin", f); break; 06606 case ANCHOR_WORD_END: fputs("word end", f); break; 06607 #endif 06608 case ANCHOR_PREC_READ: fputs("prec read", f); container_p = TRUE; break; 06609 case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); container_p = TRUE; break; 06610 case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); container_p = TRUE; break; 06611 case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); container_p = TRUE; break; 06612 case ANCHOR_KEEP: fputs("keep",f); break; 06613 06614 default: 06615 fprintf(f, "ERROR: undefined anchor type.\n"); 06616 break; 06617 } 06618 break; 06619 06620 case NT_BREF: 06621 { 06622 int* p; 06623 BRefNode* br = NBREF(node); 06624 p = BACKREFS_P(br); 06625 fprintf(f, "<backref:%"PRIxPTR">", (intptr_t)node); 06626 for (i = 0; i < br->back_num; i++) { 06627 if (i > 0) fputs(", ", f); 06628 fprintf(f, "%d", p[i]); 06629 } 06630 } 06631 break; 06632 06633 #ifdef USE_SUBEXP_CALL 06634 case NT_CALL: 06635 { 06636 CallNode* cn = NCALL(node); 06637 fprintf(f, "<call:%"PRIxPTR">", (intptr_t)node); 06638 p_string(f, cn->name_end - cn->name, cn->name); 06639 } 06640 break; 06641 #endif 06642 06643 case NT_QTFR: 06644 fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t)node, 06645 NQTFR(node)->lower, NQTFR(node)->upper, 06646 (NQTFR(node)->greedy ? 
"" : "?")); 06647 print_indent_tree(f, NQTFR(node)->target, indent + add); 06648 break; 06649 06650 case NT_ENCLOSE: 06651 fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t)node); 06652 switch (NENCLOSE(node)->type) { 06653 case ENCLOSE_OPTION: 06654 fprintf(f, "option:%d", NENCLOSE(node)->option); 06655 break; 06656 case ENCLOSE_MEMORY: 06657 fprintf(f, "memory:%d", NENCLOSE(node)->regnum); 06658 break; 06659 case ENCLOSE_STOP_BACKTRACK: 06660 fprintf(f, "stop-bt"); 06661 break; 06662 case ENCLOSE_CONDITION: 06663 fprintf(f, "condition:%d", NENCLOSE(node)->regnum); 06664 break; 06665 06666 default: 06667 break; 06668 } 06669 fprintf(f, "\n"); 06670 print_indent_tree(f, NENCLOSE(node)->target, indent + add); 06671 break; 06672 06673 default: 06674 fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node)); 06675 break; 06676 } 06677 06678 if (type != NT_LIST && type != NT_ALT && type != NT_QTFR && 06679 type != NT_ENCLOSE) 06680 fprintf(f, "\n"); 06681 06682 if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add); 06683 06684 fflush(f); 06685 } 06686 #endif /* ONIG_DEBUG */ 06687 06688 #ifdef ONIG_DEBUG_PARSE_TREE 06689 static void 06690 print_tree(FILE* f, Node* node) 06691 { 06692 print_indent_tree(f, node, 0); 06693 } 06694 #endif 06695
1.7.6.1