Ruby 2.0.0p481 (2014-05-08 revision 45883)
regcomp.c
Go to the documentation of this file.
00001 /**********************************************************************
00002   regcomp.c -  Onigmo (Oniguruma-mod) (regular expression library)
00003 **********************************************************************/
00004 /*-
00005  * Copyright (c) 2002-2008  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
00006  * Copyright (c) 2011-2013  K.Takata  <kentkt AT csc DOT jp>
00007  * All rights reserved.
00008  *
00009  * Redistribution and use in source and binary forms, with or without
00010  * modification, are permitted provided that the following conditions
00011  * are met:
00012  * 1. Redistributions of source code must retain the above copyright
00013  *    notice, this list of conditions and the following disclaimer.
00014  * 2. Redistributions in binary form must reproduce the above copyright
00015  *    notice, this list of conditions and the following disclaimer in the
00016  *    documentation and/or other materials provided with the distribution.
00017  *
00018  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
00019  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00020  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00021  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
00022  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00023  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00024  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00025  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00026  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00027  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00028  * SUCH DAMAGE.
00029  */
00030 
00031 #include "regparse.h"
00032 
00033 OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
00034 
00035 extern OnigCaseFoldType
00036 onig_get_default_case_fold_flag(void)
00037 {
00038   return OnigDefaultCaseFoldFlag;
00039 }
00040 
00041 extern int
00042 onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
00043 {
00044   OnigDefaultCaseFoldFlag = case_fold_flag;
00045   return 0;
00046 }
00047 
00048 
00049 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
00050 static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
00051 #endif
00052 
00053 #if 0
00054 static UChar*
00055 str_dup(UChar* s, UChar* end)
00056 {
00057   ptrdiff_t len = end - s;
00058 
00059   if (len > 0) {
00060     UChar* r = (UChar* )xmalloc(len + 1);
00061     CHECK_NULL_RETURN(r);
00062     xmemcpy(r, s, len);
00063     r[len] = (UChar )0;
00064     return r;
00065   }
00066   else return NULL;
00067 }
00068 #endif
00069 
00070 static void
00071 swap_node(Node* a, Node* b)
00072 {
00073   Node c;
00074   c = *a; *a = *b; *b = c;
00075 
00076   if (NTYPE(a) == NT_STR) {
00077     StrNode* sn = NSTR(a);
00078     if (sn->capa == 0) {
00079       size_t len = sn->end - sn->s;
00080       sn->s   = sn->buf;
00081       sn->end = sn->s + len;
00082     }
00083   }
00084 
00085   if (NTYPE(b) == NT_STR) {
00086     StrNode* sn = NSTR(b);
00087     if (sn->capa == 0) {
00088       size_t len = sn->end - sn->s;
00089       sn->s   = sn->buf;
00090       sn->end = sn->s + len;
00091     }
00092   }
00093 }
00094 
00095 static OnigDistance
00096 distance_add(OnigDistance d1, OnigDistance d2)
00097 {
00098   if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
00099     return ONIG_INFINITE_DISTANCE;
00100   else {
00101     if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
00102     else return ONIG_INFINITE_DISTANCE;
00103   }
00104 }
00105 
00106 static OnigDistance
00107 distance_multiply(OnigDistance d, int m)
00108 {
00109   if (m == 0) return 0;
00110 
00111   if (d < ONIG_INFINITE_DISTANCE / m)
00112     return d * m;
00113   else
00114     return ONIG_INFINITE_DISTANCE;
00115 }
00116 
00117 static int
00118 bitset_is_empty(BitSetRef bs)
00119 {
00120   int i;
00121   for (i = 0; i < BITSET_SIZE; i++) {
00122     if (bs[i] != 0) return 0;
00123   }
00124   return 1;
00125 }
00126 
00127 #ifdef ONIG_DEBUG
00128 static int
00129 onig_is_prelude(void)
00130 {
00131     return !rb_const_defined(rb_cThread, rb_intern_const("MUTEX_FOR_THREAD_EXCLUSIVE"));
00132 }
00133 
00134 static int
00135 bitset_on_num(BitSetRef bs)
00136 {
00137   int i, n;
00138 
00139   n = 0;
00140   for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
00141     if (BITSET_AT(bs, i)) n++;
00142   }
00143   return n;
00144 }
00145 #endif
00146 
00147 extern int
00148 onig_bbuf_init(BBuf* buf, OnigDistance size)
00149 {
00150   if (size <= 0) {
00151     size   = 0;
00152     buf->p = NULL;
00153   }
00154   else {
00155     buf->p = (UChar* )xmalloc(size);
00156     if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
00157   }
00158 
00159   buf->alloc = (unsigned int )size;
00160   buf->used  = 0;
00161   return 0;
00162 }
00163 
00164 
00165 #ifdef USE_SUBEXP_CALL
00166 
00167 static int
00168 unset_addr_list_init(UnsetAddrList* uslist, int size)
00169 {
00170   UnsetAddr* p;
00171 
00172   p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
00173   CHECK_NULL_RETURN_MEMERR(p);
00174   uslist->num   = 0;
00175   uslist->alloc = size;
00176   uslist->us    = p;
00177   return 0;
00178 }
00179 
00180 static void
00181 unset_addr_list_end(UnsetAddrList* uslist)
00182 {
00183   if (IS_NOT_NULL(uslist->us))
00184     xfree(uslist->us);
00185 }
00186 
00187 static int
00188 unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
00189 {
00190   UnsetAddr* p;
00191   int size;
00192 
00193   if (uslist->num >= uslist->alloc) {
00194     size = uslist->alloc * 2;
00195     p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
00196     CHECK_NULL_RETURN_MEMERR(p);
00197     uslist->alloc = size;
00198     uslist->us    = p;
00199   }
00200 
00201   uslist->us[uslist->num].offset = offset;
00202   uslist->us[uslist->num].target = node;
00203   uslist->num++;
00204   return 0;
00205 }
00206 #endif /* USE_SUBEXP_CALL */
00207 
00208 
00209 static int
00210 add_opcode(regex_t* reg, int opcode)
00211 {
00212   BBUF_ADD1(reg, opcode);
00213   return 0;
00214 }
00215 
00216 #ifdef USE_COMBINATION_EXPLOSION_CHECK
00217 static int
00218 add_state_check_num(regex_t* reg, int num)
00219 {
00220   StateCheckNumType n = (StateCheckNumType )num;
00221 
00222   BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
00223   return 0;
00224 }
00225 #endif
00226 
00227 static int
00228 add_rel_addr(regex_t* reg, int addr)
00229 {
00230   RelAddrType ra = (RelAddrType )addr;
00231 
00232   BBUF_ADD(reg, &ra, SIZE_RELADDR);
00233   return 0;
00234 }
00235 
00236 static int
00237 add_abs_addr(regex_t* reg, int addr)
00238 {
00239   AbsAddrType ra = (AbsAddrType )addr;
00240 
00241   BBUF_ADD(reg, &ra, SIZE_ABSADDR);
00242   return 0;
00243 }
00244 
00245 static int
00246 add_length(regex_t* reg, OnigDistance len)
00247 {
00248   LengthType l = (LengthType )len;
00249 
00250   BBUF_ADD(reg, &l, SIZE_LENGTH);
00251   return 0;
00252 }
00253 
00254 static int
00255 add_mem_num(regex_t* reg, int num)
00256 {
00257   MemNumType n = (MemNumType )num;
00258 
00259   BBUF_ADD(reg, &n, SIZE_MEMNUM);
00260   return 0;
00261 }
00262 
00263 static int
00264 add_pointer(regex_t* reg, void* addr)
00265 {
00266   PointerType ptr = (PointerType )addr;
00267 
00268   BBUF_ADD(reg, &ptr, SIZE_POINTER);
00269   return 0;
00270 }
00271 
00272 static int
00273 add_option(regex_t* reg, OnigOptionType option)
00274 {
00275   BBUF_ADD(reg, &option, SIZE_OPTION);
00276   return 0;
00277 }
00278 
00279 static int
00280 add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
00281 {
00282   int r;
00283 
00284   r = add_opcode(reg, opcode);
00285   if (r) return r;
00286   r = add_rel_addr(reg, addr);
00287   return r;
00288 }
00289 
00290 static int
00291 add_bytes(regex_t* reg, UChar* bytes, OnigDistance len)
00292 {
00293   BBUF_ADD(reg, bytes, len);
00294   return 0;
00295 }
00296 
00297 static int
00298 add_bitset(regex_t* reg, BitSetRef bs)
00299 {
00300   BBUF_ADD(reg, bs, SIZE_BITSET);
00301   return 0;
00302 }
00303 
00304 static int
00305 add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
00306 {
00307   int r;
00308 
00309   r = add_opcode(reg, opcode);
00310   if (r) return r;
00311   r = add_option(reg, option);
00312   return r;
00313 }
00314 
00315 static int compile_length_tree(Node* node, regex_t* reg);
00316 static int compile_tree(Node* node, regex_t* reg);
00317 
00318 
/* True for the exact-string opcodes for which add_compile_string() emits a
 * length operand in front of the string bytes. */
#define IS_NEED_STR_LEN_OP_EXACT(op) \
   ((op) == OP_EXACTN    || (op) == OP_EXACTMB2N ||\
    (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN  || (op) == OP_EXACTN_IC)
00322 
00323 static int
00324 select_str_opcode(int mb_len, OnigDistance str_len, int ignore_case)
00325 {
00326   int op;
00327 
00328   if (ignore_case) {
00329     switch (str_len) {
00330     case 1:  op = OP_EXACT1_IC; break;
00331     default: op = OP_EXACTN_IC; break;
00332     }
00333   }
00334   else {
00335     switch (mb_len) {
00336     case 1:
00337       switch (str_len) {
00338       case 1:  op = OP_EXACT1; break;
00339       case 2:  op = OP_EXACT2; break;
00340       case 3:  op = OP_EXACT3; break;
00341       case 4:  op = OP_EXACT4; break;
00342       case 5:  op = OP_EXACT5; break;
00343       default: op = OP_EXACTN; break;
00344       }
00345       break;
00346 
00347     case 2:
00348       switch (str_len) {
00349       case 1:  op = OP_EXACTMB2N1; break;
00350       case 2:  op = OP_EXACTMB2N2; break;
00351       case 3:  op = OP_EXACTMB2N3; break;
00352       default: op = OP_EXACTMB2N;  break;
00353       }
00354       break;
00355 
00356     case 3:
00357       op = OP_EXACTMB3N;
00358       break;
00359 
00360     default:
00361       op = OP_EXACTMBN;
00362       break;
00363     }
00364   }
00365   return op;
00366 }
00367 
00368 static int
00369 compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
00370 {
00371   int r;
00372   int saved_num_null_check = reg->num_null_check;
00373 
00374   if (empty_info != 0) {
00375     r = add_opcode(reg, OP_NULL_CHECK_START);
00376     if (r) return r;
00377     r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
00378     if (r) return r;
00379     reg->num_null_check++;
00380   }
00381 
00382   r = compile_tree(node, reg);
00383   if (r) return r;
00384 
00385   if (empty_info != 0) {
00386     if (empty_info == NQ_TARGET_IS_EMPTY)
00387       r = add_opcode(reg, OP_NULL_CHECK_END);
00388     else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
00389       r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
00390     else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
00391       r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
00392 
00393     if (r) return r;
00394     r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
00395   }
00396   return r;
00397 }
00398 
00399 #ifdef USE_SUBEXP_CALL
00400 static int
00401 compile_call(CallNode* node, regex_t* reg)
00402 {
00403   int r;
00404 
00405   r = add_opcode(reg, OP_CALL);
00406   if (r) return r;
00407   r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
00408                           node->target);
00409   if (r) return r;
00410   r = add_abs_addr(reg, 0 /*dummy addr.*/);
00411   return r;
00412 }
00413 #endif
00414 
00415 static int
00416 compile_tree_n_times(Node* node, int n, regex_t* reg)
00417 {
00418   int i, r;
00419 
00420   for (i = 0; i < n; i++) {
00421     r = compile_tree(node, reg);
00422     if (r) return r;
00423   }
00424   return 0;
00425 }
00426 
00427 static int
00428 add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance str_len,
00429                           regex_t* reg ARG_UNUSED, int ignore_case)
00430 {
00431   int len;
00432   int op = select_str_opcode(mb_len, str_len, ignore_case);
00433 
00434   len = SIZE_OPCODE;
00435 
00436   if (op == OP_EXACTMBN)  len += SIZE_LENGTH;
00437   if (IS_NEED_STR_LEN_OP_EXACT(op))
00438     len += SIZE_LENGTH;
00439 
00440   len += mb_len * (int )str_len;
00441   return len;
00442 }
00443 
00444 static int
00445 add_compile_string(UChar* s, int mb_len, OnigDistance str_len,
00446                    regex_t* reg, int ignore_case)
00447 {
00448   int op = select_str_opcode(mb_len, str_len, ignore_case);
00449   add_opcode(reg, op);
00450 
00451   if (op == OP_EXACTMBN)
00452     add_length(reg, mb_len);
00453 
00454   if (IS_NEED_STR_LEN_OP_EXACT(op)) {
00455     if (op == OP_EXACTN_IC)
00456       add_length(reg, mb_len * str_len);
00457     else
00458       add_length(reg, str_len);
00459   }
00460 
00461   add_bytes(reg, s, mb_len * str_len);
00462   return 0;
00463 }
00464 
00465 
00466 static int
00467 compile_length_string_node(Node* node, regex_t* reg)
00468 {
00469   int rlen, r, len, prev_len, slen, ambig;
00470   OnigEncoding enc = reg->enc;
00471   UChar *p, *prev;
00472   StrNode* sn;
00473 
00474   sn = NSTR(node);
00475   if (sn->end <= sn->s)
00476     return 0;
00477 
00478   ambig = NSTRING_IS_AMBIG(node);
00479 
00480   p = prev = sn->s;
00481   prev_len = enclen(enc, p, sn->end);
00482   p += prev_len;
00483   slen = 1;
00484   rlen = 0;
00485 
00486   for (; p < sn->end; ) {
00487     len = enclen(enc, p, sn->end);
00488     if (len == prev_len) {
00489       slen++;
00490     }
00491     else {
00492       r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
00493       rlen += r;
00494       prev = p;
00495       slen = 1;
00496       prev_len = len;
00497     }
00498     p += len;
00499   }
00500   r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
00501   rlen += r;
00502   return rlen;
00503 }
00504 
00505 static int
00506 compile_length_string_raw_node(StrNode* sn, regex_t* reg)
00507 {
00508   if (sn->end <= sn->s)
00509     return 0;
00510 
00511   return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
00512 }
00513 
00514 static int
00515 compile_string_node(Node* node, regex_t* reg)
00516 {
00517   int r, len, prev_len, slen, ambig;
00518   OnigEncoding enc = reg->enc;
00519   UChar *p, *prev, *end;
00520   StrNode* sn;
00521 
00522   sn = NSTR(node);
00523   if (sn->end <= sn->s)
00524     return 0;
00525 
00526   end = sn->end;
00527   ambig = NSTRING_IS_AMBIG(node);
00528 
00529   p = prev = sn->s;
00530   prev_len = enclen(enc, p, end);
00531   p += prev_len;
00532   slen = 1;
00533 
00534   for (; p < end; ) {
00535     len = enclen(enc, p, end);
00536     if (len == prev_len) {
00537       slen++;
00538     }
00539     else {
00540       r = add_compile_string(prev, prev_len, slen, reg, ambig);
00541       if (r) return r;
00542 
00543       prev  = p;
00544       slen  = 1;
00545       prev_len = len;
00546     }
00547 
00548     p += len;
00549   }
00550   return add_compile_string(prev, prev_len, slen, reg, ambig);
00551 }
00552 
00553 static int
00554 compile_string_raw_node(StrNode* sn, regex_t* reg)
00555 {
00556   if (sn->end <= sn->s)
00557     return 0;
00558 
00559   return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
00560 }
00561 
00562 static int
00563 add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
00564 {
00565 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
00566   add_length(reg, mbuf->used);
00567   return add_bytes(reg, mbuf->p, mbuf->used);
00568 #else
00569   int r, pad_size;
00570   UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
00571 
00572   GET_ALIGNMENT_PAD_SIZE(p, pad_size);
00573   add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
00574   if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
00575 
00576   r = add_bytes(reg, mbuf->p, mbuf->used);
00577 
00578   /* padding for return value from compile_length_cclass_node() to be fix. */
00579   pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
00580   if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
00581   return r;
00582 #endif
00583 }
00584 
00585 static int
00586 compile_length_cclass_node(CClassNode* cc, regex_t* reg)
00587 {
00588   int len;
00589 
00590   if (IS_NCCLASS_SHARE(cc)) {
00591     len = SIZE_OPCODE + SIZE_POINTER;
00592     return len;
00593   }
00594 
00595   if (IS_NULL(cc->mbuf)) {
00596     len = SIZE_OPCODE + SIZE_BITSET;
00597   }
00598   else {
00599     if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
00600       len = SIZE_OPCODE;
00601     }
00602     else {
00603       len = SIZE_OPCODE + SIZE_BITSET;
00604     }
00605 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
00606     len += SIZE_LENGTH + cc->mbuf->used;
00607 #else
00608     len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
00609 #endif
00610   }
00611 
00612   return len;
00613 }
00614 
00615 static int
00616 compile_cclass_node(CClassNode* cc, regex_t* reg)
00617 {
00618   int r;
00619 
00620   if (IS_NCCLASS_SHARE(cc)) {
00621     add_opcode(reg, OP_CCLASS_NODE);
00622     r = add_pointer(reg, cc);
00623     return r;
00624   }
00625 
00626   if (IS_NULL(cc->mbuf)) {
00627     if (IS_NCCLASS_NOT(cc))
00628       add_opcode(reg, OP_CCLASS_NOT);
00629     else
00630       add_opcode(reg, OP_CCLASS);
00631 
00632     r = add_bitset(reg, cc->bs);
00633   }
00634   else {
00635     if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
00636       if (IS_NCCLASS_NOT(cc))
00637         add_opcode(reg, OP_CCLASS_MB_NOT);
00638       else
00639         add_opcode(reg, OP_CCLASS_MB);
00640 
00641       r = add_multi_byte_cclass(cc->mbuf, reg);
00642     }
00643     else {
00644       if (IS_NCCLASS_NOT(cc))
00645         add_opcode(reg, OP_CCLASS_MIX_NOT);
00646       else
00647         add_opcode(reg, OP_CCLASS_MIX);
00648 
00649       r = add_bitset(reg, cc->bs);
00650       if (r) return r;
00651       r = add_multi_byte_cclass(cc->mbuf, reg);
00652     }
00653   }
00654 
00655   return r;
00656 }
00657 
00658 static int
00659 entry_repeat_range(regex_t* reg, int id, int lower, int upper)
00660 {
00661 #define REPEAT_RANGE_ALLOC  4
00662 
00663   OnigRepeatRange* p;
00664 
00665   if (reg->repeat_range_alloc == 0) {
00666     p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
00667     CHECK_NULL_RETURN_MEMERR(p);
00668     reg->repeat_range = p;
00669     reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
00670   }
00671   else if (reg->repeat_range_alloc <= id) {
00672     int n;
00673     n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
00674     p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
00675                                     sizeof(OnigRepeatRange) * n);
00676     CHECK_NULL_RETURN_MEMERR(p);
00677     reg->repeat_range = p;
00678     reg->repeat_range_alloc = n;
00679   }
00680   else {
00681     p = reg->repeat_range;
00682   }
00683 
00684   p[id].lower = lower;
00685   p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
00686   return 0;
00687 }
00688 
00689 static int
00690 compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
00691                           regex_t* reg)
00692 {
00693   int r;
00694   int num_repeat = reg->num_repeat;
00695 
00696   r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
00697   if (r) return r;
00698   r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
00699   reg->num_repeat++;
00700   if (r) return r;
00701   r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
00702   if (r) return r;
00703 
00704   r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
00705   if (r) return r;
00706 
00707   r = compile_tree_empty_check(qn->target, reg, empty_info);
00708   if (r) return r;
00709 
00710   if (
00711 #ifdef USE_SUBEXP_CALL
00712       reg->num_call > 0 ||
00713 #endif
00714       IS_QUANTIFIER_IN_REPEAT(qn)) {
00715     r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
00716   }
00717   else {
00718     r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
00719   }
00720   if (r) return r;
00721   r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
00722   return r;
00723 }
00724 
00725 static int
00726 is_anychar_star_quantifier(QtfrNode* qn)
00727 {
00728   if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
00729       NTYPE(qn->target) == NT_CANY)
00730     return 1;
00731   else
00732     return 0;
00733 }
00734 
/* Upper limit on the code size up to which a quantifier is expanded into
 * repeated copies of its target instead of a counted repeat. */
#define QUANTIFIER_EXPAND_LIMIT_SIZE   50
/* True when the enclosing function's local `ckn` is a positive number;
 * only usable where a variable of that exact name is in scope. */
#define CKN_ON   (ckn > 0)
00737 
00738 #ifdef USE_COMBINATION_EXPLOSION_CHECK
00739 
00740 static int
00741 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
00742 {
00743   int len, mod_tlen, cklen;
00744   int ckn;
00745   int infinite = IS_REPEAT_INFINITE(qn->upper);
00746   int empty_info = qn->target_empty_info;
00747   int tlen = compile_length_tree(qn->target, reg);
00748 
00749   if (tlen < 0) return tlen;
00750 
00751   ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
00752 
00753   cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
00754 
00755   /* anychar repeat */
00756   if (NTYPE(qn->target) == NT_CANY) {
00757     if (qn->greedy && infinite) {
00758       if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
00759         return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
00760       else
00761         return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
00762     }
00763   }
00764 
00765   if (empty_info != 0)
00766     mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
00767   else
00768     mod_tlen = tlen;
00769 
00770   if (infinite && qn->lower <= 1) {
00771     if (qn->greedy) {
00772       if (qn->lower == 1)
00773         len = SIZE_OP_JUMP;
00774       else
00775         len = 0;
00776 
00777       len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
00778     }
00779     else {
00780       if (qn->lower == 0)
00781         len = SIZE_OP_JUMP;
00782       else
00783         len = 0;
00784 
00785       len += mod_tlen + SIZE_OP_PUSH + cklen;
00786     }
00787   }
00788   else if (qn->upper == 0) {
00789     if (qn->is_refered != 0) /* /(?<n>..){0}/ */
00790       len = SIZE_OP_JUMP + tlen;
00791     else
00792       len = 0;
00793   }
00794   else if (qn->upper == 1 && qn->greedy) {
00795     if (qn->lower == 0) {
00796       if (CKN_ON) {
00797         len = SIZE_OP_STATE_CHECK_PUSH + tlen;
00798       }
00799       else {
00800         len = SIZE_OP_PUSH + tlen;
00801       }
00802     }
00803     else {
00804       len = tlen;
00805     }
00806   }
00807   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
00808     len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
00809   }
00810   else {
00811     len = SIZE_OP_REPEAT_INC
00812         + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
00813     if (CKN_ON)
00814       len += SIZE_OP_STATE_CHECK;
00815   }
00816 
00817   return len;
00818 }
00819 
00820 static int
00821 compile_quantifier_node(QtfrNode* qn, regex_t* reg)
00822 {
00823   int r, mod_tlen;
00824   int ckn;
00825   int infinite = IS_REPEAT_INFINITE(qn->upper);
00826   int empty_info = qn->target_empty_info;
00827   int tlen = compile_length_tree(qn->target, reg);
00828 
00829   if (tlen < 0) return tlen;
00830 
00831   ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
00832 
00833   if (is_anychar_star_quantifier(qn)) {
00834     r = compile_tree_n_times(qn->target, qn->lower, reg);
00835     if (r) return r;
00836     if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
00837       if (IS_MULTILINE(reg->options))
00838         r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
00839       else
00840         r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
00841       if (r) return r;
00842       if (CKN_ON) {
00843         r = add_state_check_num(reg, ckn);
00844         if (r) return r;
00845       }
00846 
00847       return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
00848     }
00849     else {
00850       if (IS_MULTILINE(reg->options)) {
00851         r = add_opcode(reg, (CKN_ON ?
00852                                OP_STATE_CHECK_ANYCHAR_ML_STAR
00853                              : OP_ANYCHAR_ML_STAR));
00854       }
00855       else {
00856         r = add_opcode(reg, (CKN_ON ?
00857                                OP_STATE_CHECK_ANYCHAR_STAR
00858                              : OP_ANYCHAR_STAR));
00859       }
00860       if (r) return r;
00861       if (CKN_ON)
00862         r = add_state_check_num(reg, ckn);
00863 
00864       return r;
00865     }
00866   }
00867 
00868   if (empty_info != 0)
00869     mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
00870   else
00871     mod_tlen = tlen;
00872 
00873   if (infinite && qn->lower <= 1) {
00874     if (qn->greedy) {
00875       if (qn->lower == 1) {
00876         r = add_opcode_rel_addr(reg, OP_JUMP,
00877                         (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
00878         if (r) return r;
00879       }
00880 
00881       if (CKN_ON) {
00882         r = add_opcode(reg, OP_STATE_CHECK_PUSH);
00883         if (r) return r;
00884         r = add_state_check_num(reg, ckn);
00885         if (r) return r;
00886         r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
00887       }
00888       else {
00889         r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
00890       }
00891       if (r) return r;
00892       r = compile_tree_empty_check(qn->target, reg, empty_info);
00893       if (r) return r;
00894       r = add_opcode_rel_addr(reg, OP_JUMP,
00895               -(mod_tlen + (int )SIZE_OP_JUMP
00896                 + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
00897     }
00898     else {
00899       if (qn->lower == 0) {
00900         r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
00901         if (r) return r;
00902       }
00903       r = compile_tree_empty_check(qn->target, reg, empty_info);
00904       if (r) return r;
00905       if (CKN_ON) {
00906         r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
00907         if (r) return r;
00908         r = add_state_check_num(reg, ckn);
00909         if (r) return r;
00910         r = add_rel_addr(reg,
00911                  -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
00912       }
00913       else
00914         r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
00915     }
00916   }
00917   else if (qn->upper == 0) {
00918     if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
00919       r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
00920       if (r) return r;
00921       r = compile_tree(qn->target, reg);
00922     }
00923     else
00924       r = 0;
00925   }
00926   else if (qn->upper == 1 && qn->greedy) {
00927     if (qn->lower == 0) {
00928       if (CKN_ON) {
00929         r = add_opcode(reg, OP_STATE_CHECK_PUSH);
00930         if (r) return r;
00931         r = add_state_check_num(reg, ckn);
00932         if (r) return r;
00933         r = add_rel_addr(reg, tlen);
00934       }
00935       else {
00936         r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
00937       }
00938       if (r) return r;
00939     }
00940 
00941     r = compile_tree(qn->target, reg);
00942   }
00943   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
00944     if (CKN_ON) {
00945       r = add_opcode(reg, OP_STATE_CHECK_PUSH);
00946       if (r) return r;
00947       r = add_state_check_num(reg, ckn);
00948       if (r) return r;
00949       r = add_rel_addr(reg, SIZE_OP_JUMP);
00950     }
00951     else {
00952       r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
00953     }
00954 
00955     if (r) return r;
00956     r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
00957     if (r) return r;
00958     r = compile_tree(qn->target, reg);
00959   }
00960   else {
00961     r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
00962     if (CKN_ON) {
00963       if (r) return r;
00964       r = add_opcode(reg, OP_STATE_CHECK);
00965       if (r) return r;
00966       r = add_state_check_num(reg, ckn);
00967     }
00968   }
00969   return r;
00970 }
00971 
00972 #else /* USE_COMBINATION_EXPLOSION_CHECK */
00973 
00974 static int
00975 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
00976 {
00977   int len, mod_tlen;
00978   int infinite = IS_REPEAT_INFINITE(qn->upper);
00979   int empty_info = qn->target_empty_info;
00980   int tlen = compile_length_tree(qn->target, reg);
00981 
00982   if (tlen < 0) return tlen;
00983 
00984   /* anychar repeat */
00985   if (NTYPE(qn->target) == NT_CANY) {
00986     if (qn->greedy && infinite) {
00987       if (IS_NOT_NULL(qn->next_head_exact))
00988         return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
00989       else
00990         return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
00991     }
00992   }
00993 
00994   if (empty_info != 0)
00995     mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
00996   else
00997     mod_tlen = tlen;
00998 
00999   if (infinite &&
01000       (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
01001     if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
01002       len = SIZE_OP_JUMP;
01003     }
01004     else {
01005       len = tlen * qn->lower;
01006     }
01007 
01008     if (qn->greedy) {
01009       if (IS_NOT_NULL(qn->head_exact))
01010         len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
01011       else if (IS_NOT_NULL(qn->next_head_exact))
01012         len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
01013       else
01014         len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
01015     }
01016     else
01017       len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
01018   }
01019   else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
01020     len = SIZE_OP_JUMP + tlen;
01021   }
01022   else if (!infinite && qn->greedy &&
01023            (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
01024                                       <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
01025     len = tlen * qn->lower;
01026     len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
01027   }
01028   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
01029     len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
01030   }
01031   else {
01032     len = SIZE_OP_REPEAT_INC
01033         + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
01034   }
01035 
01036   return len;
01037 }
01038 
01039 static int
01040 compile_quantifier_node(QtfrNode* qn, regex_t* reg)
01041 {
01042   int i, r, mod_tlen;
01043   int infinite = IS_REPEAT_INFINITE(qn->upper);
01044   int empty_info = qn->target_empty_info;
01045   int tlen = compile_length_tree(qn->target, reg);
01046 
01047   if (tlen < 0) return tlen;
01048 
01049   if (is_anychar_star_quantifier(qn)) {
01050     r = compile_tree_n_times(qn->target, qn->lower, reg);
01051     if (r) return r;
01052     if (IS_NOT_NULL(qn->next_head_exact)) {
01053       if (IS_MULTILINE(reg->options))
01054         r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
01055       else
01056         r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
01057       if (r) return r;
01058       return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
01059     }
01060     else {
01061       if (IS_MULTILINE(reg->options))
01062         return add_opcode(reg, OP_ANYCHAR_ML_STAR);
01063       else
01064         return add_opcode(reg, OP_ANYCHAR_STAR);
01065     }
01066   }
01067 
01068   if (empty_info != 0)
01069     mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
01070   else
01071     mod_tlen = tlen;
01072 
01073   if (infinite &&
01074       (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
01075     if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
01076       if (qn->greedy) {
01077         if (IS_NOT_NULL(qn->head_exact))
01078           r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
01079         else if (IS_NOT_NULL(qn->next_head_exact))
01080           r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
01081         else
01082           r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
01083       }
01084       else {
01085         r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
01086       }
01087       if (r) return r;
01088     }
01089     else {
01090       r = compile_tree_n_times(qn->target, qn->lower, reg);
01091       if (r) return r;
01092     }
01093 
01094     if (qn->greedy) {
01095       if (IS_NOT_NULL(qn->head_exact)) {
01096         r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
01097                              mod_tlen + SIZE_OP_JUMP);
01098         if (r) return r;
01099         add_bytes(reg, NSTR(qn->head_exact)->s, 1);
01100         r = compile_tree_empty_check(qn->target, reg, empty_info);
01101         if (r) return r;
01102         r = add_opcode_rel_addr(reg, OP_JUMP,
01103         -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
01104       }
01105       else if (IS_NOT_NULL(qn->next_head_exact)) {
01106         r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
01107                                 mod_tlen + SIZE_OP_JUMP);
01108         if (r) return r;
01109         add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
01110         r = compile_tree_empty_check(qn->target, reg, empty_info);
01111         if (r) return r;
01112         r = add_opcode_rel_addr(reg, OP_JUMP,
01113           -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
01114       }
01115       else {
01116         r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
01117         if (r) return r;
01118         r = compile_tree_empty_check(qn->target, reg, empty_info);
01119         if (r) return r;
01120         r = add_opcode_rel_addr(reg, OP_JUMP,
01121                      -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
01122       }
01123     }
01124     else {
01125       r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
01126       if (r) return r;
01127       r = compile_tree_empty_check(qn->target, reg, empty_info);
01128       if (r) return r;
01129       r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
01130     }
01131   }
01132   else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
01133     r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
01134     if (r) return r;
01135     r = compile_tree(qn->target, reg);
01136   }
01137   else if (!infinite && qn->greedy &&
01138            (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
01139                                   <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
01140     int n = qn->upper - qn->lower;
01141 
01142     r = compile_tree_n_times(qn->target, qn->lower, reg);
01143     if (r) return r;
01144 
01145     for (i = 0; i < n; i++) {
01146       r = add_opcode_rel_addr(reg, OP_PUSH,
01147                            (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
01148       if (r) return r;
01149       r = compile_tree(qn->target, reg);
01150       if (r) return r;
01151     }
01152   }
01153   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
01154     r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
01155     if (r) return r;
01156     r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
01157     if (r) return r;
01158     r = compile_tree(qn->target, reg);
01159   }
01160   else {
01161     r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
01162   }
01163   return r;
01164 }
01165 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
01166 
01167 static int
01168 compile_length_option_node(EncloseNode* node, regex_t* reg)
01169 {
01170   int tlen;
01171   OnigOptionType prev = reg->options;
01172 
01173   reg->options = node->option;
01174   tlen = compile_length_tree(node->target, reg);
01175   reg->options = prev;
01176 
01177   if (tlen < 0) return tlen;
01178 
01179   if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
01180     return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
01181            + tlen + SIZE_OP_SET_OPTION;
01182   }
01183   else
01184     return tlen;
01185 }
01186 
01187 static int
01188 compile_option_node(EncloseNode* node, regex_t* reg)
01189 {
01190   int r;
01191   OnigOptionType prev = reg->options;
01192 
01193   if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
01194     r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
01195     if (r) return r;
01196     r = add_opcode_option(reg, OP_SET_OPTION, prev);
01197     if (r) return r;
01198     r = add_opcode(reg, OP_FAIL);
01199     if (r) return r;
01200   }
01201 
01202   reg->options = node->option;
01203   r = compile_tree(node->target, reg);
01204   reg->options = prev;
01205 
01206   if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
01207     if (r) return r;
01208     r = add_opcode_option(reg, OP_SET_OPTION, prev);
01209   }
01210   return r;
01211 }
01212 
01213 static int
01214 compile_length_enclose_node(EncloseNode* node, regex_t* reg)
01215 {
01216   int len;
01217   int tlen;
01218 
01219   if (node->type == ENCLOSE_OPTION)
01220     return compile_length_option_node(node, reg);
01221 
01222   if (node->target) {
01223     tlen = compile_length_tree(node->target, reg);
01224     if (tlen < 0) return tlen;
01225   }
01226   else
01227     tlen = 0;
01228 
01229   switch (node->type) {
01230   case ENCLOSE_MEMORY:
01231 #ifdef USE_SUBEXP_CALL
01232     if (IS_ENCLOSE_CALLED(node)) {
01233       len = SIZE_OP_MEMORY_START_PUSH + tlen
01234           + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
01235       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
01236         len += (IS_ENCLOSE_RECURSION(node)
01237                 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
01238       else
01239         len += (IS_ENCLOSE_RECURSION(node)
01240                 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
01241     }
01242     else
01243 #endif
01244     {
01245       if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
01246         len = SIZE_OP_MEMORY_START_PUSH;
01247       else
01248         len = SIZE_OP_MEMORY_START;
01249 
01250       len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
01251                      ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
01252     }
01253     break;
01254 
01255   case ENCLOSE_STOP_BACKTRACK:
01256     if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
01257       QtfrNode* qn = NQTFR(node->target);
01258       tlen = compile_length_tree(qn->target, reg);
01259       if (tlen < 0) return tlen;
01260 
01261       len = tlen * qn->lower
01262           + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
01263     }
01264     else {
01265       len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
01266     }
01267     break;
01268 
01269   case ENCLOSE_CONDITION:
01270     len = SIZE_OP_CONDITION;
01271     if (NTYPE(node->target) == NT_ALT) {
01272       Node* x = node->target;
01273 
01274       tlen = compile_length_tree(NCAR(x), reg); /* yes-node */
01275       if (tlen < 0) return tlen;
01276       len += tlen + SIZE_OP_JUMP;
01277       if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
01278       x = NCDR(x);
01279       tlen = compile_length_tree(NCAR(x), reg); /* no-node */
01280       if (tlen < 0) return tlen;
01281       len += tlen;
01282       if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
01283     }
01284     else {
01285       return ONIGERR_PARSER_BUG;
01286     }
01287     break;
01288 
01289   default:
01290     return ONIGERR_TYPE_BUG;
01291     break;
01292   }
01293 
01294   return len;
01295 }
01296 
01297 static int get_char_length_tree(Node* node, regex_t* reg, int* len);
01298 
01299 static int
01300 compile_enclose_node(EncloseNode* node, regex_t* reg)
01301 {
01302   int r, len;
01303 
01304   if (node->type == ENCLOSE_OPTION)
01305     return compile_option_node(node, reg);
01306 
01307   switch (node->type) {
01308   case ENCLOSE_MEMORY:
01309 #ifdef USE_SUBEXP_CALL
01310     if (IS_ENCLOSE_CALLED(node)) {
01311       r = add_opcode(reg, OP_CALL);
01312       if (r) return r;
01313       node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
01314       node->state |= NST_ADDR_FIXED;
01315       r = add_abs_addr(reg, (int )node->call_addr);
01316       if (r) return r;
01317       len = compile_length_tree(node->target, reg);
01318       len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
01319       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
01320         len += (IS_ENCLOSE_RECURSION(node)
01321                 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
01322       else
01323         len += (IS_ENCLOSE_RECURSION(node)
01324                 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
01325 
01326       r = add_opcode_rel_addr(reg, OP_JUMP, len);
01327       if (r) return r;
01328     }
01329 #endif
01330     if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
01331       r = add_opcode(reg, OP_MEMORY_START_PUSH);
01332     else
01333       r = add_opcode(reg, OP_MEMORY_START);
01334     if (r) return r;
01335     r = add_mem_num(reg, node->regnum);
01336     if (r) return r;
01337     r = compile_tree(node->target, reg);
01338     if (r) return r;
01339 #ifdef USE_SUBEXP_CALL
01340     if (IS_ENCLOSE_CALLED(node)) {
01341       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
01342         r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
01343                              ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
01344       else
01345         r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
01346                              ? OP_MEMORY_END_REC : OP_MEMORY_END));
01347 
01348       if (r) return r;
01349       r = add_mem_num(reg, node->regnum);
01350       if (r) return r;
01351       r = add_opcode(reg, OP_RETURN);
01352     }
01353     else
01354 #endif
01355     {
01356       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
01357         r = add_opcode(reg, OP_MEMORY_END_PUSH);
01358       else
01359         r = add_opcode(reg, OP_MEMORY_END);
01360       if (r) return r;
01361       r = add_mem_num(reg, node->regnum);
01362     }
01363     break;
01364 
01365   case ENCLOSE_STOP_BACKTRACK:
01366     if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
01367       QtfrNode* qn = NQTFR(node->target);
01368       r = compile_tree_n_times(qn->target, qn->lower, reg);
01369       if (r) return r;
01370 
01371       len = compile_length_tree(qn->target, reg);
01372       if (len < 0) return len;
01373 
01374       r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
01375       if (r) return r;
01376       r = compile_tree(qn->target, reg);
01377       if (r) return r;
01378       r = add_opcode(reg, OP_POP);
01379       if (r) return r;
01380       r = add_opcode_rel_addr(reg, OP_JUMP,
01381          -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
01382     }
01383     else {
01384       r = add_opcode(reg, OP_PUSH_STOP_BT);
01385       if (r) return r;
01386       r = compile_tree(node->target, reg);
01387       if (r) return r;
01388       r = add_opcode(reg, OP_POP_STOP_BT);
01389     }
01390     break;
01391 
01392   case ENCLOSE_CONDITION:
01393     r = add_opcode(reg, OP_CONDITION);
01394     if (r) return r;
01395     r = add_mem_num(reg, node->regnum);
01396     if (r) return r;
01397 
01398     if (NTYPE(node->target) == NT_ALT) {
01399       Node* x = node->target;
01400       int len2;
01401 
01402       len = compile_length_tree(NCAR(x), reg);  /* yes-node */
01403       if (len < 0) return len;
01404       if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
01405       x = NCDR(x);
01406       len2 = compile_length_tree(NCAR(x), reg); /* no-node */
01407       if (len2 < 0) return len2;
01408       if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
01409 
01410       x = node->target;
01411       r = add_rel_addr(reg, len + SIZE_OP_JUMP);
01412       if (r) return r;
01413       r = compile_tree(NCAR(x), reg);   /* yes-node */
01414       if (r) return r;
01415       r = add_opcode_rel_addr(reg, OP_JUMP, len2);
01416       if (r) return r;
01417       x = NCDR(x);
01418       r = compile_tree(NCAR(x), reg);   /* no-node */
01419     }
01420     else {
01421       return ONIGERR_PARSER_BUG;
01422     }
01423     break;
01424 
01425   default:
01426     return ONIGERR_TYPE_BUG;
01427     break;
01428   }
01429 
01430   return r;
01431 }
01432 
01433 static int
01434 compile_length_anchor_node(AnchorNode* node, regex_t* reg)
01435 {
01436   int len;
01437   int tlen = 0;
01438 
01439   if (node->target) {
01440     tlen = compile_length_tree(node->target, reg);
01441     if (tlen < 0) return tlen;
01442   }
01443 
01444   switch (node->type) {
01445   case ANCHOR_PREC_READ:
01446     len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
01447     break;
01448   case ANCHOR_PREC_READ_NOT:
01449     len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
01450     break;
01451   case ANCHOR_LOOK_BEHIND:
01452     len = SIZE_OP_LOOK_BEHIND + tlen;
01453     break;
01454   case ANCHOR_LOOK_BEHIND_NOT:
01455     len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
01456     break;
01457 
01458   default:
01459     len = SIZE_OPCODE;
01460     break;
01461   }
01462 
01463   return len;
01464 }
01465 
01466 static int
01467 compile_anchor_node(AnchorNode* node, regex_t* reg)
01468 {
01469   int r, len;
01470 
01471   switch (node->type) {
01472   case ANCHOR_BEGIN_BUF:      r = add_opcode(reg, OP_BEGIN_BUF);      break;
01473   case ANCHOR_END_BUF:        r = add_opcode(reg, OP_END_BUF);        break;
01474   case ANCHOR_BEGIN_LINE:     r = add_opcode(reg, OP_BEGIN_LINE);     break;
01475   case ANCHOR_END_LINE:       r = add_opcode(reg, OP_END_LINE);       break;
01476   case ANCHOR_SEMI_END_BUF:   r = add_opcode(reg, OP_SEMI_END_BUF);   break;
01477   case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
01478 
01479   /* used for implicit anchor optimization: /.*a/ ==> /(?:^|\G).*a/ */
01480   case ANCHOR_ANYCHAR_STAR:   r = add_opcode(reg, OP_BEGIN_POS_OR_LINE); break;
01481 
01482   case ANCHOR_WORD_BOUND:
01483     if (node->ascii_range)    r = add_opcode(reg, OP_ASCII_WORD_BOUND);
01484     else                      r = add_opcode(reg, OP_WORD_BOUND);
01485     break;
01486   case ANCHOR_NOT_WORD_BOUND:
01487     if (node->ascii_range)    r = add_opcode(reg, OP_NOT_ASCII_WORD_BOUND);
01488     else                      r = add_opcode(reg, OP_NOT_WORD_BOUND);
01489     break;
01490 #ifdef USE_WORD_BEGIN_END
01491   case ANCHOR_WORD_BEGIN:
01492     if (node->ascii_range)    r = add_opcode(reg, OP_ASCII_WORD_BEGIN);
01493     else                      r = add_opcode(reg, OP_WORD_BEGIN);
01494     break;
01495   case ANCHOR_WORD_END:
01496     if (node->ascii_range)    r = add_opcode(reg, OP_ASCII_WORD_END);
01497     else                      r = add_opcode(reg, OP_WORD_END);
01498     break;
01499 #endif
01500   case ANCHOR_KEEP:           r = add_opcode(reg, OP_KEEP);           break;
01501 
01502   case ANCHOR_PREC_READ:
01503     r = add_opcode(reg, OP_PUSH_POS);
01504     if (r) return r;
01505     r = compile_tree(node->target, reg);
01506     if (r) return r;
01507     r = add_opcode(reg, OP_POP_POS);
01508     break;
01509 
01510   case ANCHOR_PREC_READ_NOT:
01511     len = compile_length_tree(node->target, reg);
01512     if (len < 0) return len;
01513     r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
01514     if (r) return r;
01515     r = compile_tree(node->target, reg);
01516     if (r) return r;
01517     r = add_opcode(reg, OP_FAIL_POS);
01518     break;
01519 
01520   case ANCHOR_LOOK_BEHIND:
01521     {
01522       int n;
01523       r = add_opcode(reg, OP_LOOK_BEHIND);
01524       if (r) return r;
01525       if (node->char_len < 0) {
01526         r = get_char_length_tree(node->target, reg, &n);
01527         if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
01528       }
01529       else
01530         n = node->char_len;
01531       r = add_length(reg, n);
01532       if (r) return r;
01533       r = compile_tree(node->target, reg);
01534     }
01535     break;
01536 
01537   case ANCHOR_LOOK_BEHIND_NOT:
01538     {
01539       int n;
01540       len = compile_length_tree(node->target, reg);
01541       r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
01542                            len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
01543       if (r) return r;
01544       if (node->char_len < 0) {
01545         r = get_char_length_tree(node->target, reg, &n);
01546         if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
01547       }
01548       else
01549         n = node->char_len;
01550       r = add_length(reg, n);
01551       if (r) return r;
01552       r = compile_tree(node->target, reg);
01553       if (r) return r;
01554       r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
01555     }
01556     break;
01557 
01558   default:
01559     return ONIGERR_TYPE_BUG;
01560     break;
01561   }
01562 
01563   return r;
01564 }
01565 
01566 static int
01567 compile_length_tree(Node* node, regex_t* reg)
01568 {
01569   int len, type, r;
01570 
01571   type = NTYPE(node);
01572   switch (type) {
01573   case NT_LIST:
01574     len = 0;
01575     do {
01576       r = compile_length_tree(NCAR(node), reg);
01577       if (r < 0) return r;
01578       len += r;
01579     } while (IS_NOT_NULL(node = NCDR(node)));
01580     r = len;
01581     break;
01582 
01583   case NT_ALT:
01584     {
01585       int n;
01586 
01587       n = r = 0;
01588       do {
01589         r += compile_length_tree(NCAR(node), reg);
01590         n++;
01591       } while (IS_NOT_NULL(node = NCDR(node)));
01592       r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
01593     }
01594     break;
01595 
01596   case NT_STR:
01597     if (NSTRING_IS_RAW(node))
01598       r = compile_length_string_raw_node(NSTR(node), reg);
01599     else
01600       r = compile_length_string_node(node, reg);
01601     break;
01602 
01603   case NT_CCLASS:
01604     r = compile_length_cclass_node(NCCLASS(node), reg);
01605     break;
01606 
01607   case NT_CTYPE:
01608   case NT_CANY:
01609     r = SIZE_OPCODE;
01610     break;
01611 
01612   case NT_BREF:
01613     {
01614       BRefNode* br = NBREF(node);
01615 
01616 #ifdef USE_BACKREF_WITH_LEVEL
01617       if (IS_BACKREF_NEST_LEVEL(br)) {
01618         r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
01619             SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
01620       }
01621       else
01622 #endif
01623       if (br->back_num == 1) {
01624         r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
01625              ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
01626       }
01627       else {
01628         r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
01629       }
01630     }
01631     break;
01632 
01633 #ifdef USE_SUBEXP_CALL
01634   case NT_CALL:
01635     r = SIZE_OP_CALL;
01636     break;
01637 #endif
01638 
01639   case NT_QTFR:
01640     r = compile_length_quantifier_node(NQTFR(node), reg);
01641     break;
01642 
01643   case NT_ENCLOSE:
01644     r = compile_length_enclose_node(NENCLOSE(node), reg);
01645     break;
01646 
01647   case NT_ANCHOR:
01648     r = compile_length_anchor_node(NANCHOR(node), reg);
01649     break;
01650 
01651   default:
01652     return ONIGERR_TYPE_BUG;
01653     break;
01654   }
01655 
01656   return r;
01657 }
01658 
01659 static int
01660 compile_tree(Node* node, regex_t* reg)
01661 {
01662   int n, type, len, pos, r = 0;
01663 
01664   type = NTYPE(node);
01665   switch (type) {
01666   case NT_LIST:
01667     do {
01668       r = compile_tree(NCAR(node), reg);
01669     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
01670     break;
01671 
01672   case NT_ALT:
01673     {
01674       Node* x = node;
01675       len = 0;
01676       do {
01677         len += compile_length_tree(NCAR(x), reg);
01678         if (NCDR(x) != NULL) {
01679           len += SIZE_OP_PUSH + SIZE_OP_JUMP;
01680         }
01681       } while (IS_NOT_NULL(x = NCDR(x)));
01682       pos = reg->used + len;  /* goal position */
01683 
01684       do {
01685         len = compile_length_tree(NCAR(node), reg);
01686         if (IS_NOT_NULL(NCDR(node))) {
01687           r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
01688           if (r) break;
01689         }
01690         r = compile_tree(NCAR(node), reg);
01691         if (r) break;
01692         if (IS_NOT_NULL(NCDR(node))) {
01693           len = pos - (reg->used + SIZE_OP_JUMP);
01694           r = add_opcode_rel_addr(reg, OP_JUMP, len);
01695           if (r) break;
01696         }
01697       } while (IS_NOT_NULL(node = NCDR(node)));
01698     }
01699     break;
01700 
01701   case NT_STR:
01702     if (NSTRING_IS_RAW(node))
01703       r = compile_string_raw_node(NSTR(node), reg);
01704     else
01705       r = compile_string_node(node, reg);
01706     break;
01707 
01708   case NT_CCLASS:
01709     r = compile_cclass_node(NCCLASS(node), reg);
01710     break;
01711 
01712   case NT_CTYPE:
01713     {
01714       int op;
01715 
01716       switch (NCTYPE(node)->ctype) {
01717       case ONIGENC_CTYPE_WORD:
01718         if (NCTYPE(node)->ascii_range != 0) {
01719           if (NCTYPE(node)->not != 0)  op = OP_NOT_ASCII_WORD;
01720           else                         op = OP_ASCII_WORD;
01721         }
01722         else {
01723           if (NCTYPE(node)->not != 0)  op = OP_NOT_WORD;
01724           else                         op = OP_WORD;
01725         }
01726         break;
01727       default:
01728         return ONIGERR_TYPE_BUG;
01729         break;
01730       }
01731       r = add_opcode(reg, op);
01732     }
01733     break;
01734 
01735   case NT_CANY:
01736     if (IS_MULTILINE(reg->options))
01737       r = add_opcode(reg, OP_ANYCHAR_ML);
01738     else
01739       r = add_opcode(reg, OP_ANYCHAR);
01740     break;
01741 
01742   case NT_BREF:
01743     {
01744       BRefNode* br = NBREF(node);
01745 
01746 #ifdef USE_BACKREF_WITH_LEVEL
01747       if (IS_BACKREF_NEST_LEVEL(br)) {
01748         r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
01749         if (r) return r;
01750         r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
01751         if (r) return r;
01752         r = add_length(reg, br->nest_level);
01753         if (r) return r;
01754 
01755         goto add_bacref_mems;
01756       }
01757       else
01758 #endif
01759       if (br->back_num == 1) {
01760         n = br->back_static[0];
01761         if (IS_IGNORECASE(reg->options)) {
01762           r = add_opcode(reg, OP_BACKREFN_IC);
01763           if (r) return r;
01764           r = add_mem_num(reg, n);
01765         }
01766         else {
01767           switch (n) {
01768           case 1:  r = add_opcode(reg, OP_BACKREF1); break;
01769           case 2:  r = add_opcode(reg, OP_BACKREF2); break;
01770           default:
01771             r = add_opcode(reg, OP_BACKREFN);
01772             if (r) return r;
01773             r = add_mem_num(reg, n);
01774             break;
01775           }
01776         }
01777       }
01778       else {
01779         int i;
01780         int* p;
01781 
01782         if (IS_IGNORECASE(reg->options)) {
01783           r = add_opcode(reg, OP_BACKREF_MULTI_IC);
01784         }
01785         else {
01786           r = add_opcode(reg, OP_BACKREF_MULTI);
01787         }
01788         if (r) return r;
01789 
01790 #ifdef USE_BACKREF_WITH_LEVEL
01791       add_bacref_mems:
01792 #endif
01793         r = add_length(reg, br->back_num);
01794         if (r) return r;
01795         p = BACKREFS_P(br);
01796         for (i = br->back_num - 1; i >= 0; i--) {
01797           r = add_mem_num(reg, p[i]);
01798           if (r) return r;
01799         }
01800       }
01801     }
01802     break;
01803 
01804 #ifdef USE_SUBEXP_CALL
01805   case NT_CALL:
01806     r = compile_call(NCALL(node), reg);
01807     break;
01808 #endif
01809 
01810   case NT_QTFR:
01811     r = compile_quantifier_node(NQTFR(node), reg);
01812     break;
01813 
01814   case NT_ENCLOSE:
01815     r = compile_enclose_node(NENCLOSE(node), reg);
01816     break;
01817 
01818   case NT_ANCHOR:
01819     r = compile_anchor_node(NANCHOR(node), reg);
01820     break;
01821 
01822   default:
01823 #ifdef ONIG_DEBUG
01824     fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
01825 #endif
01826     break;
01827   }
01828 
01829   return r;
01830 }
01831 
01832 #ifdef USE_NAMED_GROUP
01833 
01834 static int
01835 noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
01836 {
01837   int r = 0;
01838   Node* node = *plink;
01839 
01840   switch (NTYPE(node)) {
01841   case NT_LIST:
01842   case NT_ALT:
01843     do {
01844       r = noname_disable_map(&(NCAR(node)), map, counter);
01845     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
01846     break;
01847 
01848   case NT_QTFR:
01849     {
01850       Node** ptarget = &(NQTFR(node)->target);
01851       Node*  old = *ptarget;
01852       r = noname_disable_map(ptarget, map, counter);
01853       if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
01854         onig_reduce_nested_quantifier(node, *ptarget);
01855       }
01856     }
01857     break;
01858 
01859   case NT_ENCLOSE:
01860     {
01861       EncloseNode* en = NENCLOSE(node);
01862       if (en->type == ENCLOSE_MEMORY) {
01863         if (IS_ENCLOSE_NAMED_GROUP(en)) {
01864           (*counter)++;
01865           map[en->regnum].new_val = *counter;
01866           en->regnum = *counter;
01867           r = noname_disable_map(&(en->target), map, counter);
01868         }
01869         else {
01870           *plink = en->target;
01871           en->target = NULL_NODE;
01872           onig_node_free(node);
01873           r = noname_disable_map(plink, map, counter);
01874         }
01875       }
01876       else
01877         r = noname_disable_map(&(en->target), map, counter);
01878     }
01879     break;
01880 
01881   case NT_ANCHOR:
01882     {
01883       AnchorNode* an = NANCHOR(node);
01884       switch (an->type) {
01885       case ANCHOR_PREC_READ:
01886       case ANCHOR_PREC_READ_NOT:
01887       case ANCHOR_LOOK_BEHIND:
01888       case ANCHOR_LOOK_BEHIND_NOT:
01889         r = noname_disable_map(&(an->target), map, counter);
01890         break;
01891       }
01892     }
01893     break;
01894 
01895   default:
01896     break;
01897   }
01898 
01899   return r;
01900 }
01901 
01902 static int
01903 renumber_node_backref(Node* node, GroupNumRemap* map)
01904 {
01905   int i, pos, n, old_num;
01906   int *backs;
01907   BRefNode* bn = NBREF(node);
01908 
01909   if (! IS_BACKREF_NAME_REF(bn))
01910     return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
01911 
01912   old_num = bn->back_num;
01913   if (IS_NULL(bn->back_dynamic))
01914     backs = bn->back_static;
01915   else
01916     backs = bn->back_dynamic;
01917 
01918   for (i = 0, pos = 0; i < old_num; i++) {
01919     n = map[backs[i]].new_val;
01920     if (n > 0) {
01921       backs[pos] = n;
01922       pos++;
01923     }
01924   }
01925 
01926   bn->back_num = pos;
01927   return 0;
01928 }
01929 
01930 static int
01931 renumber_by_map(Node* node, GroupNumRemap* map)
01932 {
01933   int r = 0;
01934 
01935   switch (NTYPE(node)) {
01936   case NT_LIST:
01937   case NT_ALT:
01938     do {
01939       r = renumber_by_map(NCAR(node), map);
01940     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
01941     break;
01942   case NT_QTFR:
01943     r = renumber_by_map(NQTFR(node)->target, map);
01944     break;
01945   case NT_ENCLOSE:
01946     {
01947       EncloseNode* en = NENCLOSE(node);
01948       if (en->type == ENCLOSE_CONDITION)
01949         en->regnum = map[en->regnum].new_val;
01950       r = renumber_by_map(en->target, map);
01951     }
01952     break;
01953 
01954   case NT_BREF:
01955     r = renumber_node_backref(node, map);
01956     break;
01957 
01958   case NT_ANCHOR:
01959     {
01960       AnchorNode* an = NANCHOR(node);
01961       switch (an->type) {
01962       case ANCHOR_PREC_READ:
01963       case ANCHOR_PREC_READ_NOT:
01964       case ANCHOR_LOOK_BEHIND:
01965       case ANCHOR_LOOK_BEHIND_NOT:
01966         r = renumber_by_map(an->target, map);
01967         break;
01968       }
01969     }
01970     break;
01971 
01972   default:
01973     break;
01974   }
01975 
01976   return r;
01977 }
01978 
01979 static int
01980 numbered_ref_check(Node* node)
01981 {
01982   int r = 0;
01983 
01984   switch (NTYPE(node)) {
01985   case NT_LIST:
01986   case NT_ALT:
01987     do {
01988       r = numbered_ref_check(NCAR(node));
01989     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
01990     break;
01991   case NT_QTFR:
01992     r = numbered_ref_check(NQTFR(node)->target);
01993     break;
01994   case NT_ENCLOSE:
01995     r = numbered_ref_check(NENCLOSE(node)->target);
01996     break;
01997 
01998   case NT_BREF:
01999     if (! IS_BACKREF_NAME_REF(NBREF(node)))
02000       return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
02001     break;
02002 
02003   default:
02004     break;
02005   }
02006 
02007   return r;
02008 }
02009 
02010 static int
02011 disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
02012 {
02013   int r, i, pos, counter;
02014   BitStatusType loc;
02015   GroupNumRemap* map;
02016 
02017   map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
02018   CHECK_NULL_RETURN_MEMERR(map);
02019   for (i = 1; i <= env->num_mem; i++) {
02020     map[i].new_val = 0;
02021   }
02022   counter = 0;
02023   r = noname_disable_map(root, map, &counter);
02024   if (r != 0) return r;
02025 
02026   r = renumber_by_map(*root, map);
02027   if (r != 0) return r;
02028 
02029   for (i = 1, pos = 1; i <= env->num_mem; i++) {
02030     if (map[i].new_val > 0) {
02031       SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
02032       pos++;
02033     }
02034   }
02035 
02036   loc = env->capture_history;
02037   BIT_STATUS_CLEAR(env->capture_history);
02038   for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
02039     if (BIT_STATUS_AT(loc, i)) {
02040       BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
02041     }
02042   }
02043 
02044   env->num_mem = env->num_named;
02045   reg->num_mem = env->num_named;
02046 
02047   return onig_renumber_name_table(reg, map);
02048 }
02049 #endif /* USE_NAMED_GROUP */
02050 
02051 #ifdef USE_SUBEXP_CALL
02052 static int
02053 unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
02054 {
02055   int i, offset;
02056   EncloseNode* en;
02057   AbsAddrType addr;
02058 
02059   for (i = 0; i < uslist->num; i++) {
02060     en = NENCLOSE(uslist->us[i].target);
02061     if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
02062     addr = en->call_addr;
02063     offset = uslist->us[i].offset;
02064 
02065     BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
02066   }
02067   return 0;
02068 }
02069 #endif
02070 
02071 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
02072 static int
02073 quantifiers_memory_node_info(Node* node)
02074 {
02075   int r = 0;
02076 
02077   switch (NTYPE(node)) {
02078   case NT_LIST:
02079   case NT_ALT:
02080     {
02081       int v;
02082       do {
02083         v = quantifiers_memory_node_info(NCAR(node));
02084         if (v > r) r = v;
02085       } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
02086     }
02087     break;
02088 
02089 #ifdef USE_SUBEXP_CALL
02090   case NT_CALL:
02091     if (IS_CALL_RECURSION(NCALL(node))) {
02092       return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
02093     }
02094     else
02095       r = quantifiers_memory_node_info(NCALL(node)->target);
02096     break;
02097 #endif
02098 
02099   case NT_QTFR:
02100     {
02101       QtfrNode* qn = NQTFR(node);
02102       if (qn->upper != 0) {
02103         r = quantifiers_memory_node_info(qn->target);
02104       }
02105     }
02106     break;
02107 
02108   case NT_ENCLOSE:
02109     {
02110       EncloseNode* en = NENCLOSE(node);
02111       switch (en->type) {
02112       case ENCLOSE_MEMORY:
02113         return NQ_TARGET_IS_EMPTY_MEM;
02114         break;
02115 
02116       case ENCLOSE_OPTION:
02117       case ENCLOSE_STOP_BACKTRACK:
02118       case ENCLOSE_CONDITION:
02119         r = quantifiers_memory_node_info(en->target);
02120         break;
02121       default:
02122         break;
02123       }
02124     }
02125     break;
02126 
02127   case NT_BREF:
02128   case NT_STR:
02129   case NT_CTYPE:
02130   case NT_CCLASS:
02131   case NT_CANY:
02132   case NT_ANCHOR:
02133   default:
02134     break;
02135   }
02136 
02137   return r;
02138 }
02139 #endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
02140 
02141 static int
02142 get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
02143 {
02144   OnigDistance tmin;
02145   int r = 0;
02146 
02147   *min = 0;
02148   switch (NTYPE(node)) {
02149   case NT_BREF:
02150     {
02151       int i;
02152       int* backs;
02153       Node** nodes = SCANENV_MEM_NODES(env);
02154       BRefNode* br = NBREF(node);
02155       if (br->state & NST_RECURSION) break;
02156 
02157       backs = BACKREFS_P(br);
02158       if (backs[0] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
02159       r = get_min_match_length(nodes[backs[0]], min, env);
02160       if (r != 0) break;
02161       for (i = 1; i < br->back_num; i++) {
02162         if (backs[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
02163         r = get_min_match_length(nodes[backs[i]], &tmin, env);
02164         if (r != 0) break;
02165         if (*min > tmin) *min = tmin;
02166       }
02167     }
02168     break;
02169 
02170 #ifdef USE_SUBEXP_CALL
02171   case NT_CALL:
02172     if (IS_CALL_RECURSION(NCALL(node))) {
02173       EncloseNode* en = NENCLOSE(NCALL(node)->target);
02174       if (IS_ENCLOSE_MIN_FIXED(en))
02175         *min = en->min_len;
02176     }
02177     else
02178       r = get_min_match_length(NCALL(node)->target, min, env);
02179     break;
02180 #endif
02181 
02182   case NT_LIST:
02183     do {
02184       r = get_min_match_length(NCAR(node), &tmin, env);
02185       if (r == 0) *min += tmin;
02186     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02187     break;
02188 
02189   case NT_ALT:
02190     {
02191       Node *x, *y;
02192       y = node;
02193       do {
02194         x = NCAR(y);
02195         r = get_min_match_length(x, &tmin, env);
02196         if (r != 0) break;
02197         if (y == node) *min = tmin;
02198         else if (*min > tmin) *min = tmin;
02199       } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
02200     }
02201     break;
02202 
02203   case NT_STR:
02204     {
02205       StrNode* sn = NSTR(node);
02206       *min = sn->end - sn->s;
02207     }
02208     break;
02209 
02210   case NT_CTYPE:
02211     *min = 1;
02212     break;
02213 
02214   case NT_CCLASS:
02215   case NT_CANY:
02216     *min = 1;
02217     break;
02218 
02219   case NT_QTFR:
02220     {
02221       QtfrNode* qn = NQTFR(node);
02222 
02223       if (qn->lower > 0) {
02224         r = get_min_match_length(qn->target, min, env);
02225         if (r == 0)
02226           *min = distance_multiply(*min, qn->lower);
02227       }
02228     }
02229     break;
02230 
02231   case NT_ENCLOSE:
02232     {
02233       EncloseNode* en = NENCLOSE(node);
02234       switch (en->type) {
02235       case ENCLOSE_MEMORY:
02236 #ifdef USE_SUBEXP_CALL
02237         if (IS_ENCLOSE_MIN_FIXED(en))
02238           *min = en->min_len;
02239         else {
02240           r = get_min_match_length(en->target, min, env);
02241           if (r == 0) {
02242             en->min_len = *min;
02243             SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
02244           }
02245         }
02246         break;
02247 #endif
02248       case ENCLOSE_OPTION:
02249       case ENCLOSE_STOP_BACKTRACK:
02250       case ENCLOSE_CONDITION:
02251         r = get_min_match_length(en->target, min, env);
02252         break;
02253       }
02254     }
02255     break;
02256 
02257   case NT_ANCHOR:
02258   default:
02259     break;
02260   }
02261 
02262   return r;
02263 }
02264 
02265 static int
02266 get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
02267 {
02268   OnigDistance tmax;
02269   int r = 0;
02270 
02271   *max = 0;
02272   switch (NTYPE(node)) {
02273   case NT_LIST:
02274     do {
02275       r = get_max_match_length(NCAR(node), &tmax, env);
02276       if (r == 0)
02277         *max = distance_add(*max, tmax);
02278     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02279     break;
02280 
02281   case NT_ALT:
02282     do {
02283       r = get_max_match_length(NCAR(node), &tmax, env);
02284       if (r == 0 && *max < tmax) *max = tmax;
02285     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02286     break;
02287 
02288   case NT_STR:
02289     {
02290       StrNode* sn = NSTR(node);
02291       *max = sn->end - sn->s;
02292     }
02293     break;
02294 
02295   case NT_CTYPE:
02296     *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
02297     break;
02298 
02299   case NT_CCLASS:
02300   case NT_CANY:
02301     *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
02302     break;
02303 
02304   case NT_BREF:
02305     {
02306       int i;
02307       int* backs;
02308       Node** nodes = SCANENV_MEM_NODES(env);
02309       BRefNode* br = NBREF(node);
02310       if (br->state & NST_RECURSION) {
02311         *max = ONIG_INFINITE_DISTANCE;
02312         break;
02313       }
02314       backs = BACKREFS_P(br);
02315       for (i = 0; i < br->back_num; i++) {
02316         if (backs[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
02317         r = get_max_match_length(nodes[backs[i]], &tmax, env);
02318         if (r != 0) break;
02319         if (*max < tmax) *max = tmax;
02320       }
02321     }
02322     break;
02323 
02324 #ifdef USE_SUBEXP_CALL
02325   case NT_CALL:
02326     if (! IS_CALL_RECURSION(NCALL(node)))
02327       r = get_max_match_length(NCALL(node)->target, max, env);
02328     else
02329       *max = ONIG_INFINITE_DISTANCE;
02330     break;
02331 #endif
02332 
02333   case NT_QTFR:
02334     {
02335       QtfrNode* qn = NQTFR(node);
02336 
02337       if (qn->upper != 0) {
02338         r = get_max_match_length(qn->target, max, env);
02339         if (r == 0 && *max != 0) {
02340           if (! IS_REPEAT_INFINITE(qn->upper))
02341             *max = distance_multiply(*max, qn->upper);
02342           else
02343             *max = ONIG_INFINITE_DISTANCE;
02344         }
02345       }
02346     }
02347     break;
02348 
02349   case NT_ENCLOSE:
02350     {
02351       EncloseNode* en = NENCLOSE(node);
02352       switch (en->type) {
02353       case ENCLOSE_MEMORY:
02354 #ifdef USE_SUBEXP_CALL
02355         if (IS_ENCLOSE_MAX_FIXED(en))
02356           *max = en->max_len;
02357         else {
02358           r = get_max_match_length(en->target, max, env);
02359           if (r == 0) {
02360             en->max_len = *max;
02361             SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
02362           }
02363         }
02364         break;
02365 #endif
02366       case ENCLOSE_OPTION:
02367       case ENCLOSE_STOP_BACKTRACK:
02368       case ENCLOSE_CONDITION:
02369         r = get_max_match_length(en->target, max, env);
02370         break;
02371       }
02372     }
02373     break;
02374 
02375   case NT_ANCHOR:
02376   default:
02377     break;
02378   }
02379 
02380   return r;
02381 }
02382 
02383 #define GET_CHAR_LEN_VARLEN           -1
02384 #define GET_CHAR_LEN_TOP_ALT_VARLEN   -2
02385 
02386 /* fixed size pattern node only */
02387 static int
02388 get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
02389 {
02390   int tlen;
02391   int r = 0;
02392 
02393   level++;
02394   *len = 0;
02395   switch (NTYPE(node)) {
02396   case NT_LIST:
02397     do {
02398       r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
02399       if (r == 0)
02400         *len = (int )distance_add(*len, tlen);
02401     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02402     break;
02403 
02404   case NT_ALT:
02405     {
02406       int tlen2;
02407       int varlen = 0;
02408 
02409       r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
02410       while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
02411         r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
02412         if (r == 0) {
02413           if (tlen != tlen2)
02414             varlen = 1;
02415         }
02416       }
02417       if (r == 0) {
02418         if (varlen != 0) {
02419           if (level == 1)
02420             r = GET_CHAR_LEN_TOP_ALT_VARLEN;
02421           else
02422             r = GET_CHAR_LEN_VARLEN;
02423         }
02424         else
02425           *len = tlen;
02426       }
02427     }
02428     break;
02429 
02430   case NT_STR:
02431     {
02432       StrNode* sn = NSTR(node);
02433       UChar *s = sn->s;
02434       while (s < sn->end) {
02435         s += enclen(reg->enc, s, sn->end);
02436         (*len)++;
02437       }
02438     }
02439     break;
02440 
02441   case NT_QTFR:
02442     {
02443       QtfrNode* qn = NQTFR(node);
02444       if (qn->lower == qn->upper) {
02445         r = get_char_length_tree1(qn->target, reg, &tlen, level);
02446         if (r == 0)
02447           *len = (int )distance_multiply(tlen, qn->lower);
02448       }
02449       else
02450         r = GET_CHAR_LEN_VARLEN;
02451     }
02452     break;
02453 
02454 #ifdef USE_SUBEXP_CALL
02455   case NT_CALL:
02456     if (! IS_CALL_RECURSION(NCALL(node)))
02457       r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
02458     else
02459       r = GET_CHAR_LEN_VARLEN;
02460     break;
02461 #endif
02462 
02463   case NT_CTYPE:
02464     *len = 1;
02465     break;
02466 
02467   case NT_CCLASS:
02468   case NT_CANY:
02469     *len = 1;
02470     break;
02471 
02472   case NT_ENCLOSE:
02473     {
02474       EncloseNode* en = NENCLOSE(node);
02475       switch (en->type) {
02476       case ENCLOSE_MEMORY:
02477 #ifdef USE_SUBEXP_CALL
02478         if (IS_ENCLOSE_CLEN_FIXED(en))
02479           *len = en->char_len;
02480         else {
02481           r = get_char_length_tree1(en->target, reg, len, level);
02482           if (r == 0) {
02483             en->char_len = *len;
02484             SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
02485           }
02486         }
02487         break;
02488 #endif
02489       case ENCLOSE_OPTION:
02490       case ENCLOSE_STOP_BACKTRACK:
02491       case ENCLOSE_CONDITION:
02492         r = get_char_length_tree1(en->target, reg, len, level);
02493         break;
02494       default:
02495         break;
02496       }
02497     }
02498     break;
02499 
02500   case NT_ANCHOR:
02501     break;
02502 
02503   default:
02504     r = GET_CHAR_LEN_VARLEN;
02505     break;
02506   }
02507 
02508   return r;
02509 }
02510 
02511 static int
02512 get_char_length_tree(Node* node, regex_t* reg, int* len)
02513 {
02514   return get_char_length_tree1(node, reg, len, 0);
02515 }
02516 
02517 /* x is not included y ==>  1 : 0
       *
       * Returns 1 only when one of the checks below establishes that x and y
       * do not overlap; returns 0 in every other case, including all
       * node-type pairs that are not examined here.
       *
       * Only NT_CTYPE, NT_CCLASS and NT_STR operands are inspected.  Mixed
       * pairs are handled in a single orientation: the operands are swapped
       * (label `swap`) and the test restarted (label `retry`) for
       * CTYPE-vs-CCLASS, CTYPE-vs-STR and CCLASS-vs-STR.
       */
02518 static int
02519 is_not_included(Node* x, Node* y, regex_t* reg)
02520 {
02521   int i;
02522   OnigDistance len;
02523   OnigCodePoint code;
02524   UChar *p;
02525   int ytype;
02526 
02527  retry:
02528   ytype = NTYPE(y);
02529   switch (NTYPE(x)) {
02530   case NT_CTYPE:
02531     {
02532       switch (ytype) {
02533       case NT_CTYPE:
             /* same ctype and same ascii_range, opposite negation */
02534         if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
02535             NCTYPE(y)->not   != NCTYPE(x)->not &&
02536             NCTYPE(y)->ascii_range == NCTYPE(x)->ascii_range)
02537           return 1;
02538         else
02539           return 0;
02540         break;
02541 
02542       case NT_CCLASS:
             /* shared swap point, also reached by `goto swap` from other cases */
02543       swap:
02544         {
02545           Node* tmp;
02546           tmp = x; x = y; y = tmp;
02547           goto retry;
02548         }
02549         break;
02550 
02551       case NT_STR:
02552         goto swap;
02553         break;
02554 
02555       default:
02556         break;
02557       }
02558     }
02559     break;
02560 
02561   case NT_CCLASS:
02562     {
02563       CClassNode* xc = NCCLASS(x);
02564       switch (ytype) {
02565       case NT_CTYPE:
             /* only ONIGENC_CTYPE_WORD is analysed; other ctypes end up at
                the final `return 0` */
02566         switch (NCTYPE(y)->ctype) {
02567         case ONIGENC_CTYPE_WORD:
02568           if (NCTYPE(y)->not == 0) {
                 /* y is the non-negated word type: decided only for a class
                    with no mbuf that is not negated; 0 as soon as a set bit
                    is a word code */
02569             if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
02570               for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
02571                 if (BITSET_AT(xc->bs, i)) {
02572                   if (NCTYPE(y)->ascii_range) {
02573                     if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
02574                   }
02575                   else {
02576                     if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
02577                   }
02578                 }
02579               }
02580               return 1;
02581             }
02582             return 0;
02583           }
02584           else {
                 /* y is the negated word type: 0 as soon as the class accepts
                    a single-byte code that is not a word code (a negated
                    class accepts the codes whose bit is clear) */
02585             for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
02586               int is_word;
02587               if (NCTYPE(y)->ascii_range)
02588                 is_word = IS_CODE_SB_WORD(reg->enc, i);
02589               else
02590                 is_word = ONIGENC_IS_CODE_WORD(reg->enc, i);
02591               if (! is_word) {
02592                 if (!IS_NCCLASS_NOT(xc)) {
02593                   if (BITSET_AT(xc->bs, i))
02594                     return 0;
02595                 }
02596                 else {
02597                   if (! BITSET_AT(xc->bs, i))
02598                     return 0;
02599                 }
02600               }
02601             }
02602             return 1;
02603           }
02604           break;
02605 
02606         default:
02607           break;
02608         }
02609         break;
02610 
02611       case NT_CCLASS:
02612         {
02613           int v;
02614           CClassNode* yc = NCCLASS(y);
02615 
               /* 0 as soon as some single-byte code is accepted by both
                  classes (negation flags taken into account) */
02616           for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
02617             v = BITSET_AT(xc->bs, i);
02618             if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
02619                 (v == 0 && IS_NCCLASS_NOT(xc))) {
02620               v = BITSET_AT(yc->bs, i);
02621               if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
02622                   (v == 0 && IS_NCCLASS_NOT(yc)))
02623                 return 0;
02624             }
02625           }
               /* no shared single-byte code: report 1 only if at least one
                  class has no mbuf and is not negated */
02626           if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
02627               (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
02628             return 1;
02629           return 0;
02630         }
02631         break;
02632 
02633       case NT_STR:
02634         goto swap;
02635         break;
02636 
02637       default:
02638         break;
02639       }
02640     }
02641     break;
02642 
02643   case NT_STR:
02644     {
02645       StrNode* xs = NSTR(x);
           /* an empty string decides nothing */
02646       if (NSTRING_LEN(x) == 0)
02647         break;
02648 
02649       switch (ytype) {
02650       case NT_CTYPE:
02651         switch (NCTYPE(y)->ctype) {
02652         case ONIGENC_CTYPE_WORD:
               /* the character at xs->s is tested against the word type;
                  the answer is inverted through y's `not` flag */
02653           if (NCTYPE(y)->ascii_range) {
02654             if (ONIGENC_IS_MBC_ASCII_WORD(reg->enc, xs->s, xs->end))
02655               return NCTYPE(y)->not;
02656             else
02657               return !(NCTYPE(y)->not);
02658           }
02659           else {
02660             if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
02661               return NCTYPE(y)->not;
02662             else
02663               return !(NCTYPE(y)->not);
02664           }
02665           break;
02666         default:
02667           break;
02668         }
02669         break;
02670 
02671       case NT_CCLASS:
02672         {
02673           CClassNode* cc = NCCLASS(y);
02674 
               /* NOTE(review): the end pointer handed to ONIGENC_MBC_TO_CODE
                  is xs->s + ONIGENC_MBC_MAXLEN, not xs->end -- confirm that
                  this cannot read past the string for short strings */
02675           code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
02676                                      xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
02677           return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
02678         }
02679         break;
02680 
02681       case NT_STR:
02682         {
02683           UChar *q;
02684           StrNode* ys = NSTR(y);
               /* compare the common prefix (length of the shorter string)
                  byte by byte; any difference means 1 */
02685           len = NSTRING_LEN(x);
02686           if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
02687           if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
02688             /* tiny version */
02689             return 0;
02690           }
02691           else {
02692             for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i < len; i++, p++, q++) {
02693               if (*p != *q) return 1;
02694             }
02695           }
02696         }
02697         break;
02698 
02699       default:
02700         break;
02701       }
02702     }
02703     break;
02704 
02705   default:
02706     break;
02707   }
02708 
         /* undecided */
02709   return 0;
02710 }
02711 
02712 static Node*
02713 get_head_value_node(Node* node, int exact, regex_t* reg)
02714 {
02715   Node* n = NULL_NODE;
02716 
02717   switch (NTYPE(node)) {
02718   case NT_BREF:
02719   case NT_ALT:
02720   case NT_CANY:
02721 #ifdef USE_SUBEXP_CALL
02722   case NT_CALL:
02723 #endif
02724     break;
02725 
02726   case NT_CTYPE:
02727   case NT_CCLASS:
02728     if (exact == 0) {
02729       n = node;
02730     }
02731     break;
02732 
02733   case NT_LIST:
02734     n = get_head_value_node(NCAR(node), exact, reg);
02735     break;
02736 
02737   case NT_STR:
02738     {
02739       StrNode* sn = NSTR(node);
02740 
02741       if (sn->end <= sn->s)
02742         break;
02743 
02744       if (exact != 0 &&
02745           !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
02746       }
02747       else {
02748         n = node;
02749       }
02750     }
02751     break;
02752 
02753   case NT_QTFR:
02754     {
02755       QtfrNode* qn = NQTFR(node);
02756       if (qn->lower > 0) {
02757         if (IS_NOT_NULL(qn->head_exact))
02758           n = qn->head_exact;
02759         else
02760           n = get_head_value_node(qn->target, exact, reg);
02761       }
02762     }
02763     break;
02764 
02765   case NT_ENCLOSE:
02766     {
02767       EncloseNode* en = NENCLOSE(node);
02768       switch (en->type) {
02769       case ENCLOSE_OPTION:
02770         {
02771           OnigOptionType options = reg->options;
02772 
02773           reg->options = NENCLOSE(node)->option;
02774           n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
02775           reg->options = options;
02776         }
02777         break;
02778 
02779       case ENCLOSE_MEMORY:
02780       case ENCLOSE_STOP_BACKTRACK:
02781       case ENCLOSE_CONDITION:
02782         n = get_head_value_node(en->target, exact, reg);
02783         break;
02784       }
02785     }
02786     break;
02787 
02788   case NT_ANCHOR:
02789     if (NANCHOR(node)->type == ANCHOR_PREC_READ)
02790       n = get_head_value_node(NANCHOR(node)->target, exact, reg);
02791     break;
02792 
02793   default:
02794     break;
02795   }
02796 
02797   return n;
02798 }
02799 
02800 static int
02801 check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
02802 {
02803   int type, r = 0;
02804 
02805   type = NTYPE(node);
02806   if ((NTYPE2BIT(type) & type_mask) == 0)
02807     return 1;
02808 
02809   switch (type) {
02810   case NT_LIST:
02811   case NT_ALT:
02812     do {
02813       r = check_type_tree(NCAR(node), type_mask, enclose_mask,
02814                           anchor_mask);
02815     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02816     break;
02817 
02818   case NT_QTFR:
02819     r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
02820                         anchor_mask);
02821     break;
02822 
02823   case NT_ENCLOSE:
02824     {
02825       EncloseNode* en = NENCLOSE(node);
02826       if ((en->type & enclose_mask) == 0)
02827         return 1;
02828 
02829       r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
02830     }
02831     break;
02832 
02833   case NT_ANCHOR:
02834     type = NANCHOR(node)->type;
02835     if ((type & anchor_mask) == 0)
02836       return 1;
02837 
02838     if (NANCHOR(node)->target)
02839       r = check_type_tree(NANCHOR(node)->target,
02840                           type_mask, enclose_mask, anchor_mask);
02841     break;
02842 
02843   default:
02844     break;
02845   }
02846   return r;
02847 }
02848 
02849 #ifdef USE_SUBEXP_CALL
02850 
02851 #define RECURSION_EXIST       1
02852 #define RECURSION_INFINITE    2
02853 
02854 static int
02855 subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
02856 {
02857   int type;
02858   int r = 0;
02859 
02860   type = NTYPE(node);
02861   switch (type) {
02862   case NT_LIST:
02863     {
02864       Node *x;
02865       OnigDistance min;
02866       int ret;
02867 
02868       x = node;
02869       do {
02870         ret = subexp_inf_recursive_check(NCAR(x), env, head);
02871         if (ret < 0 || ret == RECURSION_INFINITE) return ret;
02872         r |= ret;
02873         if (head) {
02874           ret = get_min_match_length(NCAR(x), &min, env);
02875           if (ret != 0) return ret;
02876           if (min != 0) head = 0;
02877         }
02878       } while (IS_NOT_NULL(x = NCDR(x)));
02879     }
02880     break;
02881 
02882   case NT_ALT:
02883     {
02884       int ret;
02885       r = RECURSION_EXIST;
02886       do {
02887         ret = subexp_inf_recursive_check(NCAR(node), env, head);
02888         if (ret < 0 || ret == RECURSION_INFINITE) return ret;
02889         r &= ret;
02890       } while (IS_NOT_NULL(node = NCDR(node)));
02891     }
02892     break;
02893 
02894   case NT_QTFR:
02895     r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
02896     if (r == RECURSION_EXIST) {
02897       if (NQTFR(node)->lower == 0) r = 0;
02898     }
02899     break;
02900 
02901   case NT_ANCHOR:
02902     {
02903       AnchorNode* an = NANCHOR(node);
02904       switch (an->type) {
02905       case ANCHOR_PREC_READ:
02906       case ANCHOR_PREC_READ_NOT:
02907       case ANCHOR_LOOK_BEHIND:
02908       case ANCHOR_LOOK_BEHIND_NOT:
02909         r = subexp_inf_recursive_check(an->target, env, head);
02910         break;
02911       }
02912     }
02913     break;
02914 
02915   case NT_CALL:
02916     r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
02917     break;
02918 
02919   case NT_ENCLOSE:
02920     if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
02921       return 0;
02922     else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
02923       return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
02924     else {
02925       SET_ENCLOSE_STATUS(node, NST_MARK2);
02926       r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
02927       CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
02928     }
02929     break;
02930 
02931   default:
02932     break;
02933   }
02934 
02935   return r;
02936 }
02937 
02938 static int
02939 subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
02940 {
02941   int type;
02942   int r = 0;
02943 
02944   type = NTYPE(node);
02945   switch (type) {
02946   case NT_LIST:
02947   case NT_ALT:
02948     do {
02949       r = subexp_inf_recursive_check_trav(NCAR(node), env);
02950     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02951     break;
02952 
02953   case NT_QTFR:
02954     r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
02955     break;
02956 
02957   case NT_ANCHOR:
02958     {
02959       AnchorNode* an = NANCHOR(node);
02960       switch (an->type) {
02961       case ANCHOR_PREC_READ:
02962       case ANCHOR_PREC_READ_NOT:
02963       case ANCHOR_LOOK_BEHIND:
02964       case ANCHOR_LOOK_BEHIND_NOT:
02965         r = subexp_inf_recursive_check_trav(an->target, env);
02966         break;
02967       }
02968     }
02969     break;
02970 
02971   case NT_ENCLOSE:
02972     {
02973       EncloseNode* en = NENCLOSE(node);
02974 
02975       if (IS_ENCLOSE_RECURSION(en)) {
02976         SET_ENCLOSE_STATUS(node, NST_MARK1);
02977         r = subexp_inf_recursive_check(en->target, env, 1);
02978         if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
02979         CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
02980       }
02981       r = subexp_inf_recursive_check_trav(en->target, env);
02982     }
02983 
02984     break;
02985 
02986   default:
02987     break;
02988   }
02989 
02990   return r;
02991 }
02992 
02993 static int
02994 subexp_recursive_check(Node* node)
02995 {
02996   int r = 0;
02997 
02998   switch (NTYPE(node)) {
02999   case NT_LIST:
03000   case NT_ALT:
03001     do {
03002       r |= subexp_recursive_check(NCAR(node));
03003     } while (IS_NOT_NULL(node = NCDR(node)));
03004     break;
03005 
03006   case NT_QTFR:
03007     r = subexp_recursive_check(NQTFR(node)->target);
03008     break;
03009 
03010   case NT_ANCHOR:
03011     {
03012       AnchorNode* an = NANCHOR(node);
03013       switch (an->type) {
03014       case ANCHOR_PREC_READ:
03015       case ANCHOR_PREC_READ_NOT:
03016       case ANCHOR_LOOK_BEHIND:
03017       case ANCHOR_LOOK_BEHIND_NOT:
03018         r = subexp_recursive_check(an->target);
03019         break;
03020       }
03021     }
03022     break;
03023 
03024   case NT_CALL:
03025     r = subexp_recursive_check(NCALL(node)->target);
03026     if (r != 0) SET_CALL_RECURSION(node);
03027     break;
03028 
03029   case NT_ENCLOSE:
03030     if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
03031       return 0;
03032     else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
03033       return 1; /* recursion */
03034     else {
03035       SET_ENCLOSE_STATUS(node, NST_MARK2);
03036       r = subexp_recursive_check(NENCLOSE(node)->target);
03037       CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
03038     }
03039     break;
03040 
03041   default:
03042     break;
03043   }
03044 
03045   return r;
03046 }
03047 
03048 
03049 static int
03050 subexp_recursive_check_trav(Node* node, ScanEnv* env)
03051 {
03052 #define FOUND_CALLED_NODE    1
03053 
03054   int type;
03055   int r = 0;
03056 
03057   type = NTYPE(node);
03058   switch (type) {
03059   case NT_LIST:
03060   case NT_ALT:
03061     {
03062       int ret;
03063       do {
03064         ret = subexp_recursive_check_trav(NCAR(node), env);
03065         if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
03066         else if (ret < 0) return ret;
03067       } while (IS_NOT_NULL(node = NCDR(node)));
03068     }
03069     break;
03070 
03071   case NT_QTFR:
03072     r = subexp_recursive_check_trav(NQTFR(node)->target, env);
03073     if (NQTFR(node)->upper == 0) {
03074       if (r == FOUND_CALLED_NODE)
03075         NQTFR(node)->is_refered = 1;
03076     }
03077     break;
03078 
03079   case NT_ANCHOR:
03080     {
03081       AnchorNode* an = NANCHOR(node);
03082       switch (an->type) {
03083       case ANCHOR_PREC_READ:
03084       case ANCHOR_PREC_READ_NOT:
03085       case ANCHOR_LOOK_BEHIND:
03086       case ANCHOR_LOOK_BEHIND_NOT:
03087         r = subexp_recursive_check_trav(an->target, env);
03088         break;
03089       }
03090     }
03091     break;
03092 
03093   case NT_ENCLOSE:
03094     {
03095       EncloseNode* en = NENCLOSE(node);
03096 
03097       if (! IS_ENCLOSE_RECURSION(en)) {
03098         if (IS_ENCLOSE_CALLED(en)) {
03099           SET_ENCLOSE_STATUS(node, NST_MARK1);
03100           r = subexp_recursive_check(en->target);
03101           if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
03102           CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
03103         }
03104       }
03105       r = subexp_recursive_check_trav(en->target, env);
03106       if (IS_ENCLOSE_CALLED(en))
03107         r |= FOUND_CALLED_NODE;
03108     }
03109     break;
03110 
03111   default:
03112     break;
03113   }
03114 
03115   return r;
03116 }
03117 
/*
 * Walk the tree and bind every call node (NT_CALL) to the group node it
 * refers to.
 *
 * For each call the target is looked up in SCANENV_MEM_NODES(env) by group
 * number (directly, or through onig_name_to_group_numbers() for a named
 * call).  On success the target is flagged NST_CALLED, the group's bit is
 * set in env->bt_mem_start and the call receives env->unset_addr_list.
 *
 * Returns 0 on success or one of the ONIGERR_* codes returned below; the
 * offending name is recorded with onig_scan_env_set_error_string() where
 * one is available.
 */
03118 static int
03119 setup_subexp_call(Node* node, ScanEnv* env)
03120 {
03121   int type;
03122   int r = 0;
03123 
03124   type = NTYPE(node);
03125   switch (type) {
03126   case NT_LIST:
03127     do {
03128       r = setup_subexp_call(NCAR(node), env);
03129     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
03130     break;
03131 
03132   case NT_ALT:
03133     do {
03134       r = setup_subexp_call(NCAR(node), env);
03135     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
03136     break;
03137 
03138   case NT_QTFR:
03139     r = setup_subexp_call(NQTFR(node)->target, env);
03140     break;
03141   case NT_ENCLOSE:
03142     r = setup_subexp_call(NENCLOSE(node)->target, env);
03143     break;
03144 
03145   case NT_CALL:
03146     {
03147       CallNode* cn = NCALL(node);
03148       Node** nodes = SCANENV_MEM_NODES(env);
03149 
           /* call by group number */
03150       if (cn->group_num != 0) {
03151         int gnum = cn->group_num;
03152 
03153 #ifdef USE_NAMED_GROUP
             /* numbered calls are refused when named groups exist, the syntax
                captures only named groups and ONIG_OPTION_CAPTURE_GROUP is off */
03154         if (env->num_named > 0 &&
03155             IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
03156             !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
03157           return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
03158         }
03159 #endif
03160         if (gnum > env->num_mem) {
03161           onig_scan_env_set_error_string(env,
03162                  ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
03163           return ONIGERR_UNDEFINED_GROUP_REFERENCE;
03164         }
03165 
03166 #ifdef USE_NAMED_GROUP
             /* common tail: the branches further down jump here once
                cn->group_num holds the group to bind */
03167       set_call_attr:
03168 #endif
03169         cn->target = nodes[cn->group_num];
03170         if (IS_NULL(cn->target)) {
03171           onig_scan_env_set_error_string(env,
03172                  ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
03173           return ONIGERR_UNDEFINED_NAME_REFERENCE;
03174         }
03175         SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
03176         BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
03177         cn->unset_addr_list = env->unset_addr_list;
03178       }
03179 #ifdef USE_NAMED_GROUP
03180 #ifdef USE_PERL_SUBEXP_CALL
           /* group number 0 with an empty name: bind to nodes[0] */
03181       else if (cn->name == cn->name_end) {
03182         goto set_call_attr;
03183       }
03184 #endif
           /* call by name: resolve the name to its group number(s) first */
03185       else {
03186         int *refs;
03187 
03188         int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
03189                                            &refs);
03190         if (n <= 0) {
03191           onig_scan_env_set_error_string(env,
03192                  ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
03193           return ONIGERR_UNDEFINED_NAME_REFERENCE;
03194         }
             /* a name defined more than once is callable only if the syntax
                allows it; the first definition is then used */
03195         else if (n > 1 &&
03196             ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL)) {
03197           onig_scan_env_set_error_string(env,
03198             ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
03199           return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
03200         }
03201         else {
03202           cn->group_num = refs[0];
03203           goto set_call_attr;
03204         }
03205       }
03206 #endif
03207     }
03208     break;
03209 
03210   case NT_ANCHOR:
03211     {
03212       AnchorNode* an = NANCHOR(node);
03213 
           /* only look-ahead / look-behind anchors are descended into */
03214       switch (an->type) {
03215       case ANCHOR_PREC_READ:
03216       case ANCHOR_PREC_READ_NOT:
03217       case ANCHOR_LOOK_BEHIND:
03218       case ANCHOR_LOOK_BEHIND_NOT:
03219         r = setup_subexp_call(an->target, env);
03220         break;
03221       }
03222     }
03223     break;
03224 
03225   default:
03226     break;
03227   }
03228 
03229   return r;
03230 }
03231 #endif
03232 
03233 /* divide different length alternatives in look-behind.
03234   (?<=A|B) ==> (?<=A)|(?<=B)
03235   (?<!A|B) ==> (?<!A)(?<!B)
03236 */
03237 static int
03238 divide_look_behind_alternatives(Node* node)
03239 {
03240   Node *head, *np, *insert_node;
03241   AnchorNode* an = NANCHOR(node);
03242   int anc_type = an->type;
03243 
03244   head = an->target;
03245   np = NCAR(head);
03246   swap_node(node, head);
03247   NCAR(node) = head;
03248   NANCHOR(head)->target = np;
03249 
03250   np = node;
03251   while ((np = NCDR(np)) != NULL_NODE) {
03252     insert_node = onig_node_new_anchor(anc_type);
03253     CHECK_NULL_RETURN_MEMERR(insert_node);
03254     NANCHOR(insert_node)->target = NCAR(np);
03255     NCAR(np) = insert_node;
03256   }
03257 
03258   if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
03259     np = node;
03260     do {
03261       SET_NTYPE(np, NT_LIST);  /* alt -> list */
03262     } while ((np = NCDR(np)) != NULL_NODE);
03263   }
03264   return 0;
03265 }
03266 
03267 static int
03268 setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
03269 {
03270   int r, len;
03271   AnchorNode* an = NANCHOR(node);
03272 
03273   r = get_char_length_tree(an->target, reg, &len);
03274   if (r == 0)
03275     an->char_len = len;
03276   else if (r == GET_CHAR_LEN_VARLEN)
03277     r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
03278   else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
03279     if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
03280       r = divide_look_behind_alternatives(node);
03281     else
03282       r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
03283   }
03284 
03285   return r;
03286 }
03287 
03288 static int
03289 next_setup(Node* node, Node* next_node, int in_root, regex_t* reg)
03290 {
03291   int type;
03292 
03293  retry:
03294   type = NTYPE(node);
03295   if (type == NT_QTFR) {
03296     QtfrNode* qn = NQTFR(node);
03297     if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
03298 #ifdef USE_QTFR_PEEK_NEXT
03299       Node* n = get_head_value_node(next_node, 1, reg);
03300       /* '\0': for UTF-16BE etc... */
03301       if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
03302         qn->next_head_exact = n;
03303       }
03304 #endif
03305       /* automatic possessivation a*b ==> (?>a*)b */
03306       if (qn->lower <= 1) {
03307         int ttype = NTYPE(qn->target);
03308         if (IS_NODE_TYPE_SIMPLE(ttype)) {
03309           Node *x, *y;
03310           x = get_head_value_node(qn->target, 0, reg);
03311           if (IS_NOT_NULL(x)) {
03312             y = get_head_value_node(next_node,  0, reg);
03313             if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
03314               Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
03315               CHECK_NULL_RETURN_MEMERR(en);
03316               SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
03317               swap_node(node, en);
03318               NENCLOSE(node)->target = en;
03319             }
03320           }
03321         }
03322       }
03323 
03324 #ifndef ONIG_DONT_OPTIMIZE
03325       if (NTYPE(node) == NT_QTFR && /* the type may be changed by above block */
03326           in_root && /* qn->lower == 0 && */
03327           NTYPE(qn->target) == NT_CANY &&
03328           ! IS_MULTILINE(reg->options)) {
03329         /* implicit anchor: /.*a/ ==> /(?:^|\G).*a/ */
03330         Node *np;
03331         np = onig_node_new_list(NULL_NODE, NULL_NODE);
03332         CHECK_NULL_RETURN_MEMERR(np);
03333         swap_node(node, np);
03334         NCDR(node) = onig_node_new_list(np, NULL_NODE);
03335         if (IS_NULL(NCDR(node))) {
03336           onig_node_free(np);
03337           return ONIGERR_MEMORY;
03338         }
03339         np = onig_node_new_anchor(ANCHOR_ANYCHAR_STAR);   /* (?:^|\G) */
03340         CHECK_NULL_RETURN_MEMERR(np);
03341         NCAR(node) = np;
03342       }
03343 #endif
03344     }
03345   }
03346   else if (type == NT_ENCLOSE) {
03347     EncloseNode* en = NENCLOSE(node);
03348     in_root = 0;
03349     if (en->type == ENCLOSE_MEMORY) {
03350       node = en->target;
03351       goto retry;
03352     }
03353   }
03354   return 0;
03355 }
03356 
03357 
03358 static int
03359 update_string_node_case_fold(regex_t* reg, Node *node)
03360 {
03361   UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
03362   UChar *sbuf, *ebuf, *sp;
03363   int r, i, len;
03364   OnigDistance sbuf_size;
03365   StrNode* sn = NSTR(node);
03366 
03367   end = sn->end;
03368   sbuf_size = (end - sn->s) * 2;
03369   sbuf = (UChar* )xmalloc(sbuf_size);
03370   CHECK_NULL_RETURN_MEMERR(sbuf);
03371   ebuf = sbuf + sbuf_size;
03372 
03373   sp = sbuf;
03374   p = sn->s;
03375   while (p < end) {
03376     len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
03377     for (i = 0; i < len; i++) {
03378       if (sp >= ebuf) {
03379         UChar* p = (UChar* )xrealloc(sbuf, sbuf_size * 2);
03380         if (IS_NULL(p)) {
03381           xfree(sbuf);
03382           return ONIGERR_MEMORY;
03383         }
03384         sbuf = p;
03385         sp = sbuf + sbuf_size;
03386         sbuf_size *= 2;
03387         ebuf = sbuf + sbuf_size;
03388       }
03389 
03390       *sp++ = buf[i];
03391     }
03392   }
03393 
03394   r = onig_node_str_set(node, sbuf, sp);
03395   if (r != 0) {
03396     xfree(sbuf);
03397     return r;
03398   }
03399 
03400   xfree(sbuf);
03401   return 0;
03402 }
03403 
03404 static int
03405 expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
03406                                  regex_t* reg)
03407 {
03408   int r;
03409   Node *node;
03410 
03411   node = onig_node_new_str(s, end);
03412   if (IS_NULL(node)) return ONIGERR_MEMORY;
03413 
03414   r = update_string_node_case_fold(reg, node);
03415   if (r != 0) {
03416     onig_node_free(node);
03417     return r;
03418   }
03419 
03420   NSTRING_SET_AMBIG(node);
03421   NSTRING_SET_DONT_GET_OPT_INFO(node);
03422   *rnode = node;
03423   return 0;
03424 }
03425 
03426 static int
03427 expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
03428                             UChar *p, int slen, UChar *end,
03429                             regex_t* reg, Node **rnode)
03430 {
03431   int r, i, j, len, varlen, varclen;
03432   Node *anode, *var_anode, *snode, *xnode, *an;
03433   UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
03434 
03435   *rnode = var_anode = NULL_NODE;
03436 
03437   varlen = 0;
03438   varclen = 0;
03439   for (i = 0; i < item_num; i++) {
03440     if (items[i].byte_len != slen) {
03441       varlen = 1;
03442       break;
03443     }
03444     if (items[i].code_len != 1) {
03445       varclen = 1;
03446     }
03447   }
03448 
03449   if (varlen != 0) {
03450     *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
03451     if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
03452 
03453     xnode = onig_node_new_list(NULL, NULL);
03454     if (IS_NULL(xnode)) goto mem_err;
03455     NCAR(var_anode) = xnode;
03456 
03457     anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
03458     if (IS_NULL(anode)) goto mem_err;
03459     NCAR(xnode) = anode;
03460   }
03461   else {
03462     *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
03463     if (IS_NULL(anode)) return ONIGERR_MEMORY;
03464   }
03465 
03466   snode = onig_node_new_str(p, p + slen);
03467   if (IS_NULL(snode)) goto mem_err;
03468 
03469   NCAR(anode) = snode;
03470 
03471   for (i = 0; i < item_num; i++) {
03472     snode = onig_node_new_str(NULL, NULL);
03473     if (IS_NULL(snode)) goto mem_err;
03474 
03475     for (j = 0; j < items[i].code_len; j++) {
03476       len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
03477       if (len < 0) {
03478         r = len;
03479         goto mem_err2;
03480       }
03481 
03482       r = onig_node_str_cat(snode, buf, buf + len);
03483       if (r != 0) goto mem_err2;
03484     }
03485 
03486     an = onig_node_new_alt(NULL_NODE, NULL_NODE);
03487     if (IS_NULL(an)) {
03488       goto mem_err2;
03489     }
03490 
03491     if (items[i].byte_len != slen) {
03492       Node *rem;
03493       UChar *q = p + items[i].byte_len;
03494 
03495       if (q < end) {
03496         r = expand_case_fold_make_rem_string(&rem, q, end, reg);
03497         if (r != 0) {
03498           onig_node_free(an);
03499           goto mem_err2;
03500         }
03501 
03502         xnode = onig_node_list_add(NULL_NODE, snode);
03503         if (IS_NULL(xnode)) {
03504           onig_node_free(an);
03505           onig_node_free(rem);
03506           goto mem_err2;
03507         }
03508         if (IS_NULL(onig_node_list_add(xnode, rem))) {
03509           onig_node_free(an);
03510           onig_node_free(xnode);
03511           onig_node_free(rem);
03512           goto mem_err;
03513         }
03514 
03515         NCAR(an) = xnode;
03516       }
03517       else {
03518         NCAR(an) = snode;
03519       }
03520 
03521       NCDR(var_anode) = an;
03522       var_anode = an;
03523     }
03524     else {
03525       NCAR(an)     = snode;
03526       NCDR(anode) = an;
03527       anode = an;
03528     }
03529   }
03530 
03531   if (varclen && !varlen)
03532     return 2;
03533   return varlen;
03534 
03535  mem_err2:
03536   onig_node_free(snode);
03537 
03538  mem_err:
03539   onig_node_free(*rnode);
03540 
03541   return ONIGERR_MEMORY;
03542 }
03543 
03544 static int
03545 expand_case_fold_string(Node* node, regex_t* reg)
03546 {
03547 #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION  8
03548 
03549   int r, n, len, alt_num;
03550   int varlen = 0;
03551   UChar *start, *end, *p;
03552   Node *top_root, *root, *snode, *prev_node;
03553   OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
03554   StrNode* sn = NSTR(node);
03555 
03556   if (NSTRING_IS_AMBIG(node)) return 0;
03557 
03558   start = sn->s;
03559   end   = sn->end;
03560   if (start >= end) return 0;
03561 
03562   r = 0;
03563   top_root = root = prev_node = snode = NULL_NODE;
03564   alt_num = 1;
03565   p = start;
03566   while (p < end) {
03567     n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
03568                                            p, end, items);
03569     if (n < 0) {
03570       r = n;
03571       goto err;
03572     }
03573 
03574     len = enclen(reg->enc, p, end);
03575 
03576     if (n == 0) {
03577       if (IS_NULL(snode)) {
03578         if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
03579           top_root = root = onig_node_list_add(NULL_NODE, prev_node);
03580           if (IS_NULL(root)) {
03581             onig_node_free(prev_node);
03582             goto mem_err;
03583           }
03584         }
03585 
03586         prev_node = snode = onig_node_new_str(NULL, NULL);
03587         if (IS_NULL(snode)) goto mem_err;
03588         if (IS_NOT_NULL(root)) {
03589           if (IS_NULL(onig_node_list_add(root, snode))) {
03590             onig_node_free(snode);
03591             goto mem_err;
03592           }
03593         }
03594       }
03595 
03596       r = onig_node_str_cat(snode, p, p + len);
03597       if (r != 0) goto err;
03598     }
03599     else {
03600       alt_num *= (n + 1);
03601       if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
03602 
03603       if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
03604         top_root = root = onig_node_list_add(NULL_NODE, prev_node);
03605         if (IS_NULL(root)) {
03606           onig_node_free(prev_node);
03607           goto mem_err;
03608         }
03609       }
03610 
03611       r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
03612       if (r < 0) goto mem_err;
03613       if (r > 0) varlen = 1;
03614       if (r == 1) {
03615         if (IS_NULL(root)) {
03616           top_root = prev_node;
03617         }
03618         else {
03619           if (IS_NULL(onig_node_list_add(root, prev_node))) {
03620             onig_node_free(prev_node);
03621             goto mem_err;
03622           }
03623         }
03624 
03625         root = NCAR(prev_node);
03626       }
03627       else { /* r == 0 || r == 2 */
03628         if (IS_NOT_NULL(root)) {
03629           if (IS_NULL(onig_node_list_add(root, prev_node))) {
03630             onig_node_free(prev_node);
03631             goto mem_err;
03632           }
03633         }
03634       }
03635 
03636       snode = NULL_NODE;
03637     }
03638 
03639     p += len;
03640   }
03641 
03642   if (p < end) {
03643     Node *srem;
03644 
03645     r = expand_case_fold_make_rem_string(&srem, p, end, reg);
03646     if (r != 0) goto mem_err;
03647 
03648     if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
03649       top_root = root = onig_node_list_add(NULL_NODE, prev_node);
03650       if (IS_NULL(root)) {
03651         onig_node_free(srem);
03652         onig_node_free(prev_node);
03653         goto mem_err;
03654       }
03655     }
03656 
03657     if (IS_NULL(root)) {
03658       prev_node = srem;
03659     }
03660     else {
03661       if (IS_NULL(onig_node_list_add(root, srem))) {
03662         onig_node_free(srem);
03663         goto mem_err;
03664       }
03665     }
03666   }
03667 
03668   /* ending */
03669   top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
03670   if (!varlen) {
03671     /* When all expanded strings are same length, case-insensitive
03672        BM search will be used. */
03673     r = update_string_node_case_fold(reg, node);
03674     if (r == 0) {
03675       NSTRING_SET_AMBIG(node);
03676     }
03677   }
03678   else {
03679     swap_node(node, top_root);
03680     r = 0;
03681   }
03682   onig_node_free(top_root);
03683   return r;
03684 
03685  mem_err:
03686   r = ONIGERR_MEMORY;
03687 
03688  err:
03689   onig_node_free(top_root);
03690   return r;
03691 }
03692 
03693 
#ifdef USE_COMBINATION_EXPLOSION_CHECK

#define CEC_THRES_NUM_BIG_REPEAT         512
#define CEC_INFINITE_NUM          0x7fffffff

#define CEC_IN_INFINITE_REPEAT    (1<<0)
#define CEC_IN_FINITE_REPEAT      (1<<1)
#define CEC_CONT_BIG_REPEAT       (1<<2)

/* Walk the tree and assign comb_exp_check_num to quantifiers that sit
   inside an unbounded repeat, or that follow a big repeat and are big
   themselves.

     state : CEC_* flags describing the context of `node`
     env   : receives num_comb_exp_check, comb_exp_max_regnum,
             curr_max_regnum and has_recursion updates

   Returns the CEC_* state to hand to the next sibling.  List elements are
   chained through that value; alternatives all start from `state` and
   their results are OR-ed together.
*/
static int
setup_comb_exp_check(Node* node, int state, ScanEnv* env)
{
  int type;
  int r = state;

  type = NTYPE(node);
  switch (type) {
  case NT_LIST:
    do {
      r = setup_comb_exp_check(NCAR(node), r, env);
    } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
    break;

  case NT_ALT:
    {
      int ret;
      do {
        ret = setup_comb_exp_check(NCAR(node), state, env);
        r |= ret;
      } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
    }
    break;

  case NT_QTFR:
    {
      int child_state = state;
      int add_state = 0;
      QtfrNode* qn = NQTFR(node);
      Node* target = qn->target;
      int var_num;

      if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1) {
        /* {0,1}, {1,1} are allowed */
        child_state |= CEC_IN_FINITE_REPEAT;

        /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
        if (env->backrefed_mem == 0 && NTYPE(qn->target) == NT_ENCLOSE) {
          EncloseNode* en = NENCLOSE(qn->target);
          if (en->type == ENCLOSE_MEMORY && NTYPE(en->target) == NT_QTFR) {
            QtfrNode* q = NQTFR(en->target);
            if (IS_REPEAT_INFINITE(q->upper)
                && q->greedy == qn->greedy) {
              qn->upper = (qn->lower == 0 ? 1 : qn->lower);
              if (qn->upper == 1)
                child_state = state;
            }
          }
        }
      }

      if (state & CEC_IN_FINITE_REPEAT) {
        qn->comb_exp_check_num = -1;
      }
      else {
        if (IS_REPEAT_INFINITE(qn->upper)) {
          var_num = CEC_INFINITE_NUM;
          child_state |= CEC_IN_INFINITE_REPEAT;
        }
        else {
          var_num = qn->upper - qn->lower;
        }

        if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
          add_state |= CEC_CONT_BIG_REPEAT;

        if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
            ((state & CEC_CONT_BIG_REPEAT) != 0 &&
             var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
          if (qn->comb_exp_check_num == 0) {
            env->num_comb_exp_check++;
            qn->comb_exp_check_num = env->num_comb_exp_check;
            if (env->curr_max_regnum > env->comb_exp_max_regnum)
              env->comb_exp_max_regnum = env->curr_max_regnum;
          }
        }
      }

      r = setup_comb_exp_check(target, child_state, env);
      r |= add_state;
    }
    break;

  case NT_ENCLOSE:
    {
      EncloseNode* en = NENCLOSE(node);

      /* capture groups additionally track the highest group number seen */
      if (en->type == ENCLOSE_MEMORY &&
          env->curr_max_regnum < en->regnum)
        env->curr_max_regnum = en->regnum;

      r = setup_comb_exp_check(en->target, state, env);
    }
    break;

#ifdef USE_SUBEXP_CALL
  case NT_CALL:
    if (IS_CALL_RECURSION(NCALL(node)))
      env->has_recursion = 1;
    else
      r = setup_comb_exp_check(NCALL(node)->target, state, env);
    break;
#endif

  default:
    break;
  }

  return r;
}
#endif
03833 
/* Context flags passed down in the `state` argument of setup_tree(). */
03834 #define IN_ALT        (1<<0)
03835 #define IN_NOT        (1<<1)
03836 #define IN_REPEAT     (1<<2)
03837 #define IN_VAR_REPEAT (1<<3)
03838 #define IN_ROOT       (1<<4)
03839 
03840 /* setup_tree does the following work.
03841  1. check empty loop. (set qn->target_empty_info)
03842  2. expand ignore-case strings (NT_STR); NT_CCLASS is left untouched here.
03843  3. set memory status bit flags. (env->backrefed_mem, env->bt_mem_start,
          env->bt_mem_end)
03844  4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
03845  5. find invalid patterns in look-behind.
03846  6. expand repeated string.

   Parameters:
     node  - subtree to process; it may be rewritten in place via swap_node.
     reg   - regex being compiled; reg->options is temporarily replaced
             while an ENCLOSE_OPTION body is processed.
     state - bitwise OR of the IN_* flags describing the enclosing context.
     env   - scan environment that receives the backref / memory bit flags.

   Returns 0 on success, otherwise the error code of the step that failed.
03847  */
03848 static int
03849 setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
03850 {
03851   int type;
03852   int r = 0;
      /* IN_ROOT is split off from state; only the NT_LIST and ENCLOSE_OPTION
         cases below put it back for their children. */
03853   int in_root = state & IN_ROOT;
03854 
03855   state &= ~IN_ROOT;
    /* re-entered when setup_look_behind() changed the type of `node` */
03856 restart:
03857   type = NTYPE(node);
03858   switch (type) {
03859   case NT_LIST:
03860     {
03861       Node* prev = NULL_NODE;
03862       int prev_in_root = 0;
03863       state |= in_root;
03864       do {
03865         r = setup_tree(NCAR(node), reg, state, env);
            /* let the previous element see the element that follows it */
03866         if (IS_NOT_NULL(prev) && r == 0) {
03867           r = next_setup(prev, NCAR(node), prev_in_root, reg);
03868         }
03869         prev = NCAR(node);
            /* IN_ROOT is cleared after the first element of the list */
03870         prev_in_root = state & IN_ROOT;
03871         state &= ~IN_ROOT;
03872       } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
03873     }
03874     break;
03875 
03876   case NT_ALT:
03877     do {
03878       r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
03879     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
03880     break;
03881 
03882   case NT_CCLASS:
03883     break;
03884 
03885   case NT_STR:
        /* raw strings are never case-expanded */
03886     if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
03887       r = expand_case_fold_string(node, reg);
03888     }
03889     break;
03890 
03891   case NT_CTYPE:
03892   case NT_CANY:
03893     break;
03894 
03895 #ifdef USE_SUBEXP_CALL
03896   case NT_CALL:
03897     break;
03898 #endif
03899 
03900   case NT_BREF:
03901     {
03902       int i;
03903       int* p;
03904       Node** nodes = SCANENV_MEM_NODES(env);
03905       BRefNode* br = NBREF(node);
03906       p = BACKREFS_P(br);
          /* reject references beyond env->num_mem, then flag each
             referenced group in env and on its enclose node */
03907       for (i = 0; i < br->back_num; i++) {
03908         if (p[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
03909         BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
03910         BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
03911 #ifdef USE_BACKREF_WITH_LEVEL
03912         if (IS_BACKREF_NEST_LEVEL(br)) {
03913           BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
03914         }
03915 #endif
03916         SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
03917       }
03918     }
03919     break;
03920 
03921   case NT_QTFR:
03922     {
03923       OnigDistance d;
03924       QtfrNode* qn = NQTFR(node);
03925       Node* target = qn->target;
03926 
03927       if ((state & IN_REPEAT) != 0) {
03928         qn->state |= NST_IN_REPEAT;
03929       }
03930 
          /* empty-loop check: a target whose minimum match length is 0 */
03931       if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
03932         r = get_min_match_length(target, &d, env);
03933         if (r) break;
03934         if (d == 0) {
03935           qn->target_empty_info = NQ_TARGET_IS_EMPTY;
03936 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
03937           r = quantifiers_memory_node_info(target);
03938           if (r < 0) break;
03939           if (r > 0) {
03940             qn->target_empty_info = r;
03941           }
03942 #endif
03943 #if 0
03944           r = get_max_match_length(target, &d, env);
03945           if (r == 0 && d == 0) {
03946             /*  ()* ==> ()?, ()+ ==> ()  */
03947             qn->upper = 1;
03948             if (qn->lower > 1) qn->lower = 1;
03949             if (NTYPE(target) == NT_STR) {
03950               qn->upper = qn->lower = 0;  /* /(?:)+/ ==> // */
03951             }
03952           }
03953 #endif
03954         }
03955       }
03956 
03957       state |= IN_REPEAT;
03958       if (qn->lower != qn->upper)
03959         state |= IN_VAR_REPEAT;
03960       r = setup_tree(target, reg, state, env);
03961       if (r) break;
03962 
03963       /* expand string */
          /* A string repeated at least twice is unrolled into one longer
             string, limited to EXPAND_STRING_MAX_LENGTH bytes. */
03964 #define EXPAND_STRING_MAX_LENGTH  100
03965       if (NTYPE(target) == NT_STR) {
03966         if (qn->lower > 1) {
03967           int i, n = qn->lower;
03968           OnigDistance len = NSTRING_LEN(target);
03969           StrNode* sn = NSTR(target);
03970           Node* np;
03971 
03972           np = onig_node_new_str(sn->s, sn->end);
03973           if (IS_NULL(np)) return ONIGERR_MEMORY;
03974           NSTR(np)->flag = sn->flag;
03975 
              /* i counts the copies now held in np */
03976           for (i = 1; i < n && (i+1) * len <= EXPAND_STRING_MAX_LENGTH; i++) {
03977             r = onig_node_str_cat(np, sn->s, sn->end);
03978             if (r) {
03979               onig_node_free(np);
03980               return r;
03981             }
03982           }
              /* repetitions remain: keep the quantifier with reduced
                 bounds and place it after the unrolled string */
03983           if (i < qn->upper || IS_REPEAT_INFINITE(qn->upper)) {
03984             Node *np1, *np2;
03985 
03986             qn->lower -= i;
03987             if (! IS_REPEAT_INFINITE(qn->upper))
03988               qn->upper -= i;
03989 
03990             np1 = onig_node_new_list(np, NULL);
03991             if (IS_NULL(np1)) {
03992               onig_node_free(np);
03993               return ONIGERR_MEMORY;
03994             }
                /* after the swap `node` is the list and np1 the quantifier */
03995             swap_node(np1, node);
03996             np2 = onig_node_list_add(node, np1);
03997             if (IS_NULL(np2)) {
03998               onig_node_free(np1);
03999               return ONIGERR_MEMORY;
04000             }
04001           }
04002           else {
                /* fully unrolled: the string replaces the quantifier */
04003             swap_node(np, node);
04004             onig_node_free(np);
04005           }
04006           break; /* break case NT_QTFR: */
04007         }
04008       }
04009 
04010 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
04011       if (qn->greedy && (qn->target_empty_info != 0)) {
04012         if (NTYPE(target) == NT_QTFR) {
04013           QtfrNode* tqn = NQTFR(target);
04014           if (IS_NOT_NULL(tqn->head_exact)) {
04015             qn->head_exact  = tqn->head_exact;
04016             tqn->head_exact = NULL;
04017           }
04018         }
04019         else {
04020           qn->head_exact = get_head_value_node(qn->target, 1, reg);
04021         }
04022       }
04023 #endif
04024     }
04025     break;
04026 
04027   case NT_ENCLOSE:
04028     {
04029       EncloseNode* en = NENCLOSE(node);
04030 
04031       switch (en->type) {
04032       case ENCLOSE_OPTION:
04033         {
              /* process the body under the group's options, then restore */
04034           OnigOptionType options = reg->options;
04035           state |= in_root;
04036           reg->options = NENCLOSE(node)->option;
04037           r = setup_tree(NENCLOSE(node)->target, reg, state, env);
04038           reg->options = options;
04039         }
04040         break;
04041 
04042       case ENCLOSE_MEMORY:
04043         if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {
04044           BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
04045           /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
04046         }
04047         r = setup_tree(en->target, reg, state, env);
04048         break;
04049 
04050       case ENCLOSE_STOP_BACKTRACK:
04051         {
04052           Node* target = en->target;
04053           r = setup_tree(target, reg, state, env);
              /* NOTE(review): the check below runs even when r != 0 */
04054           if (NTYPE(target) == NT_QTFR) {
04055             QtfrNode* tqn = NQTFR(target);
04056             if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
04057                 tqn->greedy != 0) {  /* (?>a*), a*+ etc... */
04058               int qtype = NTYPE(tqn->target);
04059               if (IS_NODE_TYPE_SIMPLE(qtype))
04060                 SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
04061             }
04062           }
04063         }
04064         break;
04065 
04066       case ENCLOSE_CONDITION:
04067 #ifdef USE_NAMED_GROUP
04068         if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node)) &&
04069             env->num_named > 0 &&
04070             IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
04071             !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
04072           return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
04073         }
04074 #endif
04075         r = setup_tree(NENCLOSE(node)->target, reg, state, env);
04076         break;
04077       }
04078     }
04079     break;
04080 
04081   case NT_ANCHOR:
04082     {
04083       AnchorNode* an = NANCHOR(node);
04084 
04085       switch (an->type) {
04086       case ANCHOR_PREC_READ:
04087         r = setup_tree(an->target, reg, state, env);
04088         break;
04089       case ANCHOR_PREC_READ_NOT:
04090         r = setup_tree(an->target, reg, (state | IN_NOT), env);
04091         break;
04092 
04093 /* allowed node types in look-behind */
04094 #define ALLOWED_TYPE_IN_LB  \
04095   ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
04096     BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
04097 
04098 #define ALLOWED_ENCLOSE_IN_LB       ( ENCLOSE_MEMORY | ENCLOSE_OPTION )
04099 #define ALLOWED_ENCLOSE_IN_LB_NOT   ENCLOSE_OPTION
04100 
04101 #define ALLOWED_ANCHOR_IN_LB \
04102 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
04103   ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
04104   ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
04105   ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
04106 #define ALLOWED_ANCHOR_IN_LB_NOT \
04107 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
04108   ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
04109   ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
04110   ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
04111 
04112       case ANCHOR_LOOK_BEHIND:
04113         {
              /* a positive result from check_type_tree() is treated as a
                 disallowed construct inside the look-behind */
04114           r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
04115                               ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
04116           if (r < 0) return r;
04117           if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
04118           r = setup_look_behind(node, reg, env);
04119           if (r != 0) return r;
              /* the look-behind may have been split into alternatives;
                 if so, process the new node type from the top */
04120           if (NTYPE(node) != NT_ANCHOR) goto restart;
04121           r = setup_tree(an->target, reg, state, env);
04122         }
04123         break;
04124 
04125       case ANCHOR_LOOK_BEHIND_NOT:
04126         {
              /* same checks as above, with the stricter *_LB_NOT enclose set */
04127           r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
04128                       ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
04129           if (r < 0) return r;
04130           if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
04131           r = setup_look_behind(node, reg, env);
04132           if (r != 0) return r;
04133           if (NTYPE(node) != NT_ANCHOR) goto restart;
04134           r = setup_tree(an->target, reg, (state | IN_NOT), env);
04135         }
04136         break;
04137       }
04138     }
04139     break;
04140 
04141   default:
04142     break;
04143   }
04144 
04145   return r;
04146 }
04147 
04148 #ifndef USE_SUNDAY_QUICK_SEARCH
04149 /* set skip map for Boyer-Moore search */
04150 static int
04151 set_bm_skip(UChar* s, UChar* end, regex_t* reg,
04152             UChar skip[], int** int_skip, int ignore_case)
04153 {
04154   OnigDistance i, len;
04155   int clen, flen, n, j, k;
04156   UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN];
04157   OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
04158   OnigEncoding enc = reg->enc;
04159 
04160   len = end - s;
04161   if (len < ONIG_CHAR_TABLE_SIZE) {
04162     for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )len;
04163 
04164     n = 0;
04165     for (i = 0; i < len - 1; i += clen) {
04166       p = s + i;
04167       if (ignore_case)
04168         n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
04169                                                p, end, items);
04170       clen = enclen(enc, p, end);
04171 
04172       for (j = 0; j < n; j++) {
04173         if ((items[j].code_len != 1) || (items[j].byte_len != clen))
04174           return 1;  /* different length isn't supported. */
04175         flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
04176         if (flen != clen)
04177           return 1;  /* different length isn't supported. */
04178       }
04179       for (j = 0; j < clen; j++) {
04180         skip[s[i + j]] = (UChar )(len - 1 - i - j);
04181         for (k = 0; k < n; k++) {
04182           skip[buf[k][j]] = (UChar )(len - 1 - i - j);
04183         }
04184       }
04185     }
04186   }
04187   else {
04188     if (IS_NULL(*int_skip)) {
04189       *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
04190       if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
04191     }
04192     for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )len;
04193 
04194     n = 0;
04195     for (i = 0; i < len - 1; i += clen) {
04196       p = s + i;
04197       if (ignore_case)
04198         n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
04199                                                p, end, items);
04200       clen = enclen(enc, p, end);
04201 
04202       for (j = 0; j < n; j++) {
04203         if ((items[j].code_len != 1) || (items[j].byte_len != clen))
04204           return 1;  /* different length isn't supported. */
04205         flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
04206         if (flen != clen)
04207           return 1;  /* different length isn't supported. */
04208       }
04209       for (j = 0; j < clen; j++) {
04210         (*int_skip)[s[i + j]] = (int )(len - 1 - i - j);
04211         for (k = 0; k < n; k++) {
04212           (*int_skip)[buf[k][j]] = (int )(len - 1 - i - j);
04213         }
04214       }
04215     }
04216   }
04217   return 0;
04218 }
04219 
04220 #else /* USE_SUNDAY_QUICK_SEARCH */
04221 
04222 /* set skip map for Sunday's quick search */
04223 static int
04224 set_bm_skip(UChar* s, UChar* end, regex_t* reg,
04225             UChar skip[], int** int_skip, int ignore_case)
04226 {
04227   OnigDistance i, len;
04228   int clen, flen, n, j, k;
04229   UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN];
04230   OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
04231   OnigEncoding enc = reg->enc;
04232 
04233   len = end - s;
04234   if (len < ONIG_CHAR_TABLE_SIZE) {
04235     for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )(len + 1);
04236 
04237     n = 0;
04238     for (i = 0; i < len; i += clen) {
04239       p = s + i;
04240       if (ignore_case)
04241         n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
04242                                                p, end, items);
04243       clen = enclen(enc, p, end);
04244 
04245       for (j = 0; j < n; j++) {
04246         if ((items[j].code_len != 1) || (items[j].byte_len != clen))
04247           return 1;  /* different length isn't supported. */
04248         flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
04249         if (flen != clen)
04250           return 1;  /* different length isn't supported. */
04251       }
04252       for (j = 0; j < clen; j++) {
04253         skip[s[i + j]] = (UChar )(len - i - j);
04254         for (k = 0; k < n; k++) {
04255           skip[buf[k][j]] = (UChar )(len - i - j);
04256         }
04257       }
04258     }
04259   }
04260   else {
04261     if (IS_NULL(*int_skip)) {
04262       *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
04263       if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
04264     }
04265     for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )(len + 1);
04266 
04267     n = 0;
04268     for (i = 0; i < len; i += clen) {
04269       p = s + i;
04270       if (ignore_case)
04271         n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
04272                                                p, end, items);
04273       clen = enclen(enc, p, end);
04274 
04275       for (j = 0; j < n; j++) {
04276         if ((items[j].code_len != 1) || (items[j].byte_len != clen))
04277           return 1;  /* different length isn't supported. */
04278         flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
04279         if (flen != clen)
04280           return 1;  /* different length isn't supported. */
04281       }
04282       for (j = 0; j < clen; j++) {
04283         (*int_skip)[s[i + j]] = (int )(len - i - j);
04284         for (k = 0; k < n; k++) {
04285           (*int_skip)[buf[k][j]] = (int )(len - i - j);
04286         }
04287       }
04288     }
04289   }
04290   return 0;
04291 }
04292 #endif /* USE_SUNDAY_QUICK_SEARCH */
04293 
/* Capacity in bytes of OptExactInfo.s, the buffer that holds a candidate
 * exact string collected by the optimizer. */
04294 #define OPT_EXACT_MAXLEN   24
04295 
/* A [min, max] range of byte lengths.  distance_value() treats
 * max == ONIG_INFINITE_DISTANCE as an unbounded range. */
04296 typedef struct {
04297   OnigDistance min;  /* min byte length */
04298   OnigDistance max;  /* max byte length */
04299 } MinMaxLen;
04300 
/* Context passed down through optimize_node_left(). */
04301 typedef struct {
04302   MinMaxLen        mmd;             /* length range accumulated for the nodes preceding the current one (NT_LIST adds each element's length) */
04303   OnigEncoding     enc;             /* encoding used for character-length and case-fold queries */
04304   OnigOptionType   options;         /* options in effect; replaced temporarily while inside an option group or a call target */
04305   OnigCaseFoldType case_fold_flag;  /* handed to the case-fold lookup for ambiguous strings */
04306   ScanEnv*         scan_env;        /* source of the memory-node table and backrefed_mem bits */
04307 } OptEnv;
04308 
/* Two bit sets of ANCHOR_* flags.  is_left_anchor() decides which set a
 * given flag is stored in. */
04309 typedef struct {
04310   int left_anchor;   /* flags for which is_left_anchor() returns 1 */
04311   int right_anchor;  /* the ANCHOR_END_* and ANCHOR_PREC_READ* flags */
04312 } OptAncInfo;
04313 
/* A candidate exact (literal) string for the search optimizer. */
04314 typedef struct {
04315   MinMaxLen  mmd; /* info position */
04316   OptAncInfo anc; /* anchors attached to this string */
04317 
04318   int   reach_end;    /* non-zero while s covers its source up to the end, so more text may be appended */
04319   int   ignore_case;  /* -1: unset, 0: case sensitive, 1: ignore case */
04320   int   len;          /* number of bytes used in s */
04321   UChar s[OPT_EXACT_MAXLEN];  /* the string bytes; at most OPT_EXACT_MAXLEN are kept */
04322 } OptExactInfo;
04323 
/* A candidate byte map: the set of byte values a match may start with. */
04324 typedef struct {
04325   MinMaxLen mmd; /* info position */
04326   OptAncInfo anc; /* anchors attached to this map */
04327 
04328   int   value;      /* weighted value */
04329   UChar map[ONIG_CHAR_TABLE_SIZE];  /* map[c] is 1 when byte c is in the set, else 0 */
04330 } OptMapInfo;
04331 
/* Everything optimize_node_left() collects for one node. */
04332 typedef struct {
04333   MinMaxLen    len;    /* byte-length range the node can match */
04334 
04335   OptAncInfo   anc;    /* anchors of the node itself */
04336   OptExactInfo exb;    /* boundary */
04337   OptExactInfo exm;    /* middle */
04338   OptExactInfo expr;   /* prec read (?=...) */
04339 
04340   OptMapInfo   map;   /* boundary */
04341 } NodeOptInfo;
04342 
04343 
04344 static int
04345 map_position_value(OnigEncoding enc, int i)
04346 {
04347   static const short int ByteValTable[] = {
04348      5,  1,  1,  1,  1,  1,  1,  1,  1, 10, 10,  1,  1, 10,  1,  1,
04349      1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
04350     12,  4,  7,  4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,
04351      6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  5,
04352      5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
04353      6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  6,  5,  5,  5,
04354      5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
04355      6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  1
04356   };
04357 
04358   if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) {
04359     if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
04360       return 20;
04361     else
04362       return (int )ByteValTable[i];
04363   }
04364   else
04365     return 4;   /* Take it easy. */
04366 }
04367 
04368 static int
04369 distance_value(MinMaxLen* mm)
04370 {
04371   /* 1000 / (min-max-dist + 1) */
04372   static const short int dist_vals[] = {
04373     1000,  500,  333,  250,  200,  167,  143,  125,  111,  100,
04374       91,   83,   77,   71,   67,   63,   59,   56,   53,   50,
04375       48,   45,   43,   42,   40,   38,   37,   36,   34,   33,
04376       32,   31,   30,   29,   29,   28,   27,   26,   26,   25,
04377       24,   24,   23,   23,   22,   22,   21,   21,   20,   20,
04378       20,   19,   19,   19,   18,   18,   18,   17,   17,   17,
04379       16,   16,   16,   16,   15,   15,   15,   15,   14,   14,
04380       14,   14,   14,   14,   13,   13,   13,   13,   13,   13,
04381       12,   12,   12,   12,   12,   12,   11,   11,   11,   11,
04382       11,   11,   11,   11,   11,   10,   10,   10,   10,   10
04383   };
04384 
04385   OnigDistance d;
04386 
04387   if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
04388 
04389   d = mm->max - mm->min;
04390   if (d < sizeof(dist_vals)/sizeof(dist_vals[0]))
04391     /* return dist_vals[d] * 16 / (mm->min + 12); */
04392     return (int )dist_vals[d];
04393   else
04394     return 1;
04395 }
04396 
04397 static int
04398 comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
04399 {
04400   if (v2 <= 0) return -1;
04401   if (v1 <= 0) return  1;
04402 
04403   v1 *= distance_value(d1);
04404   v2 *= distance_value(d2);
04405 
04406   if (v2 > v1) return  1;
04407   if (v2 < v1) return -1;
04408 
04409   if (d2->min < d1->min) return  1;
04410   if (d2->min > d1->min) return -1;
04411   return 0;
04412 }
04413 
04414 static int
04415 is_equal_mml(MinMaxLen* a, MinMaxLen* b)
04416 {
04417   return (a->min == b->min && a->max == b->max) ? 1 : 0;
04418 }
04419 
04420 
04421 static void
04422 set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
04423 {
04424   mml->min = min;
04425   mml->max = max;
04426 }
04427 
04428 static void
04429 clear_mml(MinMaxLen* mml)
04430 {
04431   mml->min = mml->max = 0;
04432 }
04433 
04434 static void
04435 copy_mml(MinMaxLen* to, MinMaxLen* from)
04436 {
04437   to->min = from->min;
04438   to->max = from->max;
04439 }
04440 
04441 static void
04442 add_mml(MinMaxLen* to, MinMaxLen* from)
04443 {
04444   to->min = distance_add(to->min, from->min);
04445   to->max = distance_add(to->max, from->max);
04446 }
04447 
04448 #if 0
/* Disabled helper (compiled out by the enclosing #if 0): adds the same
 * length to both bounds of *to via distance_add(). */
04449 static void
04450 add_len_mml(MinMaxLen* to, OnigDistance len)
04451 {
04452   to->min = distance_add(to->min, len);
04453   to->max = distance_add(to->max, len);
04454 }
04455 #endif
04456 
04457 static void
04458 alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
04459 {
04460   if (to->min > from->min) to->min = from->min;
04461   if (to->max < from->max) to->max = from->max;
04462 }
04463 
04464 static void
04465 copy_opt_env(OptEnv* to, OptEnv* from)
04466 {
04467   *to = *from;
04468 }
04469 
04470 static void
04471 clear_opt_anc_info(OptAncInfo* anc)
04472 {
04473   anc->left_anchor  = 0;
04474   anc->right_anchor = 0;
04475 }
04476 
04477 static void
04478 copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
04479 {
04480   *to = *from;
04481 }
04482 
04483 static void
04484 concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
04485                     OnigDistance left_len, OnigDistance right_len)
04486 {
04487   clear_opt_anc_info(to);
04488 
04489   to->left_anchor = left->left_anchor;
04490   if (left_len == 0) {
04491     to->left_anchor |= right->left_anchor;
04492   }
04493 
04494   to->right_anchor = right->right_anchor;
04495   if (right_len == 0) {
04496     to->right_anchor |= left->right_anchor;
04497   }
04498 }
04499 
04500 static int
04501 is_left_anchor(int anc)
04502 {
04503   if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
04504       anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
04505       anc == ANCHOR_PREC_READ_NOT)
04506     return 0;
04507 
04508   return 1;
04509 }
04510 
04511 static int
04512 is_set_opt_anc_info(OptAncInfo* to, int anc)
04513 {
04514   if ((to->left_anchor & anc) != 0) return 1;
04515 
04516   return ((to->right_anchor & anc) != 0 ? 1 : 0);
04517 }
04518 
04519 static void
04520 add_opt_anc_info(OptAncInfo* to, int anc)
04521 {
04522   if (is_left_anchor(anc))
04523     to->left_anchor |= anc;
04524   else
04525     to->right_anchor |= anc;
04526 }
04527 
04528 static void
04529 remove_opt_anc_info(OptAncInfo* to, int anc)
04530 {
04531   if (is_left_anchor(anc))
04532     to->left_anchor &= ~anc;
04533   else
04534     to->right_anchor &= ~anc;
04535 }
04536 
04537 static void
04538 alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
04539 {
04540   to->left_anchor  &= add->left_anchor;
04541   to->right_anchor &= add->right_anchor;
04542 }
04543 
04544 static int
04545 is_full_opt_exact_info(OptExactInfo* ex)
04546 {
04547   return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
04548 }
04549 
04550 static void
04551 clear_opt_exact_info(OptExactInfo* ex)
04552 {
04553   clear_mml(&ex->mmd);
04554   clear_opt_anc_info(&ex->anc);
04555   ex->reach_end   = 0;
04556   ex->ignore_case = -1;   /* unset */
04557   ex->len         = 0;
04558   ex->s[0]        = '\0';
04559 }
04560 
04561 static void
04562 copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
04563 {
04564   *to = *from;
04565 }
04566 
04567 static void
04568 concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
04569 {
04570   int i, j, len;
04571   UChar *p, *end;
04572   OptAncInfo tanc;
04573 
04574   if (to->ignore_case < 0)
04575     to->ignore_case = add->ignore_case;
04576   else if (to->ignore_case != add->ignore_case)
04577     return ;  /* avoid */
04578 
04579   p = add->s;
04580   end = p + add->len;
04581   for (i = to->len; p < end; ) {
04582     len = enclen(enc, p, end);
04583     if (i + len > OPT_EXACT_MAXLEN) break;
04584     for (j = 0; j < len && p < end; j++)
04585       to->s[i++] = *p++;
04586   }
04587 
04588   to->len = i;
04589   to->reach_end = (p == end ? add->reach_end : 0);
04590 
04591   concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
04592   if (! to->reach_end) tanc.right_anchor = 0;
04593   copy_opt_anc_info(&to->anc, &tanc);
04594 }
04595 
04596 static void
04597 concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
04598                           int raw ARG_UNUSED, OnigEncoding enc)
04599 {
04600   int i, j, len;
04601   UChar *p;
04602 
04603   for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
04604     len = enclen(enc, p, end);
04605     if (i + len > OPT_EXACT_MAXLEN) break;
04606     for (j = 0; j < len && p < end; j++)
04607       to->s[i++] = *p++;
04608   }
04609 
04610   to->len = i;
04611 }
04612 
04613 static void
04614 alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
04615 {
04616   int i, j, len;
04617 
04618   if (add->len == 0 || to->len == 0) {
04619     clear_opt_exact_info(to);
04620     return ;
04621   }
04622 
04623   if (! is_equal_mml(&to->mmd, &add->mmd)) {
04624     clear_opt_exact_info(to);
04625     return ;
04626   }
04627 
04628   for (i = 0; i < to->len && i < add->len; ) {
04629     if (to->s[i] != add->s[i]) break;
04630     len = enclen(env->enc, to->s + i, to->s + to->len);
04631 
04632     for (j = 1; j < len; j++) {
04633       if (to->s[i+j] != add->s[i+j]) break;
04634     }
04635     if (j < len) break;
04636     i += len;
04637   }
04638 
04639   if (! add->reach_end || i < add->len || i < to->len) {
04640     to->reach_end = 0;
04641   }
04642   to->len = i;
04643   if (to->ignore_case < 0)
04644     to->ignore_case = add->ignore_case;
04645   else if (add->ignore_case >= 0)
04646     to->ignore_case |= add->ignore_case;
04647 
04648   alt_merge_opt_anc_info(&to->anc, &add->anc);
04649   if (! to->reach_end) to->anc.right_anchor = 0;
04650 }
04651 
04652 static void
04653 select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
04654 {
04655   int v1, v2;
04656 
04657   v1 = now->len;
04658   v2 = alt->len;
04659 
04660   if (v2 == 0) {
04661     return ;
04662   }
04663   else if (v1 == 0) {
04664     copy_opt_exact_info(now, alt);
04665     return ;
04666   }
04667   else if (v1 <= 2 && v2 <= 2) {
04668     /* ByteValTable[x] is big value --> low price */
04669     v2 = map_position_value(enc, now->s[0]);
04670     v1 = map_position_value(enc, alt->s[0]);
04671 
04672     if (now->len > 1) v1 += 5;
04673     if (alt->len > 1) v2 += 5;
04674   }
04675 
04676   if (now->ignore_case <= 0) v1 *= 2;
04677   if (alt->ignore_case <= 0) v2 *= 2;
04678 
04679   if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
04680     copy_opt_exact_info(now, alt);
04681 }
04682 
04683 static void
04684 clear_opt_map_info(OptMapInfo* map)
04685 {
04686   static const OptMapInfo clean_info = {
04687     {0, 0}, {0, 0}, 0,
04688     {
04689       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04690       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04691       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04692       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04693       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04694       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04695       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04696       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04697       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04698       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04699       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04700       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04701       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04702       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04703       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04704       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
04705     }
04706   };
04707 
04708   xmemcpy(map, &clean_info, sizeof(OptMapInfo));
04709 }
04710 
04711 static void
04712 copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
04713 {
04714   *to = *from;
04715 }
04716 
04717 static void
04718 add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
04719 {
04720   if (map->map[c] == 0) {
04721     map->map[c] = 1;
04722     map->value += map_position_value(enc, c);
04723   }
04724 }
04725 
04726 static int
04727 add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
04728                           OnigEncoding enc, OnigCaseFoldType case_fold_flag)
04729 {
04730   OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
04731   UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
04732   int i, n;
04733 
04734   add_char_opt_map_info(map, p[0], enc);
04735 
04736   case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
04737   n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
04738   if (n < 0) return n;
04739 
04740   for (i = 0; i < n; i++) {
04741     ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
04742     add_char_opt_map_info(map, buf[0], enc);
04743   }
04744 
04745   return 0;
04746 }
04747 
04748 static void
04749 select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
04750 {
04751   const int z = 1<<15; /* 32768: something big value */
04752 
04753   int v1, v2;
04754 
04755   if (alt->value == 0) return ;
04756   if (now->value == 0) {
04757     copy_opt_map_info(now, alt);
04758     return ;
04759   }
04760 
04761   v1 = z / now->value;
04762   v2 = z / alt->value;
04763   if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
04764     copy_opt_map_info(now, alt);
04765 }
04766 
04767 static int
04768 comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
04769 {
04770 #define COMP_EM_BASE  20
04771   int ve, vm;
04772 
04773   if (m->value <= 0) return -1;
04774 
04775   ve = COMP_EM_BASE * e->len * (e->ignore_case > 0 ? 1 : 2);
04776   vm = COMP_EM_BASE * 5 * 2 / m->value;
04777   return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
04778 }
04779 
04780 static void
04781 alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
04782 {
04783   int i, val;
04784 
04785   /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
04786   if (to->value == 0) return ;
04787   if (add->value == 0 || to->mmd.max < add->mmd.min) {
04788     clear_opt_map_info(to);
04789     return ;
04790   }
04791 
04792   alt_merge_mml(&to->mmd, &add->mmd);
04793 
04794   val = 0;
04795   for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
04796     if (add->map[i])
04797       to->map[i] = 1;
04798 
04799     if (to->map[i])
04800       val += map_position_value(enc, i);
04801   }
04802   to->value = val;
04803 
04804   alt_merge_opt_anc_info(&to->anc, &add->anc);
04805 }
04806 
04807 static void
04808 set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
04809 {
04810   copy_mml(&(opt->exb.mmd),  mmd);
04811   copy_mml(&(opt->expr.mmd), mmd);
04812   copy_mml(&(opt->map.mmd),  mmd);
04813 }
04814 
04815 static void
04816 clear_node_opt_info(NodeOptInfo* opt)
04817 {
04818   clear_mml(&opt->len);
04819   clear_opt_anc_info(&opt->anc);
04820   clear_opt_exact_info(&opt->exb);
04821   clear_opt_exact_info(&opt->exm);
04822   clear_opt_exact_info(&opt->expr);
04823   clear_opt_map_info(&opt->map);
04824 }
04825 
04826 static void
04827 copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
04828 {
04829   *to = *from;
04830 }
04831 
04832 static void
04833 concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
04834 {
04835   int exb_reach, exm_reach;
04836   OptAncInfo tanc;
04837 
04838   concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
04839   copy_opt_anc_info(&to->anc, &tanc);
04840 
04841   if (add->exb.len > 0 && to->len.max == 0) {
04842     concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
04843                         to->len.max, add->len.max);
04844     copy_opt_anc_info(&add->exb.anc, &tanc);
04845   }
04846 
04847   if (add->map.value > 0 && to->len.max == 0) {
04848     if (add->map.mmd.max == 0)
04849       add->map.anc.left_anchor |= to->anc.left_anchor;
04850   }
04851 
04852   exb_reach = to->exb.reach_end;
04853   exm_reach = to->exm.reach_end;
04854 
04855   if (add->len.max != 0)
04856     to->exb.reach_end = to->exm.reach_end = 0;
04857 
04858   if (add->exb.len > 0) {
04859     if (exb_reach) {
04860       concat_opt_exact_info(&to->exb, &add->exb, enc);
04861       clear_opt_exact_info(&add->exb);
04862     }
04863     else if (exm_reach) {
04864       concat_opt_exact_info(&to->exm, &add->exb, enc);
04865       clear_opt_exact_info(&add->exb);
04866     }
04867   }
04868   select_opt_exact_info(enc, &to->exm, &add->exb);
04869   select_opt_exact_info(enc, &to->exm, &add->exm);
04870 
04871   if (to->expr.len > 0) {
04872     if (add->len.max > 0) {
04873       if (to->expr.len > (int )add->len.max)
04874         to->expr.len = (int )add->len.max;
04875 
04876       if (to->expr.mmd.max == 0)
04877         select_opt_exact_info(enc, &to->exb, &to->expr);
04878       else
04879         select_opt_exact_info(enc, &to->exm, &to->expr);
04880     }
04881   }
04882   else if (add->expr.len > 0) {
04883     copy_opt_exact_info(&to->expr, &add->expr);
04884   }
04885 
04886   select_opt_map_info(&to->map, &add->map);
04887 
04888   add_mml(&to->len, &add->len);
04889 }
04890 
04891 static void
04892 alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
04893 {
04894   alt_merge_opt_anc_info  (&to->anc,  &add->anc);
04895   alt_merge_opt_exact_info(&to->exb,  &add->exb, env);
04896   alt_merge_opt_exact_info(&to->exm,  &add->exm, env);
04897   alt_merge_opt_exact_info(&to->expr, &add->expr, env);
04898   alt_merge_opt_map_info(env->enc, &to->map,  &add->map);
04899 
04900   alt_merge_mml(&to->len, &add->len);
04901 }
04902 
04903 
/* Number of times optimize_node_left() descends into the same memory group
 * (when USE_SUBEXP_CALL is defined); further visits only report the group's
 * length range. */
04904 #define MAX_NODE_OPT_INFO_REF_COUNT    5
04905 
/*
 * Collect optimization info for the subtree rooted at node into *opt.
 *
 * *opt is cleared first and its boundary string, prec-read string and byte
 * map are stamped with the position range env->mmd.  The switch then fills
 * in, per node type, the byte-length range (opt->len) and whatever anchors,
 * exact strings and byte map the node provides, recursing into child nodes.
 *
 * Returns 0 on success, ONIGERR_TYPE_BUG for an unknown node type, or the
 * non-zero result of a failing helper / recursive call.
 */
04906 static int
04907 optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
04908 {
04909   int type;
04910   int r = 0;
04911 
04912   clear_node_opt_info(opt);
04913   set_bound_node_opt_info(opt, &env->mmd);
04914 
04915   type = NTYPE(node);
04916   switch (type) {
  /* Concatenation: visit the elements in order; a private copy of the
     environment accumulates the length range of the elements seen so far. */
04917   case NT_LIST:
04918     {
04919       OptEnv nenv;
04920       NodeOptInfo nopt;
04921       Node* nd = node;
04922 
04923       copy_opt_env(&nenv, env);
04924       do {
04925         r = optimize_node_left(NCAR(nd), &nopt, &nenv);
04926         if (r == 0) {
04927           add_mml(&nenv.mmd, &nopt.len);
04928           concat_left_node_opt_info(env->enc, opt, &nopt);
04929         }
04930       } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
04931     }
04932     break;
04933 
  /* Alternation: the first branch is taken as is, every later branch is
     merged into it. */
04934   case NT_ALT:
04935     {
04936       NodeOptInfo nopt;
04937       Node* nd = node;
04938 
04939       do {
04940         r = optimize_node_left(NCAR(nd), &nopt, env);
04941         if (r == 0) {
04942           if (nd == node) copy_node_opt_info(opt, &nopt);
04943           else            alt_merge_node_opt_info(opt, &nopt, env);
04944         }
04945       } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
04946     }
04947     break;
04948 
  /* Literal string: becomes the boundary exact string; its first byte
     (plus case-fold alternatives for ambiguous strings) seeds the map. */
04949   case NT_STR:
04950     {
04951       StrNode* sn = NSTR(node);
04952       OnigDistance slen = sn->end - sn->s;
04953       int is_raw = NSTRING_IS_RAW(node);
04954 
04955       if (! NSTRING_IS_AMBIG(node)) {
04956         concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
04957                                   is_raw, env->enc);
04958         opt->exb.ignore_case = 0;
04959         if (slen > 0) {
04960           add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
04961         }
04962         set_mml(&opt->len, slen, slen);
04963       }
04964       else {
04965         OnigDistance max;
04966 
04967         if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
          /* no exact/map info is taken; only the length range is set */
04968           int n = onigenc_strlen(env->enc, sn->s, sn->end);
04969           max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
04970         }
04971         else {
04972           concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
04973                                     is_raw, env->enc);
04974           opt->exb.ignore_case = 1;
04975 
04976           if (slen > 0) {
04977             r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
04978                                           env->enc, env->case_fold_flag);
04979             if (r != 0) break;
04980           }
04981 
04982           max = slen;
04983         }
04984 
04985         set_mml(&opt->len, slen, max);
04986       }
04987 
      /* the whole string fit into the buffer, so it may be extended later */
04988       if ((OnigDistance )opt->exb.len == slen)
04989         opt->exb.reach_end = 1;
04990     }
04991     break;
04992 
  /* Character class: with a multibyte part or negation only the length
     range is known; otherwise every member byte goes into the map. */
04993   case NT_CCLASS:
04994     {
04995       int i, z;
04996       CClassNode* cc = NCCLASS(node);
04997 
04998       /* no need to check ignore case. (set in setup_tree()) */
04999 
05000       if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
05001         OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
05002         OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
05003 
05004         set_mml(&opt->len, min, max);
05005       }
05006       else {
05007         for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
05008           z = BITSET_AT(cc->bs, i);
05009           if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
05010             add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
05011           }
05012         }
05013         set_mml(&opt->len, 1, 1);
05014       }
05015     }
05016     break;
05017 
  /* Character type: a map is built only when the encoding's maximum
     character length is 1, and only for the word type. */
05018   case NT_CTYPE:
05019     {
05020       int i, min, max;
05021       int maxcode;
05022 
05023       max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
05024 
05025       if (max == 1) {
05026         min = 1;
05027 
05028         maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
05029         switch (NCTYPE(node)->ctype) {
05030         case ONIGENC_CTYPE_WORD:
05031           if (NCTYPE(node)->not != 0) {
05032             for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
05033               if (! ONIGENC_IS_CODE_WORD(env->enc, i) || i >= maxcode) {
05034                 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
05035               }
05036             }
05037           }
05038           else {
05039             for (i = 0; i < maxcode; i++) {
05040               if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
05041                 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
05042               }
05043             }
05044           }
05045           break;
05046         }
05047       }
05048       else {
05049         min = ONIGENC_MBC_MINLEN(env->enc);
05050       }
05051       set_mml(&opt->len, min, max);
05052     }
05053     break;
05054 
  /* Any character: only the length range of one character is known. */
05055   case NT_CANY:
05056     {
05057       OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
05058       OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
05059       set_mml(&opt->len, min, max);
05060     }
05061     break;
05062 
  /* Anchors match no text; they are recorded as flags, and a positive
     look-ahead additionally contributes its string and map. */
05063   case NT_ANCHOR:
05064     switch (NANCHOR(node)->type) {
05065     case ANCHOR_BEGIN_BUF:
05066     case ANCHOR_BEGIN_POSITION:
05067     case ANCHOR_BEGIN_LINE:
05068     case ANCHOR_END_BUF:
05069     case ANCHOR_SEMI_END_BUF:
05070     case ANCHOR_END_LINE:
05071     case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */
05072       add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
05073       break;
05074 
05075     case ANCHOR_PREC_READ:
05076       {
05077         NodeOptInfo nopt;
05078 
05079         r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
05080         if (r == 0) {
05081           if (nopt.exb.len > 0)
05082             copy_opt_exact_info(&opt->expr, &nopt.exb);
05083           else if (nopt.exm.len > 0)
05084             copy_opt_exact_info(&opt->expr, &nopt.exm);
05085 
05086           opt->expr.reach_end = 0;
05087 
05088           if (nopt.map.value > 0)
05089             copy_opt_map_info(&opt->map, &nopt.map);
05090         }
05091       }
05092       break;
05093 
05094     case ANCHOR_PREC_READ_NOT:
05095     case ANCHOR_LOOK_BEHIND_NOT:
05096       break;
05097     }
05098     break;
05099 
  /* Back reference: the length range is the union of the ranges of all
     referenced groups; a recursive reference is unbounded. */
05100   case NT_BREF:
05101     {
05102       int i;
05103       int* backs;
05104       OnigDistance min, max, tmin, tmax;
05105       Node** nodes = SCANENV_MEM_NODES(env->scan_env);
05106       BRefNode* br = NBREF(node);
05107 
05108       if (br->state & NST_RECURSION) {
05109         set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
05110         break;
05111       }
05112       backs = BACKREFS_P(br);
05113       r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
05114       if (r != 0) break;
05115       r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
05116       if (r != 0) break;
05117       for (i = 1; i < br->back_num; i++) {
05118         r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
05119         if (r != 0) break;
05120         r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
05121         if (r != 0) break;
05122         if (min > tmin) min = tmin;
05123         if (max < tmax) max = tmax;
05124       }
05125       if (r == 0) set_mml(&opt->len, min, max);
05126     }
05127     break;
05128 
05129 #ifdef USE_SUBEXP_CALL
  /* Subexpression call: analyse the target under the target's own options,
     restoring the caller's options afterwards; recursion is unbounded. */
05130   case NT_CALL:
05131     if (IS_CALL_RECURSION(NCALL(node)))
05132       set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
05133     else {
05134       OnigOptionType save = env->options;
05135       env->options = NENCLOSE(NCALL(node)->target)->option;
05136       r = optimize_node_left(NCALL(node)->target, opt, env);
05137       env->options = save;
05138     }
05139     break;
05140 #endif
05141 
  /* Quantifier: the target's info is reused only when at least one
     repetition is required; the length range is scaled by the bounds. */
05142   case NT_QTFR:
05143     {
05144       int i;
05145       OnigDistance min, max;
05146       NodeOptInfo nopt;
05147       QtfrNode* qn = NQTFR(node);
05148 
05149       r = optimize_node_left(qn->target, &nopt, env);
05150       if (r) break;
05151 
05152       if (/*qn->lower == 0 &&*/ IS_REPEAT_INFINITE(qn->upper)) {
05153         if (env->mmd.max == 0 &&
05154             NTYPE(qn->target) == NT_CANY && qn->greedy) {
05155           if (IS_MULTILINE(env->options))
05156             /* implicit anchor: /.*a/ ==> /\A.*a/ */
05157             add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
05158           else
05159             add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
05160         }
05161       }
05162       else {
05163         if (qn->lower > 0) {
05164           copy_node_opt_info(opt, &nopt);
05165           if (nopt.exb.len > 0) {
05166             if (nopt.exb.reach_end) {
              /* repeat the string once per required repetition
                 until the buffer is full */
05167               for (i = 2; i <= qn->lower &&
05168                           ! is_full_opt_exact_info(&opt->exb); i++) {
05169                 concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
05170               }
05171               if (i < qn->lower) {
05172                 opt->exb.reach_end = 0;
05173               }
05174             }
05175           }
05176 
05177           if (qn->lower != qn->upper) {
05178             opt->exb.reach_end = 0;
05179             opt->exm.reach_end = 0;
05180           }
05181           if (qn->lower > 1)
05182             opt->exm.reach_end = 0;
05183         }
05184       }
05185 
05186       min = distance_multiply(nopt.len.min, qn->lower);
05187       if (IS_REPEAT_INFINITE(qn->upper))
05188         max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
05189       else
05190         max = distance_multiply(nopt.len.max, qn->upper);
05191 
05192       set_mml(&opt->len, min, max);
05193     }
05194     break;
05195 
  /* Groups: analyse the target, with group-specific handling around it. */
05196   case NT_ENCLOSE:
05197     {
05198       EncloseNode* en = NENCLOSE(node);
05199 
05200       switch (en->type) {
05201       case ENCLOSE_OPTION:
05202         {
05203           OnigOptionType save = env->options;
05204 
05205           env->options = en->option;
05206           r = optimize_node_left(en->target, opt, env);
05207           env->options = save;
05208         }
05209         break;
05210 
05211       case ENCLOSE_MEMORY:
05212 #ifdef USE_SUBEXP_CALL
        /* limit how often the same group is analysed; past the limit only
           the length range (fixed values if known) is reported */
05213         en->opt_count++;
05214         if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
05215           OnigDistance min, max;
05216 
05217           min = 0;
05218           max = ONIG_INFINITE_DISTANCE;
05219           if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
05220           if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
05221           set_mml(&opt->len, min, max);
05222         }
05223         else
05224 #endif
05225         {
05226           r = optimize_node_left(en->target, opt, env);
05227 
          /* the implicit .* anchor is dropped for a back-referenced group */
05228           if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
05229             if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
05230               remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
05231           }
05232         }
05233         break;
05234 
05235       case ENCLOSE_STOP_BACKTRACK:
05236       case ENCLOSE_CONDITION:
05237         r = optimize_node_left(en->target, opt, env);
05238         break;
05239       }
05240     }
05241     break;
05242 
05243   default:
05244 #ifdef ONIG_DEBUG
05245     if (!onig_is_prelude()) fprintf(stderr, "optimize_node_left: undefined node type %d\n",
05246             NTYPE(node));
05247 #endif
05248     r = ONIGERR_TYPE_BUG;
05249     break;
05250   }
05251 
05252   return r;
05253 }
05254 
05255 static int
05256 set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
05257 {
05258   int r;
05259   int allow_reverse;
05260 
05261   if (e->len == 0) return 0;
05262 
05263   reg->exact = (UChar* )xmalloc(e->len);
05264   CHECK_NULL_RETURN_MEMERR(reg->exact);
05265   xmemcpy(reg->exact, e->s, e->len);
05266   reg->exact_end = reg->exact + e->len;
05267 
05268   allow_reverse =
05269         ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
05270 
05271   if (e->ignore_case > 0) {
05272     if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
05273       r = set_bm_skip(reg->exact, reg->exact_end, reg,
05274                       reg->map, &(reg->int_map), 1);
05275       if (r == 0) {
05276         reg->optimize = (allow_reverse != 0
05277                          ? ONIG_OPTIMIZE_EXACT_BM_IC : ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC);
05278       }
05279       else {
05280         reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
05281       }
05282     }
05283     else {
05284       reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
05285     }
05286   }
05287   else {
05288     if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
05289       r = set_bm_skip(reg->exact, reg->exact_end, reg,
05290                       reg->map, &(reg->int_map), 0);
05291       if (r) return r;
05292 
05293       reg->optimize = (allow_reverse != 0
05294                        ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
05295     }
05296     else {
05297       reg->optimize = ONIG_OPTIMIZE_EXACT;
05298     }
05299   }
05300 
05301   reg->dmin = e->mmd.min;
05302   reg->dmax = e->mmd.max;
05303 
05304   if (reg->dmin != ONIG_INFINITE_DISTANCE) {
05305     reg->threshold_len = (int )(reg->dmin + (reg->exact_end - reg->exact));
05306   }
05307 
05308   return 0;
05309 }
05310 
05311 static void
05312 set_optimize_map_info(regex_t* reg, OptMapInfo* m)
05313 {
05314   int i;
05315 
05316   for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
05317     reg->map[i] = m->map[i];
05318 
05319   reg->optimize   = ONIG_OPTIMIZE_MAP;
05320   reg->dmin       = m->mmd.min;
05321   reg->dmax       = m->mmd.max;
05322 
05323   if (reg->dmin != ONIG_INFINITE_DISTANCE) {
05324     reg->threshold_len = (int )(reg->dmin + 1);
05325   }
05326 }
05327 
05328 static void
05329 set_sub_anchor(regex_t* reg, OptAncInfo* anc)
05330 {
05331   reg->sub_anchor |= anc->left_anchor  & ANCHOR_BEGIN_LINE;
05332   reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
05333 }
05334 
05335 #ifdef ONIG_DEBUG
05336 static void print_optimize_info(FILE* f, regex_t* reg);
05337 #endif
05338 
05339 static int
05340 set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
05341 {
05342 
05343   int r;
05344   NodeOptInfo opt;
05345   OptEnv env;
05346 
05347   env.enc            = reg->enc;
05348   env.options        = reg->options;
05349   env.case_fold_flag = reg->case_fold_flag;
05350   env.scan_env   = scan_env;
05351   clear_mml(&env.mmd);
05352 
05353   r = optimize_node_left(node, &opt, &env);
05354   if (r) return r;
05355 
05356   reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
05357         ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML |
05358         ANCHOR_LOOK_BEHIND);
05359 
05360   reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF);
05361 
05362   if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
05363     reg->anchor_dmin = opt.len.min;
05364     reg->anchor_dmax = opt.len.max;
05365   }
05366 
05367   if (opt.exb.len > 0 || opt.exm.len > 0) {
05368     select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
05369     if (opt.map.value > 0 &&
05370         comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
05371       goto set_map;
05372     }
05373     else {
05374       r = set_optimize_exact_info(reg, &opt.exb);
05375       set_sub_anchor(reg, &opt.exb.anc);
05376     }
05377   }
05378   else if (opt.map.value > 0) {
05379   set_map:
05380     set_optimize_map_info(reg, &opt.map);
05381     set_sub_anchor(reg, &opt.map.anc);
05382   }
05383   else {
05384     reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
05385     if (opt.len.max == 0)
05386       reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
05387   }
05388 
05389 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
05390   if (!onig_is_prelude()) print_optimize_info(stderr, reg);
05391 #endif
05392   return r;
05393 }
05394 
05395 static void
05396 clear_optimize_info(regex_t* reg)
05397 {
05398   reg->optimize      = ONIG_OPTIMIZE_NONE;
05399   reg->anchor        = 0;
05400   reg->anchor_dmin   = 0;
05401   reg->anchor_dmax   = 0;
05402   reg->sub_anchor    = 0;
05403   reg->exact_end     = (UChar* )NULL;
05404   reg->threshold_len = 0;
05405   if (IS_NOT_NULL(reg->exact)) {
05406     xfree(reg->exact);
05407     reg->exact = (UChar* )NULL;
05408   }
05409 }
05410 
05411 #ifdef ONIG_DEBUG
05412 
05413 static void print_enc_string(FILE* fp, OnigEncoding enc,
05414                              const UChar *s, const UChar *end)
05415 {
05416   fprintf(fp, "\nPATTERN: /");
05417 
05418   if (ONIGENC_MBC_MINLEN(enc) > 1) {
05419     const UChar *p;
05420     OnigCodePoint code;
05421 
05422     p = s;
05423     while (p < end) {
05424       code = ONIGENC_MBC_TO_CODE(enc, p, end);
05425       if (code >= 0x80) {
05426         fprintf(fp, " 0x%04x ", (int )code);
05427       }
05428       else {
05429         fputc((int )code, fp);
05430       }
05431 
05432       p += enclen(enc, p, end);
05433     }
05434   }
05435   else {
05436     while (s < end) {
05437       fputc((int )*s, fp);
05438       s++;
05439     }
05440   }
05441 
05442   fprintf(fp, "/ (%s)\n", enc->name);
05443 }
05444 
05445 static void
05446 print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
05447 {
05448   if (a == ONIG_INFINITE_DISTANCE)
05449     fputs("inf", f);
05450   else
05451     fprintf(f, "(%"PRIuSIZE")", a);
05452 
05453   fputs("-", f);
05454 
05455   if (b == ONIG_INFINITE_DISTANCE)
05456     fputs("inf", f);
05457   else
05458     fprintf(f, "(%"PRIuSIZE")", b);
05459 }
05460 
05461 static void
05462 print_anchor(FILE* f, int anchor)
05463 {
05464   int q = 0;
05465 
05466   fprintf(f, "[");
05467 
05468   if (anchor & ANCHOR_BEGIN_BUF) {
05469     fprintf(f, "begin-buf");
05470     q = 1;
05471   }
05472   if (anchor & ANCHOR_BEGIN_LINE) {
05473     if (q) fprintf(f, ", ");
05474     q = 1;
05475     fprintf(f, "begin-line");
05476   }
05477   if (anchor & ANCHOR_BEGIN_POSITION) {
05478     if (q) fprintf(f, ", ");
05479     q = 1;
05480     fprintf(f, "begin-pos");
05481   }
05482   if (anchor & ANCHOR_END_BUF) {
05483     if (q) fprintf(f, ", ");
05484     q = 1;
05485     fprintf(f, "end-buf");
05486   }
05487   if (anchor & ANCHOR_SEMI_END_BUF) {
05488     if (q) fprintf(f, ", ");
05489     q = 1;
05490     fprintf(f, "semi-end-buf");
05491   }
05492   if (anchor & ANCHOR_END_LINE) {
05493     if (q) fprintf(f, ", ");
05494     q = 1;
05495     fprintf(f, "end-line");
05496   }
05497   if (anchor & ANCHOR_ANYCHAR_STAR) {
05498     if (q) fprintf(f, ", ");
05499     q = 1;
05500     fprintf(f, "anychar-star");
05501   }
05502   if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
05503     if (q) fprintf(f, ", ");
05504     fprintf(f, "anychar-star-ml");
05505   }
05506 
05507   fprintf(f, "]");
05508 }
05509 
05510 static void
05511 print_optimize_info(FILE* f, regex_t* reg)
05512 {
05513   static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
05514                               "EXACT_IC", "MAP",
05515                               "EXACT_BM_IC", "EXACT_BM_NOT_REV_IC" };
05516 
05517   fprintf(f, "optimize: %s\n", on[reg->optimize]);
05518   fprintf(f, "  anchor: "); print_anchor(f, reg->anchor);
05519   if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
05520     print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
05521   fprintf(f, "\n");
05522 
05523   if (reg->optimize) {
05524     fprintf(f, "  sub anchor: "); print_anchor(f, reg->sub_anchor);
05525     fprintf(f, "\n");
05526   }
05527   fprintf(f, "\n");
05528 
05529   if (reg->exact) {
05530     UChar *p;
05531     fprintf(f, "exact: [");
05532     for (p = reg->exact; p < reg->exact_end; p++) {
05533       fputc(*p, f);
05534     }
05535     fprintf(f, "]: length: %ld\n", (reg->exact_end - reg->exact));
05536   }
05537   else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
05538     int c, i, n = 0;
05539 
05540     for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
05541       if (reg->map[i]) n++;
05542 
05543     fprintf(f, "map: n=%d\n", n);
05544     if (n > 0) {
05545       c = 0;
05546       fputc('[', f);
05547       for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
05548         if (reg->map[i] != 0) {
05549           if (c > 0)  fputs(", ", f);
05550           c++;
05551           if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
05552               ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
05553             fputc(i, f);
05554           else
05555             fprintf(f, "%d", i);
05556         }
05557       }
05558       fprintf(f, "]\n");
05559     }
05560   }
05561 }
05562 #endif /* ONIG_DEBUG */
05563 
05564 
05565 extern void
05566 onig_free_body(regex_t* reg)
05567 {
05568   if (IS_NOT_NULL(reg)) {
05569     if (IS_NOT_NULL(reg->p))                xfree(reg->p);
05570     if (IS_NOT_NULL(reg->exact))            xfree(reg->exact);
05571     if (IS_NOT_NULL(reg->int_map))          xfree(reg->int_map);
05572     if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
05573     if (IS_NOT_NULL(reg->repeat_range))     xfree(reg->repeat_range);
05574     if (IS_NOT_NULL(reg->chain))            onig_free(reg->chain);
05575 
05576 #ifdef USE_NAMED_GROUP
05577     onig_names_free(reg);
05578 #endif
05579   }
05580 }
05581 
05582 extern void
05583 onig_free(regex_t* reg)
05584 {
05585   if (IS_NOT_NULL(reg)) {
05586     onig_free_body(reg);
05587     xfree(reg);
05588   }
05589 }
05590 
05591 size_t
05592 onig_memsize(const regex_t *reg)
05593 {
05594     size_t size = sizeof(regex_t);
05595     if (IS_NULL(reg)) return 0;
05596     if (IS_NOT_NULL(reg->p))                size += reg->alloc;
05597     if (IS_NOT_NULL(reg->exact))            size += reg->exact_end - reg->exact;
05598     if (IS_NOT_NULL(reg->int_map))          size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
05599     if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
05600     if (IS_NOT_NULL(reg->repeat_range))     size += reg->repeat_range_alloc * sizeof(OnigRepeatRange);
05601     if (IS_NOT_NULL(reg->chain))            size += onig_memsize(reg->chain);
05602 
05603     return size;
05604 }
05605 
05606 size_t
05607 onig_region_memsize(const OnigRegion *regs)
05608 {
05609     size_t size = sizeof(*regs);
05610     if (IS_NULL(regs)) return 0;
05611     size += regs->allocated * (sizeof(*regs->beg) + sizeof(*regs->end));
05612     return size;
05613 }
05614 
05615 #define REGEX_TRANSFER(to,from) do {\
05616   (to)->state = ONIG_STATE_MODIFY;\
05617   onig_free_body(to);\
05618   xmemcpy(to, from, sizeof(regex_t));\
05619   xfree(from);\
05620 } while (0)
05621 
05622 extern void
05623 onig_transfer(regex_t* to, regex_t* from)
05624 {
05625   THREAD_ATOMIC_START;
05626   REGEX_TRANSFER(to, from);
05627   THREAD_ATOMIC_END;
05628 }
05629 
05630 #define REGEX_CHAIN_HEAD(reg) do {\
05631   while (IS_NOT_NULL((reg)->chain)) {\
05632     (reg) = (reg)->chain;\
05633   }\
05634 } while (0)
05635 
05636 extern void
05637 onig_chain_link_add(regex_t* to, regex_t* add)
05638 {
05639   THREAD_ATOMIC_START;
05640   REGEX_CHAIN_HEAD(to);
05641   to->chain = add;
05642   THREAD_ATOMIC_END;
05643 }
05644 
05645 extern void
05646 onig_chain_reduce(regex_t* reg)
05647 {
05648   regex_t *head, *prev;
05649 
05650   prev = reg;
05651   head = prev->chain;
05652   if (IS_NOT_NULL(head)) {
05653     reg->state = ONIG_STATE_MODIFY;
05654     while (IS_NOT_NULL(head->chain)) {
05655       prev = head;
05656       head = head->chain;
05657     }
05658     prev->chain = (regex_t* )NULL;
05659     REGEX_TRANSFER(reg, head);
05660   }
05661 }
05662 
05663 #ifdef ONIG_DEBUG
05664 static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg));
05665 #endif
05666 #ifdef ONIG_DEBUG_PARSE_TREE
05667 static void print_tree P_((FILE* f, Node* node));
05668 #endif
05669 
05670 extern int
05671 onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
05672               OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
05673 {
05674 #define COMPILE_INIT_SIZE  20
05675 
05676   int r;
05677   OnigDistance init_size;
05678   Node*  root;
05679   ScanEnv  scan_env = {0};
05680 #ifdef USE_SUBEXP_CALL
05681   UnsetAddrList  uslist;
05682 #endif
05683 
05684   if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
05685 
05686   scan_env.sourcefile = sourcefile;
05687   scan_env.sourceline = sourceline;
05688   reg->state = ONIG_STATE_COMPILING;
05689 
05690 #ifdef ONIG_DEBUG
05691   if (!onig_is_prelude()) print_enc_string(stderr, reg->enc, pattern, pattern_end);
05692 #endif
05693 
05694   if (reg->alloc == 0) {
05695     init_size = (pattern_end - pattern) * 2;
05696     if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
05697     r = BBUF_INIT(reg, init_size);
05698     if (r != 0) goto end;
05699   }
05700   else
05701     reg->used = 0;
05702 
05703   reg->num_mem            = 0;
05704   reg->num_repeat         = 0;
05705   reg->num_null_check     = 0;
05706   reg->repeat_range_alloc = 0;
05707   reg->repeat_range       = (OnigRepeatRange* )NULL;
05708 #ifdef USE_COMBINATION_EXPLOSION_CHECK
05709   reg->num_comb_exp_check = 0;
05710 #endif
05711 
05712   r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
05713   if (r != 0) goto err;
05714 
05715 #ifdef ONIG_DEBUG_PARSE_TREE
05716 # if 0
05717   fprintf(stderr, "ORIGINAL PARSE TREE:\n");
05718   if (!onig_is_prelude()) {
05719     print_tree(stderr, root);
05720   }
05721 # endif
05722 #endif
05723 
05724 #ifdef USE_NAMED_GROUP
05725   /* mixed use named group and no-named group */
05726   if (scan_env.num_named > 0 &&
05727       IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
05728       !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
05729     if (scan_env.num_named != scan_env.num_mem)
05730       r = disable_noname_group_capture(&root, reg, &scan_env);
05731     else
05732       r = numbered_ref_check(root);
05733 
05734     if (r != 0) goto err;
05735   }
05736 #endif
05737 
05738 #ifdef USE_SUBEXP_CALL
05739   if (scan_env.num_call > 0) {
05740     r = unset_addr_list_init(&uslist, scan_env.num_call);
05741     if (r != 0) goto err;
05742     scan_env.unset_addr_list = &uslist;
05743     r = setup_subexp_call(root, &scan_env);
05744     if (r != 0) goto err_unset;
05745     r = subexp_recursive_check_trav(root, &scan_env);
05746     if (r  < 0) goto err_unset;
05747     r = subexp_inf_recursive_check_trav(root, &scan_env);
05748     if (r != 0) goto err_unset;
05749 
05750     reg->num_call = scan_env.num_call;
05751   }
05752   else
05753     reg->num_call = 0;
05754 #endif
05755 
05756   r = setup_tree(root, reg, IN_ROOT, &scan_env);
05757   if (r != 0) goto err_unset;
05758 
05759 #ifdef ONIG_DEBUG_PARSE_TREE
05760   if (!onig_is_prelude()) print_tree(stderr, root);
05761 #endif
05762 
05763   reg->capture_history  = scan_env.capture_history;
05764   reg->bt_mem_start     = scan_env.bt_mem_start;
05765   reg->bt_mem_start    |= reg->capture_history;
05766   if (IS_FIND_CONDITION(reg->options))
05767     BIT_STATUS_ON_ALL(reg->bt_mem_end);
05768   else {
05769     reg->bt_mem_end  = scan_env.bt_mem_end;
05770     reg->bt_mem_end |= reg->capture_history;
05771   }
05772 
05773 #ifdef USE_COMBINATION_EXPLOSION_CHECK
05774   if (scan_env.backrefed_mem == 0
05775 #ifdef USE_SUBEXP_CALL
05776       || scan_env.num_call == 0
05777 #endif
05778       ) {
05779     setup_comb_exp_check(root, 0, &scan_env);
05780 #ifdef USE_SUBEXP_CALL
05781     if (scan_env.has_recursion != 0) {
05782       scan_env.num_comb_exp_check = 0;
05783     }
05784     else
05785 #endif
05786     if (scan_env.comb_exp_max_regnum > 0) {
05787       int i;
05788       for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
05789         if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
05790           scan_env.num_comb_exp_check = 0;
05791           break;
05792         }
05793       }
05794     }
05795   }
05796 
05797   reg->num_comb_exp_check = scan_env.num_comb_exp_check;
05798 #endif
05799 
05800   clear_optimize_info(reg);
05801 #ifndef ONIG_DONT_OPTIMIZE
05802   r = set_optimize_info_from_tree(root, reg, &scan_env);
05803   if (r != 0) goto err_unset;
05804 #endif
05805 
05806   if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
05807     xfree(scan_env.mem_nodes_dynamic);
05808     scan_env.mem_nodes_dynamic = (Node** )NULL;
05809   }
05810 
05811   r = compile_tree(root, reg);
05812   if (r == 0) {
05813     r = add_opcode(reg, OP_END);
05814 #ifdef USE_SUBEXP_CALL
05815     if (scan_env.num_call > 0) {
05816       r = unset_addr_list_fix(&uslist, reg);
05817       unset_addr_list_end(&uslist);
05818       if (r) goto err;
05819     }
05820 #endif
05821 
05822     if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
05823       reg->stack_pop_level = STACK_POP_LEVEL_ALL;
05824     else {
05825       if (reg->bt_mem_start != 0)
05826         reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
05827       else
05828         reg->stack_pop_level = STACK_POP_LEVEL_FREE;
05829     }
05830   }
05831 #ifdef USE_SUBEXP_CALL
05832   else if (scan_env.num_call > 0) {
05833     unset_addr_list_end(&uslist);
05834   }
05835 #endif
05836   onig_node_free(root);
05837 
05838 #ifdef ONIG_DEBUG_COMPILE
05839 #ifdef USE_NAMED_GROUP
05840   if (!onig_is_prelude()) onig_print_names(stderr, reg);
05841 #endif
05842   if (!onig_is_prelude()) print_compiled_byte_code_list(stderr, reg);
05843 #endif
05844 
05845  end:
05846   reg->state = ONIG_STATE_NORMAL;
05847   return r;
05848 
05849  err_unset:
05850 #ifdef USE_SUBEXP_CALL
05851   if (scan_env.num_call > 0) {
05852     unset_addr_list_end(&uslist);
05853   }
05854 #endif
05855  err:
05856   if (IS_NOT_NULL(scan_env.error)) {
05857     if (IS_NOT_NULL(einfo)) {
05858       einfo->enc     = scan_env.enc;
05859       einfo->par     = scan_env.error;
05860       einfo->par_end = scan_env.error_end;
05861     }
05862   }
05863 
05864   onig_node_free(root);
05865   if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
05866       xfree(scan_env.mem_nodes_dynamic);
05867   return r;
05868 }
05869 
05870 #ifdef USE_RECOMPILE_API
05871 extern int
05872 onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
05873             OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
05874             OnigErrorInfo* einfo)
05875 {
05876   int r;
05877   regex_t *new_reg;
05878 
05879   r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo);
05880   if (r) return r;
05881   if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
05882     onig_transfer(reg, new_reg);
05883   }
05884   else {
05885     onig_chain_link_add(reg, new_reg);
05886   }
05887   return 0;
05888 }
05889 #endif
05890 
05891 static int onig_inited = 0;
05892 
05893 extern int
05894 onig_reg_init(regex_t* reg, OnigOptionType option,
05895               OnigCaseFoldType case_fold_flag,
05896               OnigEncoding enc, const OnigSyntaxType* syntax)
05897 {
05898   if (! onig_inited)
05899     onig_init();
05900 
05901   if (IS_NULL(reg))
05902     return ONIGERR_INVALID_ARGUMENT;
05903 
05904   if (ONIGENC_IS_UNDEF(enc))
05905     return ONIGERR_DEFAULT_ENCODING_IS_NOT_SET;
05906 
05907   if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
05908       == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
05909     return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
05910   }
05911 
05912   (reg)->state = ONIG_STATE_MODIFY;
05913 
05914   if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
05915     option |= syntax->options;
05916     option &= ~ONIG_OPTION_SINGLELINE;
05917   }
05918   else
05919     option |= syntax->options;
05920 
05921   (reg)->enc              = enc;
05922   (reg)->options          = option;
05923   (reg)->syntax           = syntax;
05924   (reg)->optimize         = 0;
05925   (reg)->exact            = (UChar* )NULL;
05926   (reg)->int_map          = (int* )NULL;
05927   (reg)->int_map_backward = (int* )NULL;
05928   (reg)->chain            = (regex_t* )NULL;
05929 
05930   (reg)->p                = (UChar* )NULL;
05931   (reg)->alloc            = 0;
05932   (reg)->used             = 0;
05933   (reg)->name_table       = (void* )NULL;
05934 
05935   (reg)->case_fold_flag   = case_fold_flag;
05936   return 0;
05937 }
05938 
05939 extern int
05940 onig_new_without_alloc(regex_t* reg, const UChar* pattern,
05941           const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
05942           OnigSyntaxType* syntax, OnigErrorInfo* einfo)
05943 {
05944   int r;
05945 
05946   r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
05947   if (r) return r;
05948 
05949   r = onig_compile(reg, pattern, pattern_end, einfo, NULL, 0);
05950   return r;
05951 }
05952 
05953 extern int
05954 onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
05955           OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
05956           OnigErrorInfo* einfo)
05957 {
05958   int r;
05959 
05960   *reg = (regex_t* )xmalloc(sizeof(regex_t));
05961   if (IS_NULL(*reg)) return ONIGERR_MEMORY;
05962 
05963   r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
05964   if (r) goto err;
05965 
05966   r = onig_compile(*reg, pattern, pattern_end, einfo, NULL, 0);
05967   if (r) {
05968   err:
05969     onig_free(*reg);
05970     *reg = NULL;
05971   }
05972   return r;
05973 }
05974 
05975 
05976 extern int
05977 onig_init(void)
05978 {
05979   if (onig_inited != 0)
05980     return 0;
05981 
05982   THREAD_SYSTEM_INIT;
05983   THREAD_ATOMIC_START;
05984 
05985   onig_inited = 1;
05986 
05987   onigenc_init();
05988   /* onigenc_set_default_caseconv_table((UChar* )0); */
05989 
05990 #ifdef ONIG_DEBUG_STATISTICS
05991   onig_statistics_init();
05992 #endif
05993 
05994   THREAD_ATOMIC_END;
05995   return 0;
05996 }
05997 
05998 
05999 extern int
06000 onig_end(void)
06001 {
06002   THREAD_ATOMIC_START;
06003 
06004 #ifdef ONIG_DEBUG_STATISTICS
06005   if (!onig_is_prelude()) onig_print_statistics(stderr);
06006 #endif
06007 
06008 #ifdef USE_SHARED_CCLASS_TABLE
06009   onig_free_shared_cclass_table();
06010 #endif
06011 
06012 #ifdef USE_PARSE_TREE_NODE_RECYCLE
06013   onig_free_node_list();
06014 #endif
06015 
06016   onig_inited = 0;
06017 
06018   THREAD_ATOMIC_END;
06019   THREAD_SYSTEM_END;
06020   return 0;
06021 }
06022 
06023 extern int
06024 onig_is_in_code_range(const UChar* p, OnigCodePoint code)
06025 {
06026   OnigCodePoint n, *data;
06027   OnigCodePoint low, high, x;
06028 
06029   GET_CODE_POINT(n, p);
06030   data = (OnigCodePoint* )p;
06031   data++;
06032 
06033   for (low = 0, high = n; low < high; ) {
06034     x = (low + high) >> 1;
06035     if (code > data[x * 2 + 1])
06036       low = x + 1;
06037     else
06038       high = x;
06039   }
06040 
06041   return ((low < n && code >= data[low * 2]) ? 1 : 0);
06042 }
06043 
06044 extern int
06045 onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc)
06046 {
06047   int found;
06048 
06049   if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
06050     if (IS_NULL(cc->mbuf)) {
06051       found = 0;
06052     }
06053     else {
06054       found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
06055     }
06056   }
06057   else {
06058     found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
06059   }
06060 
06061   if (IS_NCCLASS_NOT(cc))
06062     return !found;
06063   else
06064     return found;
06065 }
06066 
06067 extern int
06068 onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
06069 {
06070   int len;
06071 
06072   if (ONIGENC_MBC_MINLEN(enc) > 1) {
06073     len = 2;
06074   }
06075   else {
06076     len = ONIGENC_CODE_TO_MBCLEN(enc, code);
06077   }
06078   return onig_is_code_in_cc_len(len, code, cc);
06079 }
06080 
06081 
06082 #ifdef ONIG_DEBUG
06083 
/*
 * Operand kinds stored in the arg_type member of the OnigOpInfo entries
 * below. op2arg_type() also returns ARG_SPECIAL for an opcode that is not
 * listed in the table.
 */
06084 /* arguments type */
06085 #define ARG_SPECIAL     -1
06086 #define ARG_NON          0
06087 #define ARG_RELADDR      1
06088 #define ARG_ABSADDR      2
06089 #define ARG_LENGTH       3
06090 #define ARG_MEMNUM       4
06091 #define ARG_OPTION       5
06092 #define ARG_STATE_CHECK  6
06093 
/*
 * Debug table with one entry per opcode: the opcode, its printable name and
 * its operand kind (one of the ARG_* values above). The table is terminated
 * by an entry whose opcode is -1; op2name() and op2arg_type() stop scanning
 * at the first negative opcode.
 */
06094 OnigOpInfoType OnigOpInfo[] = {
06095   { OP_FINISH,            "finish",          ARG_NON },
06096   { OP_END,               "end",             ARG_NON },
06097   { OP_EXACT1,            "exact1",          ARG_SPECIAL },
06098   { OP_EXACT2,            "exact2",          ARG_SPECIAL },
06099   { OP_EXACT3,            "exact3",          ARG_SPECIAL },
06100   { OP_EXACT4,            "exact4",          ARG_SPECIAL },
06101   { OP_EXACT5,            "exact5",          ARG_SPECIAL },
06102   { OP_EXACTN,            "exactn",          ARG_SPECIAL },
06103   { OP_EXACTMB2N1,        "exactmb2-n1",     ARG_SPECIAL },
06104   { OP_EXACTMB2N2,        "exactmb2-n2",     ARG_SPECIAL },
06105   { OP_EXACTMB2N3,        "exactmb2-n3",     ARG_SPECIAL },
06106   { OP_EXACTMB2N,         "exactmb2-n",      ARG_SPECIAL },
06107   { OP_EXACTMB3N,         "exactmb3n"  ,     ARG_SPECIAL },
06108   { OP_EXACTMBN,          "exactmbn",        ARG_SPECIAL },
06109   { OP_EXACT1_IC,         "exact1-ic",       ARG_SPECIAL },
06110   { OP_EXACTN_IC,         "exactn-ic",       ARG_SPECIAL },
06111   { OP_CCLASS,            "cclass",          ARG_SPECIAL },
06112   { OP_CCLASS_MB,         "cclass-mb",       ARG_SPECIAL },
06113   { OP_CCLASS_MIX,        "cclass-mix",      ARG_SPECIAL },
06114   { OP_CCLASS_NOT,        "cclass-not",      ARG_SPECIAL },
06115   { OP_CCLASS_MB_NOT,     "cclass-mb-not",   ARG_SPECIAL },
06116   { OP_CCLASS_MIX_NOT,    "cclass-mix-not",  ARG_SPECIAL },
06117   { OP_CCLASS_NODE,       "cclass-node",     ARG_SPECIAL },
06118   { OP_ANYCHAR,           "anychar",         ARG_NON },
06119   { OP_ANYCHAR_ML,        "anychar-ml",      ARG_NON },
06120   { OP_ANYCHAR_STAR,      "anychar*",        ARG_NON },
06121   { OP_ANYCHAR_ML_STAR,   "anychar-ml*",     ARG_NON },
06122   { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
06123   { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
06124   { OP_WORD,                "word",            ARG_NON },
06125   { OP_NOT_WORD,            "not-word",        ARG_NON },
06126   { OP_WORD_BOUND,          "word-bound",      ARG_NON },
06127   { OP_NOT_WORD_BOUND,      "not-word-bound",  ARG_NON },
06128   { OP_WORD_BEGIN,          "word-begin",      ARG_NON },
06129   { OP_WORD_END,            "word-end",        ARG_NON },
06130   { OP_ASCII_WORD,          "ascii-word",           ARG_NON },
06131   { OP_NOT_ASCII_WORD,      "not-ascii-word",       ARG_NON },
06132   { OP_ASCII_WORD_BOUND,    "ascii-word-bound",     ARG_NON },
06133   { OP_NOT_ASCII_WORD_BOUND,"not-ascii-word-bound", ARG_NON },
06134   { OP_ASCII_WORD_BEGIN,    "ascii-word-begin",     ARG_NON },
06135   { OP_ASCII_WORD_END,      "ascii-word-end",       ARG_NON },
06136   { OP_BEGIN_BUF,           "begin-buf",       ARG_NON },
06137   { OP_END_BUF,             "end-buf",         ARG_NON },
06138   { OP_BEGIN_LINE,          "begin-line",      ARG_NON },
06139   { OP_END_LINE,            "end-line",        ARG_NON },
06140   { OP_SEMI_END_BUF,        "semi-end-buf",    ARG_NON },
06141   { OP_BEGIN_POSITION,      "begin-position",  ARG_NON },
06142   { OP_BEGIN_POS_OR_LINE,   "begin-pos-or-line",    ARG_NON },
06143   { OP_BACKREF1,            "backref1",             ARG_NON },
06144   { OP_BACKREF2,            "backref2",             ARG_NON },
06145   { OP_BACKREFN,            "backrefn",             ARG_MEMNUM  },
06146   { OP_BACKREFN_IC,         "backrefn-ic",          ARG_SPECIAL },
06147   { OP_BACKREF_MULTI,       "backref_multi",        ARG_SPECIAL },
06148   { OP_BACKREF_MULTI_IC,    "backref_multi-ic",     ARG_SPECIAL },
06149   { OP_BACKREF_WITH_LEVEL,  "backref_at_level",     ARG_SPECIAL },
06150   { OP_MEMORY_START_PUSH,   "mem-start-push",       ARG_MEMNUM  },
06151   { OP_MEMORY_START,        "mem-start",            ARG_MEMNUM  },
06152   { OP_MEMORY_END_PUSH,     "mem-end-push",         ARG_MEMNUM  },
06153   { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec",     ARG_MEMNUM  },
06154   { OP_MEMORY_END,          "mem-end",              ARG_MEMNUM  },
06155   { OP_MEMORY_END_REC,      "mem-end-rec",          ARG_MEMNUM  },
06156   { OP_SET_OPTION_PUSH,     "set-option-push",      ARG_OPTION  },
06157   { OP_SET_OPTION,          "set-option",           ARG_OPTION  },
06158   { OP_KEEP,                "keep",                 ARG_NON },
06159   { OP_FAIL,                "fail",                 ARG_NON },
06160   { OP_JUMP,                "jump",                 ARG_RELADDR },
06161   { OP_PUSH,                "push",                 ARG_RELADDR },
06162   { OP_POP,                 "pop",                  ARG_NON },
06163   { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1",      ARG_SPECIAL },
06164   { OP_PUSH_IF_PEEK_NEXT,   "push-if-peek-next",    ARG_SPECIAL },
06165   { OP_REPEAT,              "repeat",               ARG_SPECIAL },
06166   { OP_REPEAT_NG,           "repeat-ng",            ARG_SPECIAL },
06167   { OP_REPEAT_INC,          "repeat-inc",           ARG_MEMNUM  },
06168   { OP_REPEAT_INC_NG,       "repeat-inc-ng",        ARG_MEMNUM  },
06169   { OP_REPEAT_INC_SG,       "repeat-inc-sg",        ARG_MEMNUM  },
06170   { OP_REPEAT_INC_NG_SG,    "repeat-inc-ng-sg",     ARG_MEMNUM  },
06171   { OP_NULL_CHECK_START,    "null-check-start",     ARG_MEMNUM  },
06172   { OP_NULL_CHECK_END,      "null-check-end",       ARG_MEMNUM  },
06173   { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM  },
06174   { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM  },
06175   { OP_PUSH_POS,             "push-pos",             ARG_NON },
06176   { OP_POP_POS,              "pop-pos",              ARG_NON },
06177   { OP_PUSH_POS_NOT,         "push-pos-not",         ARG_RELADDR },
06178   { OP_FAIL_POS,             "fail-pos",             ARG_NON },
06179   { OP_PUSH_STOP_BT,         "push-stop-bt",         ARG_NON },
06180   { OP_POP_STOP_BT,          "pop-stop-bt",          ARG_NON },
06181   { OP_LOOK_BEHIND,          "look-behind",          ARG_SPECIAL },
06182   { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
06183   { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
06184   { OP_CALL,                 "call",                 ARG_ABSADDR },
06185   { OP_RETURN,               "return",               ARG_NON },
06186   { OP_CONDITION,            "condition",            ARG_SPECIAL },
06187   { OP_STATE_CHECK_PUSH,         "state-check-push",         ARG_SPECIAL },
06188   { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
06189   { OP_STATE_CHECK,              "state-check",              ARG_STATE_CHECK },
06190   { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*",     ARG_STATE_CHECK },
06191   { OP_STATE_CHECK_ANYCHAR_ML_STAR,
06192     "state-check-anychar-ml*", ARG_STATE_CHECK },
06193   { -1, "", ARG_NON }
06194 };
06195 
06196 static const char*
06197 op2name(int opcode)
06198 {
06199   int i;
06200 
06201   for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
06202     if (opcode == OnigOpInfo[i].opcode)
06203       return OnigOpInfo[i].name;
06204   }
06205   return "";
06206 }
06207 
06208 static int
06209 op2arg_type(int opcode)
06210 {
06211   int i;
06212 
06213   for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
06214     if (opcode == OnigOpInfo[i].opcode)
06215       return OnigOpInfo[i].arg_type;
06216   }
06217   return ARG_SPECIAL;
06218 }
06219 
/*
 * Write `indent` space characters to f.
 * A zero or negative count writes nothing.
 */
static void
Indent(FILE* f, int indent)
{
  int remaining = indent;

  while (remaining-- > 0) {
    putc(' ', f);
  }
}
06226 
06227 static void
06228 p_string(FILE* f, int len, UChar* s)
06229 {
06230   fputs(":", f);
06231   while (len-- > 0) { fputc(*s++, f); }
06232 }
06233 
06234 static void
06235 p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
06236 {
06237   int x = len * mb_len;
06238 
06239   fprintf(f, ":%d:", len);
06240   while (x-- > 0) { fputc(*s++, f); }
06241 }
06242 
06243 extern void
06244 onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
06245                               OnigEncoding enc)
06246 {
06247   int i, n, arg_type;
06248   RelAddrType addr;
06249   LengthType len;
06250   MemNumType mem;
06251   StateCheckNumType scn;
06252   OnigCodePoint code;
06253   UChar *q;
06254 
06255   fprintf(f, "[%s", op2name(*bp));
06256   arg_type = op2arg_type(*bp);
06257   if (arg_type != ARG_SPECIAL) {
06258     bp++;
06259     switch (arg_type) {
06260     case ARG_NON:
06261       break;
06262     case ARG_RELADDR:
06263       GET_RELADDR_INC(addr, bp);
06264       fprintf(f, ":(%d)", addr);
06265       break;
06266     case ARG_ABSADDR:
06267       GET_ABSADDR_INC(addr, bp);
06268       fprintf(f, ":(%d)", addr);
06269       break;
06270     case ARG_LENGTH:
06271       GET_LENGTH_INC(len, bp);
06272       fprintf(f, ":%d", len);
06273       break;
06274     case ARG_MEMNUM:
06275       mem = *((MemNumType* )bp);
06276       bp += SIZE_MEMNUM;
06277       fprintf(f, ":%d", mem);
06278       break;
06279     case ARG_OPTION:
06280       {
06281         OnigOptionType option = *((OnigOptionType* )bp);
06282         bp += SIZE_OPTION;
06283         fprintf(f, ":%d", option);
06284       }
06285       break;
06286 
06287     case ARG_STATE_CHECK:
06288       scn = *((StateCheckNumType* )bp);
06289       bp += SIZE_STATE_CHECK_NUM;
06290       fprintf(f, ":%d", scn);
06291       break;
06292     }
06293   }
06294   else {
06295     switch (*bp++) {
06296     case OP_EXACT1:
06297     case OP_ANYCHAR_STAR_PEEK_NEXT:
06298     case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
06299       p_string(f, 1, bp++); break;
06300     case OP_EXACT2:
06301       p_string(f, 2, bp); bp += 2; break;
06302     case OP_EXACT3:
06303       p_string(f, 3, bp); bp += 3; break;
06304     case OP_EXACT4:
06305       p_string(f, 4, bp); bp += 4; break;
06306     case OP_EXACT5:
06307       p_string(f, 5, bp); bp += 5; break;
06308     case OP_EXACTN:
06309       GET_LENGTH_INC(len, bp);
06310       p_len_string(f, len, 1, bp);
06311       bp += len;
06312       break;
06313 
06314     case OP_EXACTMB2N1:
06315       p_string(f, 2, bp); bp += 2; break;
06316     case OP_EXACTMB2N2:
06317       p_string(f, 4, bp); bp += 4; break;
06318     case OP_EXACTMB2N3:
06319       p_string(f, 6, bp); bp += 6; break;
06320     case OP_EXACTMB2N:
06321       GET_LENGTH_INC(len, bp);
06322       p_len_string(f, len, 2, bp);
06323       bp += len * 2;
06324       break;
06325     case OP_EXACTMB3N:
06326       GET_LENGTH_INC(len, bp);
06327       p_len_string(f, len, 3, bp);
06328       bp += len * 3;
06329       break;
06330     case OP_EXACTMBN:
06331       {
06332         int mb_len;
06333 
06334         GET_LENGTH_INC(mb_len, bp);
06335         GET_LENGTH_INC(len, bp);
06336         fprintf(f, ":%d:%d:", mb_len, len);
06337         n = len * mb_len;
06338         while (n-- > 0) { fputc(*bp++, f); }
06339       }
06340       break;
06341 
06342     case OP_EXACT1_IC:
06343       len = enclen(enc, bp, bpend);
06344       p_string(f, len, bp);
06345       bp += len;
06346       break;
06347     case OP_EXACTN_IC:
06348       GET_LENGTH_INC(len, bp);
06349       p_len_string(f, len, 1, bp);
06350       bp += len;
06351       break;
06352 
06353     case OP_CCLASS:
06354       n = bitset_on_num((BitSetRef )bp);
06355       bp += SIZE_BITSET;
06356       fprintf(f, ":%d", n);
06357       break;
06358 
06359     case OP_CCLASS_NOT:
06360       n = bitset_on_num((BitSetRef )bp);
06361       bp += SIZE_BITSET;
06362       fprintf(f, ":%d", n);
06363       break;
06364 
06365     case OP_CCLASS_MB:
06366     case OP_CCLASS_MB_NOT:
06367       GET_LENGTH_INC(len, bp);
06368       q = bp;
06369 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
06370       ALIGNMENT_RIGHT(q);
06371 #endif
06372       GET_CODE_POINT(code, q);
06373       bp += len;
06374       fprintf(f, ":%d:%d", (int )code, len);
06375       break;
06376 
06377     case OP_CCLASS_MIX:
06378     case OP_CCLASS_MIX_NOT:
06379       n = bitset_on_num((BitSetRef )bp);
06380       bp += SIZE_BITSET;
06381       GET_LENGTH_INC(len, bp);
06382       q = bp;
06383 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
06384       ALIGNMENT_RIGHT(q);
06385 #endif
06386       GET_CODE_POINT(code, q);
06387       bp += len;
06388       fprintf(f, ":%d:%d:%d", n, (int )code, len);
06389       break;
06390 
06391     case OP_CCLASS_NODE:
06392       {
06393         CClassNode *cc;
06394 
06395         GET_POINTER_INC(cc, bp);
06396         n = bitset_on_num(cc->bs);
06397         fprintf(f, ":%"PRIuPTR":%d", (uintptr_t)cc, n);
06398       }
06399       break;
06400 
06401     case OP_BACKREFN_IC:
06402       mem = *((MemNumType* )bp);
06403       bp += SIZE_MEMNUM;
06404       fprintf(f, ":%d", mem);
06405       break;
06406 
06407     case OP_BACKREF_MULTI_IC:
06408     case OP_BACKREF_MULTI:
06409       fputs(" ", f);
06410       GET_LENGTH_INC(len, bp);
06411       for (i = 0; i < len; i++) {
06412         GET_MEMNUM_INC(mem, bp);
06413         if (i > 0) fputs(", ", f);
06414         fprintf(f, "%d", mem);
06415       }
06416       break;
06417 
06418     case OP_BACKREF_WITH_LEVEL:
06419       {
06420         OnigOptionType option;
06421         LengthType level;
06422 
06423         GET_OPTION_INC(option, bp);
06424         fprintf(f, ":%d", option);
06425         GET_LENGTH_INC(level, bp);
06426         fprintf(f, ":%d", level);
06427 
06428         fputs(" ", f);
06429         GET_LENGTH_INC(len, bp);
06430         for (i = 0; i < len; i++) {
06431           GET_MEMNUM_INC(mem, bp);
06432           if (i > 0) fputs(", ", f);
06433           fprintf(f, "%d", mem);
06434         }
06435       }
06436       break;
06437 
06438     case OP_REPEAT:
06439     case OP_REPEAT_NG:
06440       {
06441         mem = *((MemNumType* )bp);
06442         bp += SIZE_MEMNUM;
06443         addr = *((RelAddrType* )bp);
06444         bp += SIZE_RELADDR;
06445         fprintf(f, ":%d:%d", mem, addr);
06446       }
06447       break;
06448 
06449     case OP_PUSH_OR_JUMP_EXACT1:
06450     case OP_PUSH_IF_PEEK_NEXT:
06451       addr = *((RelAddrType* )bp);
06452       bp += SIZE_RELADDR;
06453       fprintf(f, ":(%d)", addr);
06454       p_string(f, 1, bp);
06455       bp += 1;
06456       break;
06457 
06458     case OP_LOOK_BEHIND:
06459       GET_LENGTH_INC(len, bp);
06460       fprintf(f, ":%d", len);
06461       break;
06462 
06463     case OP_PUSH_LOOK_BEHIND_NOT:
06464       GET_RELADDR_INC(addr, bp);
06465       GET_LENGTH_INC(len, bp);
06466       fprintf(f, ":%d:(%d)", len, addr);
06467       break;
06468 
06469     case OP_STATE_CHECK_PUSH:
06470     case OP_STATE_CHECK_PUSH_OR_JUMP:
06471       scn = *((StateCheckNumType* )bp);
06472       bp += SIZE_STATE_CHECK_NUM;
06473       addr = *((RelAddrType* )bp);
06474       bp += SIZE_RELADDR;
06475       fprintf(f, ":%d:(%d)", scn, addr);
06476       break;
06477 
06478     case OP_CONDITION:
06479       GET_MEMNUM_INC(mem, bp);
06480       GET_RELADDR_INC(addr, bp);
06481       fprintf(f, ":%d:(%d)", mem, addr);
06482       break;
06483 
06484     default:
06485       fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
06486               *--bp);
06487     }
06488   }
06489   fputs("]", f);
06490   if (nextp) *nextp = bp;
06491 }
06492 
06493 static void
06494 print_compiled_byte_code_list(FILE* f, regex_t* reg)
06495 {
06496   int ncode;
06497   UChar* bp = reg->p;
06498   UChar* end = reg->p + reg->used;
06499 
06500   fprintf(f, "code length: %d", reg->used);
06501 
06502   ncode = -1;
06503   while (bp < end) {
06504     ncode++;
06505     if (ncode % 5 == 0)
06506       fprintf(f, "\n%ld:", bp - reg->p);
06507     else
06508       fprintf(f, " %ld:", bp - reg->p);
06509     onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc);
06510   }
06511 
06512   fprintf(f, "\n");
06513 }
06514 
06515 static void
06516 print_indent_tree(FILE* f, Node* node, int indent)
06517 {
06518   int i, type, container_p = 0;
06519   int add = 3;
06520   UChar* p;
06521 
06522   Indent(f, indent);
06523   if (IS_NULL(node)) {
06524     fprintf(f, "ERROR: null node!!!\n");
06525     exit (0);
06526   }
06527 
06528   type = NTYPE(node);
06529   switch (type) {
06530   case NT_LIST:
06531   case NT_ALT:
06532     if (NTYPE(node) == NT_LIST)
06533       fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t)node);
06534     else
06535       fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t)node);
06536 
06537     print_indent_tree(f, NCAR(node), indent + add);
06538     while (IS_NOT_NULL(node = NCDR(node))) {
06539       if (NTYPE(node) != type) {
06540         fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
06541         exit(0);
06542       }
06543       print_indent_tree(f, NCAR(node), indent + add);
06544     }
06545     break;
06546 
06547   case NT_STR:
06548     fprintf(f, "<string%s:%"PRIxPTR">",
06549             (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t)node);
06550     for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
06551       if (*p >= 0x20 && *p < 0x7f)
06552         fputc(*p, f);
06553       else {
06554         fprintf(f, " 0x%02x", *p);
06555       }
06556     }
06557     break;
06558 
06559   case NT_CCLASS:
06560     fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t)node);
06561     if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f);
06562     if (NCCLASS(node)->mbuf) {
06563       BBuf* bbuf = NCCLASS(node)->mbuf;
06564       for (i = 0; i < (int )bbuf->used; i++) {
06565         if (i > 0) fprintf(f, ",");
06566         fprintf(f, "%0x", bbuf->p[i]);
06567       }
06568     }
06569     break;
06570 
06571   case NT_CTYPE:
06572     fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t)node);
06573     switch (NCTYPE(node)->ctype) {
06574     case ONIGENC_CTYPE_WORD:
06575       if (NCTYPE(node)->not != 0)
06576         fputs("not word",       f);
06577       else
06578         fputs("word",           f);
06579       break;
06580 
06581     default:
06582       fprintf(f, "ERROR: undefined ctype.\n");
06583       exit(0);
06584     }
06585     break;
06586 
06587   case NT_CANY:
06588     fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t)node);
06589     break;
06590 
06591   case NT_ANCHOR:
06592     fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t)node);
06593     switch (NANCHOR(node)->type) {
06594     case ANCHOR_BEGIN_BUF:      fputs("begin buf",      f); break;
06595     case ANCHOR_END_BUF:        fputs("end buf",        f); break;
06596     case ANCHOR_BEGIN_LINE:     fputs("begin line",     f); break;
06597     case ANCHOR_END_LINE:       fputs("end line",       f); break;
06598     case ANCHOR_SEMI_END_BUF:   fputs("semi end buf",   f); break;
06599     case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
06600     case ANCHOR_ANYCHAR_STAR:   fputs("begin position/line", f); break;
06601 
06602     case ANCHOR_WORD_BOUND:      fputs("word bound",     f); break;
06603     case ANCHOR_NOT_WORD_BOUND:  fputs("not word bound", f); break;
06604 #ifdef USE_WORD_BEGIN_END
06605     case ANCHOR_WORD_BEGIN:      fputs("word begin", f);     break;
06606     case ANCHOR_WORD_END:        fputs("word end", f);       break;
06607 #endif
06608     case ANCHOR_PREC_READ:       fputs("prec read",      f); container_p = TRUE; break;
06609     case ANCHOR_PREC_READ_NOT:   fputs("prec read not",  f); container_p = TRUE; break;
06610     case ANCHOR_LOOK_BEHIND:     fputs("look_behind",    f); container_p = TRUE; break;
06611     case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); container_p = TRUE; break;
06612     case ANCHOR_KEEP:            fputs("keep",f);            break;
06613 
06614     default:
06615       fprintf(f, "ERROR: undefined anchor type.\n");
06616       break;
06617     }
06618     break;
06619 
06620   case NT_BREF:
06621     {
06622       int* p;
06623       BRefNode* br = NBREF(node);
06624       p = BACKREFS_P(br);
06625       fprintf(f, "<backref:%"PRIxPTR">", (intptr_t)node);
06626       for (i = 0; i < br->back_num; i++) {
06627         if (i > 0) fputs(", ", f);
06628         fprintf(f, "%d", p[i]);
06629       }
06630     }
06631     break;
06632 
06633 #ifdef USE_SUBEXP_CALL
06634   case NT_CALL:
06635     {
06636       CallNode* cn = NCALL(node);
06637       fprintf(f, "<call:%"PRIxPTR">", (intptr_t)node);
06638       p_string(f, cn->name_end - cn->name, cn->name);
06639     }
06640     break;
06641 #endif
06642 
06643   case NT_QTFR:
06644     fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t)node,
06645             NQTFR(node)->lower, NQTFR(node)->upper,
06646             (NQTFR(node)->greedy ? "" : "?"));
06647     print_indent_tree(f, NQTFR(node)->target, indent + add);
06648     break;
06649 
06650   case NT_ENCLOSE:
06651     fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t)node);
06652     switch (NENCLOSE(node)->type) {
06653     case ENCLOSE_OPTION:
06654       fprintf(f, "option:%d", NENCLOSE(node)->option);
06655       break;
06656     case ENCLOSE_MEMORY:
06657       fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
06658       break;
06659     case ENCLOSE_STOP_BACKTRACK:
06660       fprintf(f, "stop-bt");
06661       break;
06662     case ENCLOSE_CONDITION:
06663       fprintf(f, "condition:%d", NENCLOSE(node)->regnum);
06664       break;
06665 
06666     default:
06667       break;
06668     }
06669     fprintf(f, "\n");
06670     print_indent_tree(f, NENCLOSE(node)->target, indent + add);
06671     break;
06672 
06673   default:
06674     fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
06675     break;
06676   }
06677 
06678   if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
06679       type != NT_ENCLOSE)
06680     fprintf(f, "\n");
06681 
06682   if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add);
06683 
06684   fflush(f);
06685 }
06686 #endif /* ONIG_DEBUG */
06687 
06688 #ifdef ONIG_DEBUG_PARSE_TREE
06689 static void
06690 print_tree(FILE* f, Node* node)
06691 {
06692   print_indent_tree(f, node, 0);
06693 }
06694 #endif
06695