|
Ruby
2.0.0p481(2014-05-08revision45883)
|
00001 00002 /* 00003 * Introduction 00004 * ************ 00005 * 00006 * The following notes assume that you are familiar with the YAML specification 00007 * (http://yaml.org/spec/cvs/current.html). We mostly follow it, although in 00008 * some cases we are less restrictive than it requires. 00009 * 00010 * The process of transforming a YAML stream into a sequence of events is 00011 * divided into two steps: Scanning and Parsing. 00012 * 00013 * The Scanner transforms the input stream into a sequence of tokens, while the 00014 * Parser transforms the sequence of tokens produced by the Scanner into a 00015 * sequence of parsing events. 00016 * 00017 * The Scanner is rather clever and complicated. The Parser, on the contrary, 00018 * is a straightforward implementation of a recursive-descent parser (or, 00019 * LL(1) parser, as it is usually called). 00020 * 00021 * Actually there are two issues of Scanning that might be called "clever", the 00022 * rest is quite straightforward. The issues are "block collection start" and 00023 * "simple keys". Both issues are explained below in detail. 00024 * 00025 * Here the Scanning step is explained and implemented. We start with the list 00026 * of all the tokens produced by the Scanner together with short descriptions. 00027 * 00028 * Now, tokens: 00029 * 00030 * STREAM-START(encoding) # The stream start. 00031 * STREAM-END # The stream end. 00032 * VERSION-DIRECTIVE(major,minor) # The '%YAML' directive. 00033 * TAG-DIRECTIVE(handle,prefix) # The '%TAG' directive. 00034 * DOCUMENT-START # '---' 00035 * DOCUMENT-END # '...' 00036 * BLOCK-SEQUENCE-START # Indentation increase denoting a block 00037 * BLOCK-MAPPING-START # sequence or a block mapping. 00038 * BLOCK-END # Indentation decrease. 00039 * FLOW-SEQUENCE-START # '[' 00040 * FLOW-SEQUENCE-END # ']' 00041 * FLOW-MAPPING-START # '{' 00042 * FLOW-MAPPING-END # '}' 00043 * BLOCK-ENTRY # '-' 00044 * FLOW-ENTRY # ',' 00045 * KEY # '?' or nothing (simple keys). 
00046 * VALUE # ':' 00047 * ALIAS(anchor) # '*anchor' 00048 * ANCHOR(anchor) # '&anchor' 00049 * TAG(handle,suffix) # '!handle!suffix' 00050 * SCALAR(value,style) # A scalar. 00051 * 00052 * The following two tokens are "virtual" tokens denoting the beginning and the 00053 * end of the stream: 00054 * 00055 * STREAM-START(encoding) 00056 * STREAM-END 00057 * 00058 * We pass the information about the input stream encoding with the 00059 * STREAM-START token. 00060 * 00061 * The next two tokens are responsible for directives: 00062 * 00063 * VERSION-DIRECTIVE(major,minor) 00064 * TAG-DIRECTIVE(handle,prefix) 00065 * 00066 * Example: 00067 * 00068 * %YAML 1.1 00069 * %TAG ! !foo 00070 * %TAG !yaml! tag:yaml.org,2002: 00071 * --- 00072 * 00073 * The corresponding sequence of tokens: 00074 * 00075 * STREAM-START(utf-8) 00076 * VERSION-DIRECTIVE(1,1) 00077 * TAG-DIRECTIVE("!","!foo") 00078 * TAG-DIRECTIVE("!yaml!","tag:yaml.org,2002:") 00079 * DOCUMENT-START 00080 * STREAM-END 00081 * 00082 * Note that the VERSION-DIRECTIVE and TAG-DIRECTIVE tokens occupy a whole 00083 * line. 00084 * 00085 * The document start and end indicators are represented by: 00086 * 00087 * DOCUMENT-START 00088 * DOCUMENT-END 00089 * 00090 * Note that if a YAML stream contains an implicit document (without '---' 00091 * and '...' indicators), no DOCUMENT-START and DOCUMENT-END tokens will be 00092 * produced. 00093 * 00094 * In the following examples, we present whole documents together with the 00095 * produced tokens. 00096 * 00097 * 1. An implicit document: 00098 * 00099 * 'a scalar' 00100 * 00101 * Tokens: 00102 * 00103 * STREAM-START(utf-8) 00104 * SCALAR("a scalar",single-quoted) 00105 * STREAM-END 00106 * 00107 * 2. An explicit document: 00108 * 00109 * --- 00110 * 'a scalar' 00111 * ... 00112 * 00113 * Tokens: 00114 * 00115 * STREAM-START(utf-8) 00116 * DOCUMENT-START 00117 * SCALAR("a scalar",single-quoted) 00118 * DOCUMENT-END 00119 * STREAM-END 00120 * 00121 * 3. 
Several documents in a stream: 00122 * 00123 * 'a scalar' 00124 * --- 00125 * 'another scalar' 00126 * --- 00127 * 'yet another scalar' 00128 * 00129 * Tokens: 00130 * 00131 * STREAM-START(utf-8) 00132 * SCALAR("a scalar",single-quoted) 00133 * DOCUMENT-START 00134 * SCALAR("another scalar",single-quoted) 00135 * DOCUMENT-START 00136 * SCALAR("yet another scalar",single-quoted) 00137 * STREAM-END 00138 * 00139 * We have already introduced the SCALAR token above. The following tokens are 00140 * used to describe aliases, anchors, tag, and scalars: 00141 * 00142 * ALIAS(anchor) 00143 * ANCHOR(anchor) 00144 * TAG(handle,suffix) 00145 * SCALAR(value,style) 00146 * 00147 * The following series of examples illustrate the usage of these tokens: 00148 * 00149 * 1. A recursive sequence: 00150 * 00151 * &A [ *A ] 00152 * 00153 * Tokens: 00154 * 00155 * STREAM-START(utf-8) 00156 * ANCHOR("A") 00157 * FLOW-SEQUENCE-START 00158 * ALIAS("A") 00159 * FLOW-SEQUENCE-END 00160 * STREAM-END 00161 * 00162 * 2. A tagged scalar: 00163 * 00164 * !!float "3.14" # A good approximation. 00165 * 00166 * Tokens: 00167 * 00168 * STREAM-START(utf-8) 00169 * TAG("!!","float") 00170 * SCALAR("3.14",double-quoted) 00171 * STREAM-END 00172 * 00173 * 3. Various scalar styles: 00174 * 00175 * --- # Implicit empty plain scalars do not produce tokens. 
00176 * --- a plain scalar 00177 * --- 'a single-quoted scalar' 00178 * --- "a double-quoted scalar" 00179 * --- |- 00180 * a literal scalar 00181 * --- >- 00182 * a folded 00183 * scalar 00184 * 00185 * Tokens: 00186 * 00187 * STREAM-START(utf-8) 00188 * DOCUMENT-START 00189 * DOCUMENT-START 00190 * SCALAR("a plain scalar",plain) 00191 * DOCUMENT-START 00192 * SCALAR("a single-quoted scalar",single-quoted) 00193 * DOCUMENT-START 00194 * SCALAR("a double-quoted scalar",double-quoted) 00195 * DOCUMENT-START 00196 * SCALAR("a literal scalar",literal) 00197 * DOCUMENT-START 00198 * SCALAR("a folded scalar",folded) 00199 * STREAM-END 00200 * 00201 * Now it's time to review collection-related tokens. We will start with 00202 * flow collections: 00203 * 00204 * FLOW-SEQUENCE-START 00205 * FLOW-SEQUENCE-END 00206 * FLOW-MAPPING-START 00207 * FLOW-MAPPING-END 00208 * FLOW-ENTRY 00209 * KEY 00210 * VALUE 00211 * 00212 * The tokens FLOW-SEQUENCE-START, FLOW-SEQUENCE-END, FLOW-MAPPING-START, and 00213 * FLOW-MAPPING-END represent the indicators '[', ']', '{', and '}' 00214 * correspondingly. FLOW-ENTRY represent the ',' indicator. Finally the 00215 * indicators '?' and ':', which are used for denoting mapping keys and values, 00216 * are represented by the KEY and VALUE tokens. 00217 * 00218 * The following examples show flow collections: 00219 * 00220 * 1. A flow sequence: 00221 * 00222 * [item 1, item 2, item 3] 00223 * 00224 * Tokens: 00225 * 00226 * STREAM-START(utf-8) 00227 * FLOW-SEQUENCE-START 00228 * SCALAR("item 1",plain) 00229 * FLOW-ENTRY 00230 * SCALAR("item 2",plain) 00231 * FLOW-ENTRY 00232 * SCALAR("item 3",plain) 00233 * FLOW-SEQUENCE-END 00234 * STREAM-END 00235 * 00236 * 2. A flow mapping: 00237 * 00238 * { 00239 * a simple key: a value, # Note that the KEY token is produced. 00240 * ? 
a complex key: another value, 00241 * } 00242 * 00243 * Tokens: 00244 * 00245 * STREAM-START(utf-8) 00246 * FLOW-MAPPING-START 00247 * KEY 00248 * SCALAR("a simple key",plain) 00249 * VALUE 00250 * SCALAR("a value",plain) 00251 * FLOW-ENTRY 00252 * KEY 00253 * SCALAR("a complex key",plain) 00254 * VALUE 00255 * SCALAR("another value",plain) 00256 * FLOW-ENTRY 00257 * FLOW-MAPPING-END 00258 * STREAM-END 00259 * 00260 * A simple key is a key which is not denoted by the '?' indicator. Note that 00261 * the Scanner still produce the KEY token whenever it encounters a simple key. 00262 * 00263 * For scanning block collections, the following tokens are used (note that we 00264 * repeat KEY and VALUE here): 00265 * 00266 * BLOCK-SEQUENCE-START 00267 * BLOCK-MAPPING-START 00268 * BLOCK-END 00269 * BLOCK-ENTRY 00270 * KEY 00271 * VALUE 00272 * 00273 * The tokens BLOCK-SEQUENCE-START and BLOCK-MAPPING-START denote indentation 00274 * increase that precedes a block collection (cf. the INDENT token in Python). 00275 * The token BLOCK-END denote indentation decrease that ends a block collection 00276 * (cf. the DEDENT token in Python). However YAML has some syntax pecularities 00277 * that makes detections of these tokens more complex. 00278 * 00279 * The tokens BLOCK-ENTRY, KEY, and VALUE are used to represent the indicators 00280 * '-', '?', and ':' correspondingly. 00281 * 00282 * The following examples show how the tokens BLOCK-SEQUENCE-START, 00283 * BLOCK-MAPPING-START, and BLOCK-END are emitted by the Scanner: 00284 * 00285 * 1. 
Block sequences: 00286 * 00287 * - item 1 00288 * - item 2 00289 * - 00290 * - item 3.1 00291 * - item 3.2 00292 * - 00293 * key 1: value 1 00294 * key 2: value 2 00295 * 00296 * Tokens: 00297 * 00298 * STREAM-START(utf-8) 00299 * BLOCK-SEQUENCE-START 00300 * BLOCK-ENTRY 00301 * SCALAR("item 1",plain) 00302 * BLOCK-ENTRY 00303 * SCALAR("item 2",plain) 00304 * BLOCK-ENTRY 00305 * BLOCK-SEQUENCE-START 00306 * BLOCK-ENTRY 00307 * SCALAR("item 3.1",plain) 00308 * BLOCK-ENTRY 00309 * SCALAR("item 3.2",plain) 00310 * BLOCK-END 00311 * BLOCK-ENTRY 00312 * BLOCK-MAPPING-START 00313 * KEY 00314 * SCALAR("key 1",plain) 00315 * VALUE 00316 * SCALAR("value 1",plain) 00317 * KEY 00318 * SCALAR("key 2",plain) 00319 * VALUE 00320 * SCALAR("value 2",plain) 00321 * BLOCK-END 00322 * BLOCK-END 00323 * STREAM-END 00324 * 00325 * 2. Block mappings: 00326 * 00327 * a simple key: a value # The KEY token is produced here. 00328 * ? a complex key 00329 * : another value 00330 * a mapping: 00331 * key 1: value 1 00332 * key 2: value 2 00333 * a sequence: 00334 * - item 1 00335 * - item 2 00336 * 00337 * Tokens: 00338 * 00339 * STREAM-START(utf-8) 00340 * BLOCK-MAPPING-START 00341 * KEY 00342 * SCALAR("a simple key",plain) 00343 * VALUE 00344 * SCALAR("a value",plain) 00345 * KEY 00346 * SCALAR("a complex key",plain) 00347 * VALUE 00348 * SCALAR("another value",plain) 00349 * KEY 00350 * SCALAR("a mapping",plain) 00351 * BLOCK-MAPPING-START 00352 * KEY 00353 * SCALAR("key 1",plain) 00354 * VALUE 00355 * SCALAR("value 1",plain) 00356 * KEY 00357 * SCALAR("key 2",plain) 00358 * VALUE 00359 * SCALAR("value 2",plain) 00360 * BLOCK-END 00361 * KEY 00362 * SCALAR("a sequence",plain) 00363 * VALUE 00364 * BLOCK-SEQUENCE-START 00365 * BLOCK-ENTRY 00366 * SCALAR("item 1",plain) 00367 * BLOCK-ENTRY 00368 * SCALAR("item 2",plain) 00369 * BLOCK-END 00370 * BLOCK-END 00371 * STREAM-END 00372 * 00373 * YAML does not always require to start a new block collection from a new 00374 * line. 
If the current line contains only '-', '?', and ':' indicators, a new 00375 * block collection may start at the current line. The following examples 00376 * illustrate this case: 00377 * 00378 * 1. Collections in a sequence: 00379 * 00380 * - - item 1 00381 * - item 2 00382 * - key 1: value 1 00383 * key 2: value 2 00384 * - ? complex key 00385 * : complex value 00386 * 00387 * Tokens: 00388 * 00389 * STREAM-START(utf-8) 00390 * BLOCK-SEQUENCE-START 00391 * BLOCK-ENTRY 00392 * BLOCK-SEQUENCE-START 00393 * BLOCK-ENTRY 00394 * SCALAR("item 1",plain) 00395 * BLOCK-ENTRY 00396 * SCALAR("item 2",plain) 00397 * BLOCK-END 00398 * BLOCK-ENTRY 00399 * BLOCK-MAPPING-START 00400 * KEY 00401 * SCALAR("key 1",plain) 00402 * VALUE 00403 * SCALAR("value 1",plain) 00404 * KEY 00405 * SCALAR("key 2",plain) 00406 * VALUE 00407 * SCALAR("value 2",plain) 00408 * BLOCK-END 00409 * BLOCK-ENTRY 00410 * BLOCK-MAPPING-START 00411 * KEY 00412 * SCALAR("complex key") 00413 * VALUE 00414 * SCALAR("complex value") 00415 * BLOCK-END 00416 * BLOCK-END 00417 * STREAM-END 00418 * 00419 * 2. Collections in a mapping: 00420 * 00421 * ? a sequence 00422 * : - item 1 00423 * - item 2 00424 * ? a mapping 00425 * : key 1: value 1 00426 * key 2: value 2 00427 * 00428 * Tokens: 00429 * 00430 * STREAM-START(utf-8) 00431 * BLOCK-MAPPING-START 00432 * KEY 00433 * SCALAR("a sequence",plain) 00434 * VALUE 00435 * BLOCK-SEQUENCE-START 00436 * BLOCK-ENTRY 00437 * SCALAR("item 1",plain) 00438 * BLOCK-ENTRY 00439 * SCALAR("item 2",plain) 00440 * BLOCK-END 00441 * KEY 00442 * SCALAR("a mapping",plain) 00443 * VALUE 00444 * BLOCK-MAPPING-START 00445 * KEY 00446 * SCALAR("key 1",plain) 00447 * VALUE 00448 * SCALAR("value 1",plain) 00449 * KEY 00450 * SCALAR("key 2",plain) 00451 * VALUE 00452 * SCALAR("value 2",plain) 00453 * BLOCK-END 00454 * BLOCK-END 00455 * STREAM-END 00456 * 00457 * YAML also permits non-indented sequences if they are included into a block 00458 * mapping. 
In this case, the token BLOCK-SEQUENCE-START is not produced: 00459 * 00460 * key: 00461 * - item 1 # BLOCK-SEQUENCE-START is NOT produced here. 00462 * - item 2 00463 * 00464 * Tokens: 00465 * 00466 * STREAM-START(utf-8) 00467 * BLOCK-MAPPING-START 00468 * KEY 00469 * SCALAR("key",plain) 00470 * VALUE 00471 * BLOCK-ENTRY 00472 * SCALAR("item 1",plain) 00473 * BLOCK-ENTRY 00474 * SCALAR("item 2",plain) 00475 * BLOCK-END 00476 */ 00477 00478 #include "yaml_private.h" 00479 00480 /* 00481 * Ensure that the buffer contains the required number of characters. 00482 * Return 1 on success, 0 on failure (reader error or memory error). 00483 */ 00484 00485 #define CACHE(parser,length) \ 00486 (parser->unread >= (length) \ 00487 ? 1 \ 00488 : yaml_parser_update_buffer(parser, (length))) 00489 00490 /* 00491 * Advance the buffer pointer. 00492 */ 00493 00494 #define SKIP(parser) \ 00495 (parser->mark.index ++, \ 00496 parser->mark.column ++, \ 00497 parser->unread --, \ 00498 parser->buffer.pointer += WIDTH(parser->buffer)) 00499 00500 #define SKIP_LINE(parser) \ 00501 (IS_CRLF(parser->buffer) ? \ 00502 (parser->mark.index += 2, \ 00503 parser->mark.column = 0, \ 00504 parser->mark.line ++, \ 00505 parser->unread -= 2, \ 00506 parser->buffer.pointer += 2) : \ 00507 IS_BREAK(parser->buffer) ? \ 00508 (parser->mark.index ++, \ 00509 parser->mark.column = 0, \ 00510 parser->mark.line ++, \ 00511 parser->unread --, \ 00512 parser->buffer.pointer += WIDTH(parser->buffer)) : 0) 00513 00514 /* 00515 * Copy a character to a string buffer and advance pointers. 00516 */ 00517 00518 #define READ(parser,string) \ 00519 (STRING_EXTEND(parser,string) ? \ 00520 (COPY(string,parser->buffer), \ 00521 parser->mark.index ++, \ 00522 parser->mark.column ++, \ 00523 parser->unread --, \ 00524 1) : 0) 00525 00526 /* 00527 * Copy a line break character to a string buffer and advance pointers. 00528 */ 00529 00530 #define READ_LINE(parser,string) \ 00531 (STRING_EXTEND(parser,string) ? 
\ 00532 (((CHECK_AT(parser->buffer,'\r',0) \ 00533 && CHECK_AT(parser->buffer,'\n',1)) ? /* CR LF -> LF */ \ 00534 (*((string).pointer++) = (yaml_char_t) '\n', \ 00535 parser->buffer.pointer += 2, \ 00536 parser->mark.index += 2, \ 00537 parser->mark.column = 0, \ 00538 parser->mark.line ++, \ 00539 parser->unread -= 2) : \ 00540 (CHECK_AT(parser->buffer,'\r',0) \ 00541 || CHECK_AT(parser->buffer,'\n',0)) ? /* CR|LF -> LF */ \ 00542 (*((string).pointer++) = (yaml_char_t) '\n', \ 00543 parser->buffer.pointer ++, \ 00544 parser->mark.index ++, \ 00545 parser->mark.column = 0, \ 00546 parser->mark.line ++, \ 00547 parser->unread --) : \ 00548 (CHECK_AT(parser->buffer,'\xC2',0) \ 00549 && CHECK_AT(parser->buffer,'\x85',1)) ? /* NEL -> LF */ \ 00550 (*((string).pointer++) = (yaml_char_t) '\n', \ 00551 parser->buffer.pointer += 2, \ 00552 parser->mark.index ++, \ 00553 parser->mark.column = 0, \ 00554 parser->mark.line ++, \ 00555 parser->unread --) : \ 00556 (CHECK_AT(parser->buffer,'\xE2',0) && \ 00557 CHECK_AT(parser->buffer,'\x80',1) && \ 00558 (CHECK_AT(parser->buffer,'\xA8',2) || \ 00559 CHECK_AT(parser->buffer,'\xA9',2))) ? /* LS|PS -> LS|PS */ \ 00560 (*((string).pointer++) = *(parser->buffer.pointer++), \ 00561 *((string).pointer++) = *(parser->buffer.pointer++), \ 00562 *((string).pointer++) = *(parser->buffer.pointer++), \ 00563 parser->mark.index ++, \ 00564 parser->mark.column = 0, \ 00565 parser->mark.line ++, \ 00566 parser->unread --) : 0), \ 00567 1) : 0) 00568 00569 /* 00570 * Public API declarations. 00571 */ 00572 00573 YAML_DECLARE(int) 00574 yaml_parser_scan(yaml_parser_t *parser, yaml_token_t *token); 00575 00576 /* 00577 * Error handling. 00578 */ 00579 00580 static int 00581 yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context, 00582 yaml_mark_t context_mark, const char *problem); 00583 00584 /* 00585 * High-level token API. 
00586 */ 00587 00588 YAML_DECLARE(int) 00589 yaml_parser_fetch_more_tokens(yaml_parser_t *parser); 00590 00591 static int 00592 yaml_parser_fetch_next_token(yaml_parser_t *parser); 00593 00594 /* 00595 * Potential simple keys. 00596 */ 00597 00598 static int 00599 yaml_parser_stale_simple_keys(yaml_parser_t *parser); 00600 00601 static int 00602 yaml_parser_save_simple_key(yaml_parser_t *parser); 00603 00604 static int 00605 yaml_parser_remove_simple_key(yaml_parser_t *parser); 00606 00607 static int 00608 yaml_parser_increase_flow_level(yaml_parser_t *parser); 00609 00610 static int 00611 yaml_parser_decrease_flow_level(yaml_parser_t *parser); 00612 00613 /* 00614 * Indentation treatment. 00615 */ 00616 00617 static int 00618 yaml_parser_roll_indent(yaml_parser_t *parser, ptrdiff_t column, 00619 ptrdiff_t number, yaml_token_type_t type, yaml_mark_t mark); 00620 00621 static int 00622 yaml_parser_unroll_indent(yaml_parser_t *parser, ptrdiff_t column); 00623 00624 /* 00625 * Token fetchers. 
00626 */ 00627 00628 static int 00629 yaml_parser_fetch_stream_start(yaml_parser_t *parser); 00630 00631 static int 00632 yaml_parser_fetch_stream_end(yaml_parser_t *parser); 00633 00634 static int 00635 yaml_parser_fetch_directive(yaml_parser_t *parser); 00636 00637 static int 00638 yaml_parser_fetch_document_indicator(yaml_parser_t *parser, 00639 yaml_token_type_t type); 00640 00641 static int 00642 yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser, 00643 yaml_token_type_t type); 00644 00645 static int 00646 yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser, 00647 yaml_token_type_t type); 00648 00649 static int 00650 yaml_parser_fetch_flow_entry(yaml_parser_t *parser); 00651 00652 static int 00653 yaml_parser_fetch_block_entry(yaml_parser_t *parser); 00654 00655 static int 00656 yaml_parser_fetch_key(yaml_parser_t *parser); 00657 00658 static int 00659 yaml_parser_fetch_value(yaml_parser_t *parser); 00660 00661 static int 00662 yaml_parser_fetch_anchor(yaml_parser_t *parser, yaml_token_type_t type); 00663 00664 static int 00665 yaml_parser_fetch_tag(yaml_parser_t *parser); 00666 00667 static int 00668 yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal); 00669 00670 static int 00671 yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single); 00672 00673 static int 00674 yaml_parser_fetch_plain_scalar(yaml_parser_t *parser); 00675 00676 /* 00677 * Token scanners. 
00678 */ 00679 00680 static int 00681 yaml_parser_scan_to_next_token(yaml_parser_t *parser); 00682 00683 static int 00684 yaml_parser_scan_directive(yaml_parser_t *parser, yaml_token_t *token); 00685 00686 static int 00687 yaml_parser_scan_directive_name(yaml_parser_t *parser, 00688 yaml_mark_t start_mark, yaml_char_t **name); 00689 00690 static int 00691 yaml_parser_scan_version_directive_value(yaml_parser_t *parser, 00692 yaml_mark_t start_mark, int *major, int *minor); 00693 00694 static int 00695 yaml_parser_scan_version_directive_number(yaml_parser_t *parser, 00696 yaml_mark_t start_mark, int *number); 00697 00698 static int 00699 yaml_parser_scan_tag_directive_value(yaml_parser_t *parser, 00700 yaml_mark_t mark, yaml_char_t **handle, yaml_char_t **prefix); 00701 00702 static int 00703 yaml_parser_scan_anchor(yaml_parser_t *parser, yaml_token_t *token, 00704 yaml_token_type_t type); 00705 00706 static int 00707 yaml_parser_scan_tag(yaml_parser_t *parser, yaml_token_t *token); 00708 00709 static int 00710 yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive, 00711 yaml_mark_t start_mark, yaml_char_t **handle); 00712 00713 static int 00714 yaml_parser_scan_tag_uri(yaml_parser_t *parser, int directive, 00715 yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri); 00716 00717 static int 00718 yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive, 00719 yaml_mark_t start_mark, yaml_string_t *string); 00720 00721 static int 00722 yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token, 00723 int literal); 00724 00725 static int 00726 yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser, 00727 int *indent, yaml_string_t *breaks, 00728 yaml_mark_t start_mark, yaml_mark_t *end_mark); 00729 00730 static int 00731 yaml_parser_scan_flow_scalar(yaml_parser_t *parser, yaml_token_t *token, 00732 int single); 00733 00734 static int 00735 yaml_parser_scan_plain_scalar(yaml_parser_t *parser, yaml_token_t *token); 00736 00737 
/* 00738 * Get the next token. 00739 */ 00740 00741 YAML_DECLARE(int) 00742 yaml_parser_scan(yaml_parser_t *parser, yaml_token_t *token) 00743 { 00744 assert(parser); /* Non-NULL parser object is expected. */ 00745 assert(token); /* Non-NULL token object is expected. */ 00746 00747 /* Erase the token object. */ 00748 00749 memset(token, 0, sizeof(yaml_token_t)); 00750 00751 /* No tokens after STREAM-END or error. */ 00752 00753 if (parser->stream_end_produced || parser->error) { 00754 return 1; 00755 } 00756 00757 /* Ensure that the tokens queue contains enough tokens. */ 00758 00759 if (!parser->token_available) { 00760 if (!yaml_parser_fetch_more_tokens(parser)) 00761 return 0; 00762 } 00763 00764 /* Fetch the next token from the queue. */ 00765 00766 *token = DEQUEUE(parser, parser->tokens); 00767 parser->token_available = 0; 00768 parser->tokens_parsed ++; 00769 00770 if (token->type == YAML_STREAM_END_TOKEN) { 00771 parser->stream_end_produced = 1; 00772 } 00773 00774 return 1; 00775 } 00776 00777 /* 00778 * Set the scanner error and return 0. 00779 */ 00780 00781 static int 00782 yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context, 00783 yaml_mark_t context_mark, const char *problem) 00784 { 00785 parser->error = YAML_SCANNER_ERROR; 00786 parser->context = context; 00787 parser->context_mark = context_mark; 00788 parser->problem = problem; 00789 parser->problem_mark = parser->mark; 00790 00791 return 0; 00792 } 00793 00794 /* 00795 * Ensure that the tokens queue contains at least one token which can be 00796 * returned to the Parser. 00797 */ 00798 00799 YAML_DECLARE(int) 00800 yaml_parser_fetch_more_tokens(yaml_parser_t *parser) 00801 { 00802 int need_more_tokens; 00803 00804 /* While we need more tokens to fetch, do it. */ 00805 00806 while (1) 00807 { 00808 /* 00809 * Check if we really need to fetch more tokens. 
00810 */ 00811 00812 need_more_tokens = 0; 00813 00814 if (parser->tokens.head == parser->tokens.tail) 00815 { 00816 /* Queue is empty. */ 00817 00818 need_more_tokens = 1; 00819 } 00820 else 00821 { 00822 yaml_simple_key_t *simple_key; 00823 00824 /* Check if any potential simple key may occupy the head position. */ 00825 00826 if (!yaml_parser_stale_simple_keys(parser)) 00827 return 0; 00828 00829 for (simple_key = parser->simple_keys.start; 00830 simple_key != parser->simple_keys.top; simple_key++) { 00831 if (simple_key->possible 00832 && simple_key->token_number == parser->tokens_parsed) { 00833 need_more_tokens = 1; 00834 break; 00835 } 00836 } 00837 } 00838 00839 /* We are finished. */ 00840 00841 if (!need_more_tokens) 00842 break; 00843 00844 /* Fetch the next token. */ 00845 00846 if (!yaml_parser_fetch_next_token(parser)) 00847 return 0; 00848 } 00849 00850 parser->token_available = 1; 00851 00852 return 1; 00853 } 00854 00855 /* 00856 * The dispatcher for token fetchers. 00857 */ 00858 00859 static int 00860 yaml_parser_fetch_next_token(yaml_parser_t *parser) 00861 { 00862 /* Ensure that the buffer is initialized. */ 00863 00864 if (!CACHE(parser, 1)) 00865 return 0; 00866 00867 /* Check if we just started scanning. Fetch STREAM-START then. */ 00868 00869 if (!parser->stream_start_produced) 00870 return yaml_parser_fetch_stream_start(parser); 00871 00872 /* Eat whitespaces and comments until we reach the next token. */ 00873 00874 if (!yaml_parser_scan_to_next_token(parser)) 00875 return 0; 00876 00877 /* Remove obsolete potential simple keys. */ 00878 00879 if (!yaml_parser_stale_simple_keys(parser)) 00880 return 0; 00881 00882 /* Check the indentation level against the current column. */ 00883 00884 if (!yaml_parser_unroll_indent(parser, parser->mark.column)) 00885 return 0; 00886 00887 /* 00888 * Ensure that the buffer contains at least 4 characters. 4 is the length 00889 * of the longest indicators ('--- ' and '... '). 
00890 */ 00891 00892 if (!CACHE(parser, 4)) 00893 return 0; 00894 00895 /* Is it the end of the stream? */ 00896 00897 if (IS_Z(parser->buffer)) 00898 return yaml_parser_fetch_stream_end(parser); 00899 00900 /* Is it a directive? */ 00901 00902 if (parser->mark.column == 0 && CHECK(parser->buffer, '%')) 00903 return yaml_parser_fetch_directive(parser); 00904 00905 /* Is it the document start indicator? */ 00906 00907 if (parser->mark.column == 0 00908 && CHECK_AT(parser->buffer, '-', 0) 00909 && CHECK_AT(parser->buffer, '-', 1) 00910 && CHECK_AT(parser->buffer, '-', 2) 00911 && IS_BLANKZ_AT(parser->buffer, 3)) 00912 return yaml_parser_fetch_document_indicator(parser, 00913 YAML_DOCUMENT_START_TOKEN); 00914 00915 /* Is it the document end indicator? */ 00916 00917 if (parser->mark.column == 0 00918 && CHECK_AT(parser->buffer, '.', 0) 00919 && CHECK_AT(parser->buffer, '.', 1) 00920 && CHECK_AT(parser->buffer, '.', 2) 00921 && IS_BLANKZ_AT(parser->buffer, 3)) 00922 return yaml_parser_fetch_document_indicator(parser, 00923 YAML_DOCUMENT_END_TOKEN); 00924 00925 /* Is it the flow sequence start indicator? */ 00926 00927 if (CHECK(parser->buffer, '[')) 00928 return yaml_parser_fetch_flow_collection_start(parser, 00929 YAML_FLOW_SEQUENCE_START_TOKEN); 00930 00931 /* Is it the flow mapping start indicator? */ 00932 00933 if (CHECK(parser->buffer, '{')) 00934 return yaml_parser_fetch_flow_collection_start(parser, 00935 YAML_FLOW_MAPPING_START_TOKEN); 00936 00937 /* Is it the flow sequence end indicator? */ 00938 00939 if (CHECK(parser->buffer, ']')) 00940 return yaml_parser_fetch_flow_collection_end(parser, 00941 YAML_FLOW_SEQUENCE_END_TOKEN); 00942 00943 /* Is it the flow mapping end indicator? */ 00944 00945 if (CHECK(parser->buffer, '}')) 00946 return yaml_parser_fetch_flow_collection_end(parser, 00947 YAML_FLOW_MAPPING_END_TOKEN); 00948 00949 /* Is it the flow entry indicator? 
*/ 00950 00951 if (CHECK(parser->buffer, ',')) 00952 return yaml_parser_fetch_flow_entry(parser); 00953 00954 /* Is it the block entry indicator? */ 00955 00956 if (CHECK(parser->buffer, '-') && IS_BLANKZ_AT(parser->buffer, 1)) 00957 return yaml_parser_fetch_block_entry(parser); 00958 00959 /* Is it the key indicator? */ 00960 00961 if (CHECK(parser->buffer, '?') 00962 && (parser->flow_level || IS_BLANKZ_AT(parser->buffer, 1))) 00963 return yaml_parser_fetch_key(parser); 00964 00965 /* Is it the value indicator? */ 00966 00967 if (CHECK(parser->buffer, ':') 00968 && (parser->flow_level || IS_BLANKZ_AT(parser->buffer, 1))) 00969 return yaml_parser_fetch_value(parser); 00970 00971 /* Is it an alias? */ 00972 00973 if (CHECK(parser->buffer, '*')) 00974 return yaml_parser_fetch_anchor(parser, YAML_ALIAS_TOKEN); 00975 00976 /* Is it an anchor? */ 00977 00978 if (CHECK(parser->buffer, '&')) 00979 return yaml_parser_fetch_anchor(parser, YAML_ANCHOR_TOKEN); 00980 00981 /* Is it a tag? */ 00982 00983 if (CHECK(parser->buffer, '!')) 00984 return yaml_parser_fetch_tag(parser); 00985 00986 /* Is it a literal scalar? */ 00987 00988 if (CHECK(parser->buffer, '|') && !parser->flow_level) 00989 return yaml_parser_fetch_block_scalar(parser, 1); 00990 00991 /* Is it a folded scalar? */ 00992 00993 if (CHECK(parser->buffer, '>') && !parser->flow_level) 00994 return yaml_parser_fetch_block_scalar(parser, 0); 00995 00996 /* Is it a single-quoted scalar? */ 00997 00998 if (CHECK(parser->buffer, '\'')) 00999 return yaml_parser_fetch_flow_scalar(parser, 1); 01000 01001 /* Is it a double-quoted scalar? */ 01002 01003 if (CHECK(parser->buffer, '"')) 01004 return yaml_parser_fetch_flow_scalar(parser, 0); 01005 01006 /* 01007 * Is it a plain scalar? 01008 * 01009 * A plain scalar may start with any non-blank characters except 01010 * 01011 * '-', '?', ':', ',', '[', ']', '{', '}', 01012 * '#', '&', '*', '!', '|', '>', '\'', '\"', 01013 * '%', '@', '`'. 
01014 * 01015 * In the block context (and, for the '-' indicator, in the flow context 01016 * too), it may also start with the characters 01017 * 01018 * '-', '?', ':' 01019 * 01020 * if it is followed by a non-space character. 01021 * 01022 * The last rule is more restrictive than the specification requires. 01023 */ 01024 01025 if (!(IS_BLANKZ(parser->buffer) || CHECK(parser->buffer, '-') 01026 || CHECK(parser->buffer, '?') || CHECK(parser->buffer, ':') 01027 || CHECK(parser->buffer, ',') || CHECK(parser->buffer, '[') 01028 || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '{') 01029 || CHECK(parser->buffer, '}') || CHECK(parser->buffer, '#') 01030 || CHECK(parser->buffer, '&') || CHECK(parser->buffer, '*') 01031 || CHECK(parser->buffer, '!') || CHECK(parser->buffer, '|') 01032 || CHECK(parser->buffer, '>') || CHECK(parser->buffer, '\'') 01033 || CHECK(parser->buffer, '"') || CHECK(parser->buffer, '%') 01034 || CHECK(parser->buffer, '@') || CHECK(parser->buffer, '`')) || 01035 (CHECK(parser->buffer, '-') && !IS_BLANK_AT(parser->buffer, 1)) || 01036 (!parser->flow_level && 01037 (CHECK(parser->buffer, '?') || CHECK(parser->buffer, ':')) 01038 && !IS_BLANKZ_AT(parser->buffer, 1))) 01039 return yaml_parser_fetch_plain_scalar(parser); 01040 01041 /* 01042 * If we don't determine the token type so far, it is an error. 01043 */ 01044 01045 return yaml_parser_set_scanner_error(parser, 01046 "while scanning for the next token", parser->mark, 01047 "found character that cannot start any token"); 01048 } 01049 01050 /* 01051 * Check the list of potential simple keys and remove the positions that 01052 * cannot contain simple keys anymore. 01053 */ 01054 01055 static int 01056 yaml_parser_stale_simple_keys(yaml_parser_t *parser) 01057 { 01058 yaml_simple_key_t *simple_key; 01059 01060 /* Check for a potential simple key for each flow level. 
*/ 01061 01062 for (simple_key = parser->simple_keys.start; 01063 simple_key != parser->simple_keys.top; simple_key ++) 01064 { 01065 /* 01066 * The specification requires that a simple key 01067 * 01068 * - is limited to a single line, 01069 * - is shorter than 1024 characters. 01070 */ 01071 01072 if (simple_key->possible 01073 && (simple_key->mark.line < parser->mark.line 01074 || simple_key->mark.index+1024 < parser->mark.index)) { 01075 01076 /* Check if the potential simple key to be removed is required. */ 01077 01078 if (simple_key->required) { 01079 return yaml_parser_set_scanner_error(parser, 01080 "while scanning a simple key", simple_key->mark, 01081 "could not find expected ':'"); 01082 } 01083 01084 simple_key->possible = 0; 01085 } 01086 } 01087 01088 return 1; 01089 } 01090 01091 /* 01092 * Check if a simple key may start at the current position and add it if 01093 * needed. 01094 */ 01095 01096 static int 01097 yaml_parser_save_simple_key(yaml_parser_t *parser) 01098 { 01099 /* 01100 * A simple key is required at the current position if the scanner is in 01101 * the block context and the current column coincides with the indentation 01102 * level. 01103 */ 01104 01105 int required = (!parser->flow_level 01106 && parser->indent == (ptrdiff_t)parser->mark.column); 01107 01108 /* 01109 * A simple key is required only when it is the first token in the current 01110 * line. Therefore it is always allowed. But we add a check anyway. 01111 */ 01112 01113 assert(parser->simple_key_allowed || !required); /* Impossible. */ 01114 01115 /* 01116 * If the current position may start a simple key, save it. 
01117 */ 01118 01119 if (parser->simple_key_allowed) 01120 { 01121 yaml_simple_key_t simple_key; 01122 simple_key.possible = 1; 01123 simple_key.required = required; 01124 simple_key.token_number = 01125 parser->tokens_parsed + (parser->tokens.tail - parser->tokens.head); 01126 simple_key.mark = parser->mark; 01127 01128 if (!yaml_parser_remove_simple_key(parser)) return 0; 01129 01130 *(parser->simple_keys.top-1) = simple_key; 01131 } 01132 01133 return 1; 01134 } 01135 01136 /* 01137 * Remove a potential simple key at the current flow level. 01138 */ 01139 01140 static int 01141 yaml_parser_remove_simple_key(yaml_parser_t *parser) 01142 { 01143 yaml_simple_key_t *simple_key = parser->simple_keys.top-1; 01144 01145 if (simple_key->possible) 01146 { 01147 /* If the key is required, it is an error. */ 01148 01149 if (simple_key->required) { 01150 return yaml_parser_set_scanner_error(parser, 01151 "while scanning a simple key", simple_key->mark, 01152 "could not find expected ':'"); 01153 } 01154 } 01155 01156 /* Remove the key from the stack. */ 01157 01158 simple_key->possible = 0; 01159 01160 return 1; 01161 } 01162 01163 /* 01164 * Increase the flow level and resize the simple key list if needed. 01165 */ 01166 01167 static int 01168 yaml_parser_increase_flow_level(yaml_parser_t *parser) 01169 { 01170 yaml_simple_key_t empty_simple_key = { 0, 0, 0, { 0, 0, 0 } }; 01171 01172 /* Reset the simple key on the next level. */ 01173 01174 if (!PUSH(parser, parser->simple_keys, empty_simple_key)) 01175 return 0; 01176 01177 /* Increase the flow level. */ 01178 01179 if (parser->flow_level == INT_MAX) { 01180 parser->error = YAML_MEMORY_ERROR; 01181 return 0; 01182 } 01183 01184 parser->flow_level++; 01185 01186 return 1; 01187 } 01188 01189 /* 01190 * Decrease the flow level. 
01191 */ 01192 01193 static int 01194 yaml_parser_decrease_flow_level(yaml_parser_t *parser) 01195 { 01196 if (parser->flow_level) { 01197 parser->flow_level --; 01198 (void)POP(parser, parser->simple_keys); 01199 } 01200 01201 return 1; 01202 } 01203 01204 /* 01205 * Push the current indentation level to the stack and set the new level 01206 * the current column is greater than the indentation level. In this case, 01207 * append or insert the specified token into the token queue. 01208 * 01209 */ 01210 01211 static int 01212 yaml_parser_roll_indent(yaml_parser_t *parser, ptrdiff_t column, 01213 ptrdiff_t number, yaml_token_type_t type, yaml_mark_t mark) 01214 { 01215 yaml_token_t token; 01216 01217 /* In the flow context, do nothing. */ 01218 01219 if (parser->flow_level) 01220 return 1; 01221 01222 if (parser->indent < column) 01223 { 01224 /* 01225 * Push the current indentation level to the stack and set the new 01226 * indentation level. 01227 */ 01228 01229 if (!PUSH(parser, parser->indents, parser->indent)) 01230 return 0; 01231 01232 #if PTRDIFF_MAX > INT_MAX 01233 if (column > INT_MAX) { 01234 parser->error = YAML_MEMORY_ERROR; 01235 return 0; 01236 } 01237 #endif 01238 01239 parser->indent = (int)column; 01240 01241 /* Create a token and insert it into the queue. */ 01242 01243 TOKEN_INIT(token, type, mark, mark); 01244 01245 if (number == -1) { 01246 if (!ENQUEUE(parser, parser->tokens, token)) 01247 return 0; 01248 } 01249 else { 01250 if (!QUEUE_INSERT(parser, 01251 parser->tokens, number - parser->tokens_parsed, token)) 01252 return 0; 01253 } 01254 } 01255 01256 return 1; 01257 } 01258 01259 /* 01260 * Pop indentation levels from the indents stack until the current level 01261 * becomes less or equal to the column. For each intendation level, append 01262 * the BLOCK-END token. 
01263 */ 01264 01265 01266 static int 01267 yaml_parser_unroll_indent(yaml_parser_t *parser, ptrdiff_t column) 01268 { 01269 yaml_token_t token; 01270 01271 /* In the flow context, do nothing. */ 01272 01273 if (parser->flow_level) 01274 return 1; 01275 01276 /* Loop through the intendation levels in the stack. */ 01277 01278 while (parser->indent > column) 01279 { 01280 /* Create a token and append it to the queue. */ 01281 01282 TOKEN_INIT(token, YAML_BLOCK_END_TOKEN, parser->mark, parser->mark); 01283 01284 if (!ENQUEUE(parser, parser->tokens, token)) 01285 return 0; 01286 01287 /* Pop the indentation level. */ 01288 01289 parser->indent = POP(parser, parser->indents); 01290 } 01291 01292 return 1; 01293 } 01294 01295 /* 01296 * Initialize the scanner and produce the STREAM-START token. 01297 */ 01298 01299 static int 01300 yaml_parser_fetch_stream_start(yaml_parser_t *parser) 01301 { 01302 yaml_simple_key_t simple_key = { 0, 0, 0, { 0, 0, 0 } }; 01303 yaml_token_t token; 01304 01305 /* Set the initial indentation. */ 01306 01307 parser->indent = -1; 01308 01309 /* Initialize the simple key stack. */ 01310 01311 if (!PUSH(parser, parser->simple_keys, simple_key)) 01312 return 0; 01313 01314 /* A simple key is allowed at the beginning of the stream. */ 01315 01316 parser->simple_key_allowed = 1; 01317 01318 /* We have started. */ 01319 01320 parser->stream_start_produced = 1; 01321 01322 /* Create the STREAM-START token and append it to the queue. */ 01323 01324 STREAM_START_TOKEN_INIT(token, parser->encoding, 01325 parser->mark, parser->mark); 01326 01327 if (!ENQUEUE(parser, parser->tokens, token)) 01328 return 0; 01329 01330 return 1; 01331 } 01332 01333 /* 01334 * Produce the STREAM-END token and shut down the scanner. 01335 */ 01336 01337 static int 01338 yaml_parser_fetch_stream_end(yaml_parser_t *parser) 01339 { 01340 yaml_token_t token; 01341 01342 /* Force new line. 
*/ 01343 01344 if (parser->mark.column != 0) { 01345 parser->mark.column = 0; 01346 parser->mark.line ++; 01347 } 01348 01349 /* Reset the indentation level. */ 01350 01351 if (!yaml_parser_unroll_indent(parser, -1)) 01352 return 0; 01353 01354 /* Reset simple keys. */ 01355 01356 if (!yaml_parser_remove_simple_key(parser)) 01357 return 0; 01358 01359 parser->simple_key_allowed = 0; 01360 01361 /* Create the STREAM-END token and append it to the queue. */ 01362 01363 STREAM_END_TOKEN_INIT(token, parser->mark, parser->mark); 01364 01365 if (!ENQUEUE(parser, parser->tokens, token)) 01366 return 0; 01367 01368 return 1; 01369 } 01370 01371 /* 01372 * Produce a VERSION-DIRECTIVE or TAG-DIRECTIVE token. 01373 */ 01374 01375 static int 01376 yaml_parser_fetch_directive(yaml_parser_t *parser) 01377 { 01378 yaml_token_t token; 01379 01380 /* Reset the indentation level. */ 01381 01382 if (!yaml_parser_unroll_indent(parser, -1)) 01383 return 0; 01384 01385 /* Reset simple keys. */ 01386 01387 if (!yaml_parser_remove_simple_key(parser)) 01388 return 0; 01389 01390 parser->simple_key_allowed = 0; 01391 01392 /* Create the YAML-DIRECTIVE or TAG-DIRECTIVE token. */ 01393 01394 if (!yaml_parser_scan_directive(parser, &token)) 01395 return 0; 01396 01397 /* Append the token to the queue. */ 01398 01399 if (!ENQUEUE(parser, parser->tokens, token)) { 01400 yaml_token_delete(&token); 01401 return 0; 01402 } 01403 01404 return 1; 01405 } 01406 01407 /* 01408 * Produce the DOCUMENT-START or DOCUMENT-END token. 01409 */ 01410 01411 static int 01412 yaml_parser_fetch_document_indicator(yaml_parser_t *parser, 01413 yaml_token_type_t type) 01414 { 01415 yaml_mark_t start_mark, end_mark; 01416 yaml_token_t token; 01417 01418 /* Reset the indentation level. */ 01419 01420 if (!yaml_parser_unroll_indent(parser, -1)) 01421 return 0; 01422 01423 /* Reset simple keys. 
*/ 01424 01425 if (!yaml_parser_remove_simple_key(parser)) 01426 return 0; 01427 01428 parser->simple_key_allowed = 0; 01429 01430 /* Consume the token. */ 01431 01432 start_mark = parser->mark; 01433 01434 SKIP(parser); 01435 SKIP(parser); 01436 SKIP(parser); 01437 01438 end_mark = parser->mark; 01439 01440 /* Create the DOCUMENT-START or DOCUMENT-END token. */ 01441 01442 TOKEN_INIT(token, type, start_mark, end_mark); 01443 01444 /* Append the token to the queue. */ 01445 01446 if (!ENQUEUE(parser, parser->tokens, token)) 01447 return 0; 01448 01449 return 1; 01450 } 01451 01452 /* 01453 * Produce the FLOW-SEQUENCE-START or FLOW-MAPPING-START token. 01454 */ 01455 01456 static int 01457 yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser, 01458 yaml_token_type_t type) 01459 { 01460 yaml_mark_t start_mark, end_mark; 01461 yaml_token_t token; 01462 01463 /* The indicators '[' and '{' may start a simple key. */ 01464 01465 if (!yaml_parser_save_simple_key(parser)) 01466 return 0; 01467 01468 /* Increase the flow level. */ 01469 01470 if (!yaml_parser_increase_flow_level(parser)) 01471 return 0; 01472 01473 /* A simple key may follow the indicators '[' and '{'. */ 01474 01475 parser->simple_key_allowed = 1; 01476 01477 /* Consume the token. */ 01478 01479 start_mark = parser->mark; 01480 SKIP(parser); 01481 end_mark = parser->mark; 01482 01483 /* Create the FLOW-SEQUENCE-START of FLOW-MAPPING-START token. */ 01484 01485 TOKEN_INIT(token, type, start_mark, end_mark); 01486 01487 /* Append the token to the queue. */ 01488 01489 if (!ENQUEUE(parser, parser->tokens, token)) 01490 return 0; 01491 01492 return 1; 01493 } 01494 01495 /* 01496 * Produce the FLOW-SEQUENCE-END or FLOW-MAPPING-END token. 
01497 */ 01498 01499 static int 01500 yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser, 01501 yaml_token_type_t type) 01502 { 01503 yaml_mark_t start_mark, end_mark; 01504 yaml_token_t token; 01505 01506 /* Reset any potential simple key on the current flow level. */ 01507 01508 if (!yaml_parser_remove_simple_key(parser)) 01509 return 0; 01510 01511 /* Decrease the flow level. */ 01512 01513 if (!yaml_parser_decrease_flow_level(parser)) 01514 return 0; 01515 01516 /* No simple keys after the indicators ']' and '}'. */ 01517 01518 parser->simple_key_allowed = 0; 01519 01520 /* Consume the token. */ 01521 01522 start_mark = parser->mark; 01523 SKIP(parser); 01524 end_mark = parser->mark; 01525 01526 /* Create the FLOW-SEQUENCE-END of FLOW-MAPPING-END token. */ 01527 01528 TOKEN_INIT(token, type, start_mark, end_mark); 01529 01530 /* Append the token to the queue. */ 01531 01532 if (!ENQUEUE(parser, parser->tokens, token)) 01533 return 0; 01534 01535 return 1; 01536 } 01537 01538 /* 01539 * Produce the FLOW-ENTRY token. 01540 */ 01541 01542 static int 01543 yaml_parser_fetch_flow_entry(yaml_parser_t *parser) 01544 { 01545 yaml_mark_t start_mark, end_mark; 01546 yaml_token_t token; 01547 01548 /* Reset any potential simple keys on the current flow level. */ 01549 01550 if (!yaml_parser_remove_simple_key(parser)) 01551 return 0; 01552 01553 /* Simple keys are allowed after ','. */ 01554 01555 parser->simple_key_allowed = 1; 01556 01557 /* Consume the token. */ 01558 01559 start_mark = parser->mark; 01560 SKIP(parser); 01561 end_mark = parser->mark; 01562 01563 /* Create the FLOW-ENTRY token and append it to the queue. */ 01564 01565 TOKEN_INIT(token, YAML_FLOW_ENTRY_TOKEN, start_mark, end_mark); 01566 01567 if (!ENQUEUE(parser, parser->tokens, token)) 01568 return 0; 01569 01570 return 1; 01571 } 01572 01573 /* 01574 * Produce the BLOCK-ENTRY token. 
01575 */ 01576 01577 static int 01578 yaml_parser_fetch_block_entry(yaml_parser_t *parser) 01579 { 01580 yaml_mark_t start_mark, end_mark; 01581 yaml_token_t token; 01582 01583 /* Check if the scanner is in the block context. */ 01584 01585 if (!parser->flow_level) 01586 { 01587 /* Check if we are allowed to start a new entry. */ 01588 01589 if (!parser->simple_key_allowed) { 01590 return yaml_parser_set_scanner_error(parser, NULL, parser->mark, 01591 "block sequence entries are not allowed in this context"); 01592 } 01593 01594 /* Add the BLOCK-SEQUENCE-START token if needed. */ 01595 01596 if (!yaml_parser_roll_indent(parser, parser->mark.column, -1, 01597 YAML_BLOCK_SEQUENCE_START_TOKEN, parser->mark)) 01598 return 0; 01599 } 01600 else 01601 { 01602 /* 01603 * It is an error for the '-' indicator to occur in the flow context, 01604 * but we let the Parser detect and report about it because the Parser 01605 * is able to point to the context. 01606 */ 01607 } 01608 01609 /* Reset any potential simple keys on the current flow level. */ 01610 01611 if (!yaml_parser_remove_simple_key(parser)) 01612 return 0; 01613 01614 /* Simple keys are allowed after '-'. */ 01615 01616 parser->simple_key_allowed = 1; 01617 01618 /* Consume the token. */ 01619 01620 start_mark = parser->mark; 01621 SKIP(parser); 01622 end_mark = parser->mark; 01623 01624 /* Create the BLOCK-ENTRY token and append it to the queue. */ 01625 01626 TOKEN_INIT(token, YAML_BLOCK_ENTRY_TOKEN, start_mark, end_mark); 01627 01628 if (!ENQUEUE(parser, parser->tokens, token)) 01629 return 0; 01630 01631 return 1; 01632 } 01633 01634 /* 01635 * Produce the KEY token. 01636 */ 01637 01638 static int 01639 yaml_parser_fetch_key(yaml_parser_t *parser) 01640 { 01641 yaml_mark_t start_mark, end_mark; 01642 yaml_token_t token; 01643 01644 /* In the block context, additional checks are required. */ 01645 01646 if (!parser->flow_level) 01647 { 01648 /* Check if we are allowed to start a new key (not nessesary simple). 
*/ 01649 01650 if (!parser->simple_key_allowed) { 01651 return yaml_parser_set_scanner_error(parser, NULL, parser->mark, 01652 "mapping keys are not allowed in this context"); 01653 } 01654 01655 /* Add the BLOCK-MAPPING-START token if needed. */ 01656 01657 if (!yaml_parser_roll_indent(parser, parser->mark.column, -1, 01658 YAML_BLOCK_MAPPING_START_TOKEN, parser->mark)) 01659 return 0; 01660 } 01661 01662 /* Reset any potential simple keys on the current flow level. */ 01663 01664 if (!yaml_parser_remove_simple_key(parser)) 01665 return 0; 01666 01667 /* Simple keys are allowed after '?' in the block context. */ 01668 01669 parser->simple_key_allowed = (!parser->flow_level); 01670 01671 /* Consume the token. */ 01672 01673 start_mark = parser->mark; 01674 SKIP(parser); 01675 end_mark = parser->mark; 01676 01677 /* Create the KEY token and append it to the queue. */ 01678 01679 TOKEN_INIT(token, YAML_KEY_TOKEN, start_mark, end_mark); 01680 01681 if (!ENQUEUE(parser, parser->tokens, token)) 01682 return 0; 01683 01684 return 1; 01685 } 01686 01687 /* 01688 * Produce the VALUE token. 01689 */ 01690 01691 static int 01692 yaml_parser_fetch_value(yaml_parser_t *parser) 01693 { 01694 yaml_mark_t start_mark, end_mark; 01695 yaml_token_t token; 01696 yaml_simple_key_t *simple_key = parser->simple_keys.top-1; 01697 01698 /* Have we found a simple key? */ 01699 01700 if (simple_key->possible) 01701 { 01702 01703 /* Create the KEY token and insert it into the queue. */ 01704 01705 TOKEN_INIT(token, YAML_KEY_TOKEN, simple_key->mark, simple_key->mark); 01706 01707 if (!QUEUE_INSERT(parser, parser->tokens, 01708 simple_key->token_number - parser->tokens_parsed, token)) 01709 return 0; 01710 01711 /* In the block context, we may need to add the BLOCK-MAPPING-START token. 
*/ 01712 01713 if (!yaml_parser_roll_indent(parser, simple_key->mark.column, 01714 simple_key->token_number, 01715 YAML_BLOCK_MAPPING_START_TOKEN, simple_key->mark)) 01716 return 0; 01717 01718 /* Remove the simple key. */ 01719 01720 simple_key->possible = 0; 01721 01722 /* A simple key cannot follow another simple key. */ 01723 01724 parser->simple_key_allowed = 0; 01725 } 01726 else 01727 { 01728 /* The ':' indicator follows a complex key. */ 01729 01730 /* In the block context, extra checks are required. */ 01731 01732 if (!parser->flow_level) 01733 { 01734 /* Check if we are allowed to start a complex value. */ 01735 01736 if (!parser->simple_key_allowed) { 01737 return yaml_parser_set_scanner_error(parser, NULL, parser->mark, 01738 "mapping values are not allowed in this context"); 01739 } 01740 01741 /* Add the BLOCK-MAPPING-START token if needed. */ 01742 01743 if (!yaml_parser_roll_indent(parser, parser->mark.column, -1, 01744 YAML_BLOCK_MAPPING_START_TOKEN, parser->mark)) 01745 return 0; 01746 } 01747 01748 /* Simple keys after ':' are allowed in the block context. */ 01749 01750 parser->simple_key_allowed = (!parser->flow_level); 01751 } 01752 01753 /* Consume the token. */ 01754 01755 start_mark = parser->mark; 01756 SKIP(parser); 01757 end_mark = parser->mark; 01758 01759 /* Create the VALUE token and append it to the queue. */ 01760 01761 TOKEN_INIT(token, YAML_VALUE_TOKEN, start_mark, end_mark); 01762 01763 if (!ENQUEUE(parser, parser->tokens, token)) 01764 return 0; 01765 01766 return 1; 01767 } 01768 01769 /* 01770 * Produce the ALIAS or ANCHOR token. 01771 */ 01772 01773 static int 01774 yaml_parser_fetch_anchor(yaml_parser_t *parser, yaml_token_type_t type) 01775 { 01776 yaml_token_t token; 01777 01778 /* An anchor or an alias could be a simple key. */ 01779 01780 if (!yaml_parser_save_simple_key(parser)) 01781 return 0; 01782 01783 /* A simple key cannot follow an anchor or an alias. 
*/ 01784 01785 parser->simple_key_allowed = 0; 01786 01787 /* Create the ALIAS or ANCHOR token and append it to the queue. */ 01788 01789 if (!yaml_parser_scan_anchor(parser, &token, type)) 01790 return 0; 01791 01792 if (!ENQUEUE(parser, parser->tokens, token)) { 01793 yaml_token_delete(&token); 01794 return 0; 01795 } 01796 return 1; 01797 } 01798 01799 /* 01800 * Produce the TAG token. 01801 */ 01802 01803 static int 01804 yaml_parser_fetch_tag(yaml_parser_t *parser) 01805 { 01806 yaml_token_t token; 01807 01808 /* A tag could be a simple key. */ 01809 01810 if (!yaml_parser_save_simple_key(parser)) 01811 return 0; 01812 01813 /* A simple key cannot follow a tag. */ 01814 01815 parser->simple_key_allowed = 0; 01816 01817 /* Create the TAG token and append it to the queue. */ 01818 01819 if (!yaml_parser_scan_tag(parser, &token)) 01820 return 0; 01821 01822 if (!ENQUEUE(parser, parser->tokens, token)) { 01823 yaml_token_delete(&token); 01824 return 0; 01825 } 01826 01827 return 1; 01828 } 01829 01830 /* 01831 * Produce the SCALAR(...,literal) or SCALAR(...,folded) tokens. 01832 */ 01833 01834 static int 01835 yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal) 01836 { 01837 yaml_token_t token; 01838 01839 /* Remove any potential simple keys. */ 01840 01841 if (!yaml_parser_remove_simple_key(parser)) 01842 return 0; 01843 01844 /* A simple key may follow a block scalar. */ 01845 01846 parser->simple_key_allowed = 1; 01847 01848 /* Create the SCALAR token and append it to the queue. */ 01849 01850 if (!yaml_parser_scan_block_scalar(parser, &token, literal)) 01851 return 0; 01852 01853 if (!ENQUEUE(parser, parser->tokens, token)) { 01854 yaml_token_delete(&token); 01855 return 0; 01856 } 01857 01858 return 1; 01859 } 01860 01861 /* 01862 * Produce the SCALAR(...,single-quoted) or SCALAR(...,double-quoted) tokens. 
01863 */ 01864 01865 static int 01866 yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single) 01867 { 01868 yaml_token_t token; 01869 01870 /* A plain scalar could be a simple key. */ 01871 01872 if (!yaml_parser_save_simple_key(parser)) 01873 return 0; 01874 01875 /* A simple key cannot follow a flow scalar. */ 01876 01877 parser->simple_key_allowed = 0; 01878 01879 /* Create the SCALAR token and append it to the queue. */ 01880 01881 if (!yaml_parser_scan_flow_scalar(parser, &token, single)) 01882 return 0; 01883 01884 if (!ENQUEUE(parser, parser->tokens, token)) { 01885 yaml_token_delete(&token); 01886 return 0; 01887 } 01888 01889 return 1; 01890 } 01891 01892 /* 01893 * Produce the SCALAR(...,plain) token. 01894 */ 01895 01896 static int 01897 yaml_parser_fetch_plain_scalar(yaml_parser_t *parser) 01898 { 01899 yaml_token_t token; 01900 01901 /* A plain scalar could be a simple key. */ 01902 01903 if (!yaml_parser_save_simple_key(parser)) 01904 return 0; 01905 01906 /* A simple key cannot follow a flow scalar. */ 01907 01908 parser->simple_key_allowed = 0; 01909 01910 /* Create the SCALAR token and append it to the queue. */ 01911 01912 if (!yaml_parser_scan_plain_scalar(parser, &token)) 01913 return 0; 01914 01915 if (!ENQUEUE(parser, parser->tokens, token)) { 01916 yaml_token_delete(&token); 01917 return 0; 01918 } 01919 01920 return 1; 01921 } 01922 01923 /* 01924 * Eat whitespaces and comments until the next token is found. 01925 */ 01926 01927 static int 01928 yaml_parser_scan_to_next_token(yaml_parser_t *parser) 01929 { 01930 /* Until the next token is not found. */ 01931 01932 while (1) 01933 { 01934 /* Allow the BOM mark to start a line. */ 01935 01936 if (!CACHE(parser, 1)) return 0; 01937 01938 if (parser->mark.column == 0 && IS_BOM(parser->buffer)) 01939 SKIP(parser); 01940 01941 /* 01942 * Eat whitespaces. 
01943 * 01944 * Tabs are allowed: 01945 * 01946 * - in the flow context; 01947 * - in the block context, but not at the beginning of the line or 01948 * after '-', '?', or ':' (complex value). 01949 */ 01950 01951 if (!CACHE(parser, 1)) return 0; 01952 01953 while (CHECK(parser->buffer,' ') || 01954 ((parser->flow_level || !parser->simple_key_allowed) && 01955 CHECK(parser->buffer, '\t'))) { 01956 SKIP(parser); 01957 if (!CACHE(parser, 1)) return 0; 01958 } 01959 01960 /* Eat a comment until a line break. */ 01961 01962 if (CHECK(parser->buffer, '#')) { 01963 while (!IS_BREAKZ(parser->buffer)) { 01964 SKIP(parser); 01965 if (!CACHE(parser, 1)) return 0; 01966 } 01967 } 01968 01969 /* If it is a line break, eat it. */ 01970 01971 if (IS_BREAK(parser->buffer)) 01972 { 01973 if (!CACHE(parser, 2)) return 0; 01974 SKIP_LINE(parser); 01975 01976 /* In the block context, a new line may start a simple key. */ 01977 01978 if (!parser->flow_level) { 01979 parser->simple_key_allowed = 1; 01980 } 01981 } 01982 else 01983 { 01984 /* We have found a token. */ 01985 01986 break; 01987 } 01988 } 01989 01990 return 1; 01991 } 01992 01993 /* 01994 * Scan a YAML-DIRECTIVE or TAG-DIRECTIVE token. 01995 * 01996 * Scope: 01997 * %YAML 1.1 # a comment \n 01998 * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 01999 * %TAG !yaml! tag:yaml.org,2002: \n 02000 * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 02001 */ 02002 02003 int 02004 yaml_parser_scan_directive(yaml_parser_t *parser, yaml_token_t *token) 02005 { 02006 yaml_mark_t start_mark, end_mark; 02007 yaml_char_t *name = NULL; 02008 int major, minor; 02009 yaml_char_t *handle = NULL, *prefix = NULL; 02010 02011 /* Eat '%'. */ 02012 02013 start_mark = parser->mark; 02014 02015 SKIP(parser); 02016 02017 /* Scan the directive name. */ 02018 02019 if (!yaml_parser_scan_directive_name(parser, start_mark, &name)) 02020 goto error; 02021 02022 /* Is it a YAML directive? 
*/ 02023 02024 if (strcmp((char *)name, "YAML") == 0) 02025 { 02026 /* Scan the VERSION directive value. */ 02027 02028 if (!yaml_parser_scan_version_directive_value(parser, start_mark, 02029 &major, &minor)) 02030 goto error; 02031 02032 end_mark = parser->mark; 02033 02034 /* Create a VERSION-DIRECTIVE token. */ 02035 02036 VERSION_DIRECTIVE_TOKEN_INIT(*token, major, minor, 02037 start_mark, end_mark); 02038 } 02039 02040 /* Is it a TAG directive? */ 02041 02042 else if (strcmp((char *)name, "TAG") == 0) 02043 { 02044 /* Scan the TAG directive value. */ 02045 02046 if (!yaml_parser_scan_tag_directive_value(parser, start_mark, 02047 &handle, &prefix)) 02048 goto error; 02049 02050 end_mark = parser->mark; 02051 02052 /* Create a TAG-DIRECTIVE token. */ 02053 02054 TAG_DIRECTIVE_TOKEN_INIT(*token, handle, prefix, 02055 start_mark, end_mark); 02056 } 02057 02058 /* Unknown directive. */ 02059 02060 else 02061 { 02062 yaml_parser_set_scanner_error(parser, "while scanning a directive", 02063 start_mark, "found uknown directive name"); 02064 goto error; 02065 } 02066 02067 /* Eat the rest of the line including any comments. */ 02068 02069 if (!CACHE(parser, 1)) goto error; 02070 02071 while (IS_BLANK(parser->buffer)) { 02072 SKIP(parser); 02073 if (!CACHE(parser, 1)) goto error; 02074 } 02075 02076 if (CHECK(parser->buffer, '#')) { 02077 while (!IS_BREAKZ(parser->buffer)) { 02078 SKIP(parser); 02079 if (!CACHE(parser, 1)) goto error; 02080 } 02081 } 02082 02083 /* Check if we are at the end of the line. */ 02084 02085 if (!IS_BREAKZ(parser->buffer)) { 02086 yaml_parser_set_scanner_error(parser, "while scanning a directive", 02087 start_mark, "did not find expected comment or line break"); 02088 goto error; 02089 } 02090 02091 /* Eat a line break. 
*/ 02092 02093 if (IS_BREAK(parser->buffer)) { 02094 if (!CACHE(parser, 2)) goto error; 02095 SKIP_LINE(parser); 02096 } 02097 02098 yaml_free(name); 02099 02100 return 1; 02101 02102 error: 02103 yaml_free(prefix); 02104 yaml_free(handle); 02105 yaml_free(name); 02106 return 0; 02107 } 02108 02109 /* 02110 * Scan the directive name. 02111 * 02112 * Scope: 02113 * %YAML 1.1 # a comment \n 02114 * ^^^^ 02115 * %TAG !yaml! tag:yaml.org,2002: \n 02116 * ^^^ 02117 */ 02118 02119 static int 02120 yaml_parser_scan_directive_name(yaml_parser_t *parser, 02121 yaml_mark_t start_mark, yaml_char_t **name) 02122 { 02123 yaml_string_t string = NULL_STRING; 02124 02125 if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error; 02126 02127 /* Consume the directive name. */ 02128 02129 if (!CACHE(parser, 1)) goto error; 02130 02131 while (IS_ALPHA(parser->buffer)) 02132 { 02133 if (!READ(parser, string)) goto error; 02134 if (!CACHE(parser, 1)) goto error; 02135 } 02136 02137 /* Check if the name is empty. */ 02138 02139 if (string.start == string.pointer) { 02140 yaml_parser_set_scanner_error(parser, "while scanning a directive", 02141 start_mark, "could not find expected directive name"); 02142 goto error; 02143 } 02144 02145 /* Check for an blank character after the name. */ 02146 02147 if (!IS_BLANKZ(parser->buffer)) { 02148 yaml_parser_set_scanner_error(parser, "while scanning a directive", 02149 start_mark, "found unexpected non-alphabetical character"); 02150 goto error; 02151 } 02152 02153 *name = string.start; 02154 02155 return 1; 02156 02157 error: 02158 STRING_DEL(parser, string); 02159 return 0; 02160 } 02161 02162 /* 02163 * Scan the value of VERSION-DIRECTIVE. 02164 * 02165 * Scope: 02166 * %YAML 1.1 # a comment \n 02167 * ^^^^^^ 02168 */ 02169 02170 static int 02171 yaml_parser_scan_version_directive_value(yaml_parser_t *parser, 02172 yaml_mark_t start_mark, int *major, int *minor) 02173 { 02174 /* Eat whitespaces. 
*/ 02175 02176 if (!CACHE(parser, 1)) return 0; 02177 02178 while (IS_BLANK(parser->buffer)) { 02179 SKIP(parser); 02180 if (!CACHE(parser, 1)) return 0; 02181 } 02182 02183 /* Consume the major version number. */ 02184 02185 if (!yaml_parser_scan_version_directive_number(parser, start_mark, major)) 02186 return 0; 02187 02188 /* Eat '.'. */ 02189 02190 if (!CHECK(parser->buffer, '.')) { 02191 return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", 02192 start_mark, "did not find expected digit or '.' character"); 02193 } 02194 02195 SKIP(parser); 02196 02197 /* Consume the minor version number. */ 02198 02199 if (!yaml_parser_scan_version_directive_number(parser, start_mark, minor)) 02200 return 0; 02201 02202 return 1; 02203 } 02204 02205 #define MAX_NUMBER_LENGTH 9 02206 02207 /* 02208 * Scan the version number of VERSION-DIRECTIVE. 02209 * 02210 * Scope: 02211 * %YAML 1.1 # a comment \n 02212 * ^ 02213 * %YAML 1.1 # a comment \n 02214 * ^ 02215 */ 02216 02217 static int 02218 yaml_parser_scan_version_directive_number(yaml_parser_t *parser, 02219 yaml_mark_t start_mark, int *number) 02220 { 02221 int value = 0; 02222 size_t length = 0; 02223 02224 /* Repeat while the next character is digit. */ 02225 02226 if (!CACHE(parser, 1)) return 0; 02227 02228 while (IS_DIGIT(parser->buffer)) 02229 { 02230 /* Check if the number is too long. */ 02231 02232 if (++length > MAX_NUMBER_LENGTH) { 02233 return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", 02234 start_mark, "found extremely long version number"); 02235 } 02236 02237 value = value*10 + AS_DIGIT(parser->buffer); 02238 02239 SKIP(parser); 02240 02241 if (!CACHE(parser, 1)) return 0; 02242 } 02243 02244 /* Check if the number was present. 
*/ 02245 02246 if (!length) { 02247 return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", 02248 start_mark, "did not find expected version number"); 02249 } 02250 02251 *number = value; 02252 02253 return 1; 02254 } 02255 02256 /* 02257 * Scan the value of a TAG-DIRECTIVE token. 02258 * 02259 * Scope: 02260 * %TAG !yaml! tag:yaml.org,2002: \n 02261 * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 02262 */ 02263 02264 static int 02265 yaml_parser_scan_tag_directive_value(yaml_parser_t *parser, 02266 yaml_mark_t start_mark, yaml_char_t **handle, yaml_char_t **prefix) 02267 { 02268 yaml_char_t *handle_value = NULL; 02269 yaml_char_t *prefix_value = NULL; 02270 02271 /* Eat whitespaces. */ 02272 02273 if (!CACHE(parser, 1)) goto error; 02274 02275 while (IS_BLANK(parser->buffer)) { 02276 SKIP(parser); 02277 if (!CACHE(parser, 1)) goto error; 02278 } 02279 02280 /* Scan a handle. */ 02281 02282 if (!yaml_parser_scan_tag_handle(parser, 1, start_mark, &handle_value)) 02283 goto error; 02284 02285 /* Expect a whitespace. */ 02286 02287 if (!CACHE(parser, 1)) goto error; 02288 02289 if (!IS_BLANK(parser->buffer)) { 02290 yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive", 02291 start_mark, "did not find expected whitespace"); 02292 goto error; 02293 } 02294 02295 /* Eat whitespaces. */ 02296 02297 while (IS_BLANK(parser->buffer)) { 02298 SKIP(parser); 02299 if (!CACHE(parser, 1)) goto error; 02300 } 02301 02302 /* Scan a prefix. */ 02303 02304 if (!yaml_parser_scan_tag_uri(parser, 1, NULL, start_mark, &prefix_value)) 02305 goto error; 02306 02307 /* Expect a whitespace or line break. 
*/ 02308 02309 if (!CACHE(parser, 1)) goto error; 02310 02311 if (!IS_BLANKZ(parser->buffer)) { 02312 yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive", 02313 start_mark, "did not find expected whitespace or line break"); 02314 goto error; 02315 } 02316 02317 *handle = handle_value; 02318 *prefix = prefix_value; 02319 02320 return 1; 02321 02322 error: 02323 yaml_free(handle_value); 02324 yaml_free(prefix_value); 02325 return 0; 02326 } 02327 02328 static int 02329 yaml_parser_scan_anchor(yaml_parser_t *parser, yaml_token_t *token, 02330 yaml_token_type_t type) 02331 { 02332 int length = 0; 02333 yaml_mark_t start_mark, end_mark; 02334 yaml_string_t string = NULL_STRING; 02335 02336 if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error; 02337 02338 /* Eat the indicator character. */ 02339 02340 start_mark = parser->mark; 02341 02342 SKIP(parser); 02343 02344 /* Consume the value. */ 02345 02346 if (!CACHE(parser, 1)) goto error; 02347 02348 while (IS_ALPHA(parser->buffer)) { 02349 if (!READ(parser, string)) goto error; 02350 if (!CACHE(parser, 1)) goto error; 02351 length ++; 02352 } 02353 02354 end_mark = parser->mark; 02355 02356 /* 02357 * Check if length of the anchor is greater than 0 and it is followed by 02358 * a whitespace character or one of the indicators: 02359 * 02360 * '?', ':', ',', ']', '}', '%', '@', '`'. 02361 */ 02362 02363 if (!length || !(IS_BLANKZ(parser->buffer) || CHECK(parser->buffer, '?') 02364 || CHECK(parser->buffer, ':') || CHECK(parser->buffer, ',') 02365 || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '}') 02366 || CHECK(parser->buffer, '%') || CHECK(parser->buffer, '@') 02367 || CHECK(parser->buffer, '`'))) { 02368 yaml_parser_set_scanner_error(parser, type == YAML_ANCHOR_TOKEN ? 02369 "while scanning an anchor" : "while scanning an alias", start_mark, 02370 "did not find expected alphabetic or numeric character"); 02371 goto error; 02372 } 02373 02374 /* Create a token. 
*/ 02375 02376 if (type == YAML_ANCHOR_TOKEN) { 02377 ANCHOR_TOKEN_INIT(*token, string.start, start_mark, end_mark); 02378 } 02379 else { 02380 ALIAS_TOKEN_INIT(*token, string.start, start_mark, end_mark); 02381 } 02382 02383 return 1; 02384 02385 error: 02386 STRING_DEL(parser, string); 02387 return 0; 02388 } 02389 02390 /* 02391 * Scan a TAG token. 02392 */ 02393 02394 static int 02395 yaml_parser_scan_tag(yaml_parser_t *parser, yaml_token_t *token) 02396 { 02397 yaml_char_t *handle = NULL; 02398 yaml_char_t *suffix = NULL; 02399 yaml_mark_t start_mark, end_mark; 02400 02401 start_mark = parser->mark; 02402 02403 /* Check if the tag is in the canonical form. */ 02404 02405 if (!CACHE(parser, 2)) goto error; 02406 02407 if (CHECK_AT(parser->buffer, '<', 1)) 02408 { 02409 /* Set the handle to '' */ 02410 02411 handle = yaml_malloc(1); 02412 if (!handle) goto error; 02413 handle[0] = '\0'; 02414 02415 /* Eat '!<' */ 02416 02417 SKIP(parser); 02418 SKIP(parser); 02419 02420 /* Consume the tag value. */ 02421 02422 if (!yaml_parser_scan_tag_uri(parser, 0, NULL, start_mark, &suffix)) 02423 goto error; 02424 02425 /* Check for '>' and eat it. */ 02426 02427 if (!CHECK(parser->buffer, '>')) { 02428 yaml_parser_set_scanner_error(parser, "while scanning a tag", 02429 start_mark, "did not find the expected '>'"); 02430 goto error; 02431 } 02432 02433 SKIP(parser); 02434 } 02435 else 02436 { 02437 /* The tag has either the '!suffix' or the '!handle!suffix' form. */ 02438 02439 /* First, try to scan a handle. */ 02440 02441 if (!yaml_parser_scan_tag_handle(parser, 0, start_mark, &handle)) 02442 goto error; 02443 02444 /* Check if it is, indeed, handle. */ 02445 02446 if (handle[0] == '!' && handle[1] != '\0' && handle[strlen((char *)handle)-1] == '!') 02447 { 02448 /* Scan the suffix now. */ 02449 02450 if (!yaml_parser_scan_tag_uri(parser, 0, NULL, start_mark, &suffix)) 02451 goto error; 02452 } 02453 else 02454 { 02455 /* It wasn't a handle after all. 
Scan the rest of the tag. */ 02456 02457 if (!yaml_parser_scan_tag_uri(parser, 0, handle, start_mark, &suffix)) 02458 goto error; 02459 02460 /* Set the handle to '!'. */ 02461 02462 yaml_free(handle); 02463 handle = yaml_malloc(2); 02464 if (!handle) goto error; 02465 handle[0] = '!'; 02466 handle[1] = '\0'; 02467 02468 /* 02469 * A special case: the '!' tag. Set the handle to '' and the 02470 * suffix to '!'. 02471 */ 02472 02473 if (suffix[0] == '\0') { 02474 yaml_char_t *tmp = handle; 02475 handle = suffix; 02476 suffix = tmp; 02477 } 02478 } 02479 } 02480 02481 /* Check the character which ends the tag. */ 02482 02483 if (!CACHE(parser, 1)) goto error; 02484 02485 if (!IS_BLANKZ(parser->buffer)) { 02486 yaml_parser_set_scanner_error(parser, "while scanning a tag", 02487 start_mark, "did not find expected whitespace or line break"); 02488 goto error; 02489 } 02490 02491 end_mark = parser->mark; 02492 02493 /* Create a token. */ 02494 02495 TAG_TOKEN_INIT(*token, handle, suffix, start_mark, end_mark); 02496 02497 return 1; 02498 02499 error: 02500 yaml_free(handle); 02501 yaml_free(suffix); 02502 return 0; 02503 } 02504 02505 /* 02506 * Scan a tag handle. 02507 */ 02508 02509 static int 02510 yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive, 02511 yaml_mark_t start_mark, yaml_char_t **handle) 02512 { 02513 yaml_string_t string = NULL_STRING; 02514 02515 if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error; 02516 02517 /* Check the initial '!' character. */ 02518 02519 if (!CACHE(parser, 1)) goto error; 02520 02521 if (!CHECK(parser->buffer, '!')) { 02522 yaml_parser_set_scanner_error(parser, directive ? 02523 "while scanning a tag directive" : "while scanning a tag", 02524 start_mark, "did not find expected '!'"); 02525 goto error; 02526 } 02527 02528 /* Copy the '!' character. */ 02529 02530 if (!READ(parser, string)) goto error; 02531 02532 /* Copy all subsequent alphabetical and numerical characters. 
*/ 02533 02534 if (!CACHE(parser, 1)) goto error; 02535 02536 while (IS_ALPHA(parser->buffer)) 02537 { 02538 if (!READ(parser, string)) goto error; 02539 if (!CACHE(parser, 1)) goto error; 02540 } 02541 02542 /* Check if the trailing character is '!' and copy it. */ 02543 02544 if (CHECK(parser->buffer, '!')) 02545 { 02546 if (!READ(parser, string)) goto error; 02547 } 02548 else 02549 { 02550 /* 02551 * It's either the '!' tag or not really a tag handle. If it's a %TAG 02552 * directive, it's an error. If it's a tag token, it must be a part of 02553 * URI. 02554 */ 02555 02556 if (directive && !(string.start[0] == '!' && string.start[1] == '\0')) { 02557 yaml_parser_set_scanner_error(parser, "while parsing a tag directive", 02558 start_mark, "did not find expected '!'"); 02559 goto error; 02560 } 02561 } 02562 02563 *handle = string.start; 02564 02565 return 1; 02566 02567 error: 02568 STRING_DEL(parser, string); 02569 return 0; 02570 } 02571 02572 /* 02573 * Scan a tag. 02574 */ 02575 02576 static int 02577 yaml_parser_scan_tag_uri(yaml_parser_t *parser, int directive, 02578 yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri) 02579 { 02580 size_t length = head ? strlen((char *)head) : 0; 02581 yaml_string_t string = NULL_STRING; 02582 02583 if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error; 02584 02585 /* Resize the string to include the head. */ 02586 02587 while ((size_t)(string.end - string.start) <= length) { 02588 if (!yaml_string_extend(&string.start, &string.pointer, &string.end)) { 02589 parser->error = YAML_MEMORY_ERROR; 02590 goto error; 02591 } 02592 } 02593 02594 /* 02595 * Copy the head if needed. 02596 * 02597 * Note that we don't copy the leading '!' character. 02598 */ 02599 02600 if (length > 1) { 02601 memcpy(string.start, head+1, length-1); 02602 string.pointer += length-1; 02603 } 02604 02605 /* Scan the tag. 
*/ 02606 02607 if (!CACHE(parser, 1)) goto error; 02608 02609 /* 02610 * The set of characters that may appear in URI is as follows: 02611 * 02612 * '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&', 02613 * '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']', 02614 * '%'. 02615 */ 02616 02617 while (IS_ALPHA(parser->buffer) || CHECK(parser->buffer, ';') 02618 || CHECK(parser->buffer, '/') || CHECK(parser->buffer, '?') 02619 || CHECK(parser->buffer, ':') || CHECK(parser->buffer, '@') 02620 || CHECK(parser->buffer, '&') || CHECK(parser->buffer, '=') 02621 || CHECK(parser->buffer, '+') || CHECK(parser->buffer, '$') 02622 || CHECK(parser->buffer, ',') || CHECK(parser->buffer, '.') 02623 || CHECK(parser->buffer, '!') || CHECK(parser->buffer, '~') 02624 || CHECK(parser->buffer, '*') || CHECK(parser->buffer, '\'') 02625 || CHECK(parser->buffer, '(') || CHECK(parser->buffer, ')') 02626 || CHECK(parser->buffer, '[') || CHECK(parser->buffer, ']') 02627 || CHECK(parser->buffer, '%')) 02628 { 02629 /* Check if it is a URI-escape sequence. */ 02630 02631 if (CHECK(parser->buffer, '%')) { 02632 if (!STRING_EXTEND(parser, string)) 02633 goto error; 02634 02635 if (!yaml_parser_scan_uri_escapes(parser, 02636 directive, start_mark, &string)) goto error; 02637 } 02638 else { 02639 if (!READ(parser, string)) goto error; 02640 } 02641 02642 length ++; 02643 if (!CACHE(parser, 1)) goto error; 02644 } 02645 02646 /* Check if the tag is non-empty. */ 02647 02648 if (!length) { 02649 if (!STRING_EXTEND(parser, string)) 02650 goto error; 02651 02652 yaml_parser_set_scanner_error(parser, directive ? 
02653 "while parsing a %TAG directive" : "while parsing a tag", 02654 start_mark, "did not find expected tag URI"); 02655 goto error; 02656 } 02657 02658 *uri = string.start; 02659 02660 return 1; 02661 02662 error: 02663 STRING_DEL(parser, string); 02664 return 0; 02665 } 02666 02667 /* 02668 * Decode an URI-escape sequence corresponding to a single UTF-8 character. 02669 */ 02670 02671 static int 02672 yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive, 02673 yaml_mark_t start_mark, yaml_string_t *string) 02674 { 02675 int width = 0; 02676 02677 /* Decode the required number of characters. */ 02678 02679 do { 02680 02681 unsigned char octet = 0; 02682 02683 /* Check for a URI-escaped octet. */ 02684 02685 if (!CACHE(parser, 3)) return 0; 02686 02687 if (!(CHECK(parser->buffer, '%') 02688 && IS_HEX_AT(parser->buffer, 1) 02689 && IS_HEX_AT(parser->buffer, 2))) { 02690 return yaml_parser_set_scanner_error(parser, directive ? 02691 "while parsing a %TAG directive" : "while parsing a tag", 02692 start_mark, "did not find URI escaped octet"); 02693 } 02694 02695 /* Get the octet. */ 02696 02697 octet = (AS_HEX_AT(parser->buffer, 1) << 4) + AS_HEX_AT(parser->buffer, 2); 02698 02699 /* If it is the leading octet, determine the length of the UTF-8 sequence. */ 02700 02701 if (!width) 02702 { 02703 width = (octet & 0x80) == 0x00 ? 1 : 02704 (octet & 0xE0) == 0xC0 ? 2 : 02705 (octet & 0xF0) == 0xE0 ? 3 : 02706 (octet & 0xF8) == 0xF0 ? 4 : 0; 02707 if (!width) { 02708 return yaml_parser_set_scanner_error(parser, directive ? 02709 "while parsing a %TAG directive" : "while parsing a tag", 02710 start_mark, "found an incorrect leading UTF-8 octet"); 02711 } 02712 } 02713 else 02714 { 02715 /* Check if the trailing octet is correct. */ 02716 02717 if ((octet & 0xC0) != 0x80) { 02718 return yaml_parser_set_scanner_error(parser, directive ? 
02719 "while parsing a %TAG directive" : "while parsing a tag", 02720 start_mark, "found an incorrect trailing UTF-8 octet"); 02721 } 02722 } 02723 02724 /* Copy the octet and move the pointers. */ 02725 02726 *(string->pointer++) = octet; 02727 SKIP(parser); 02728 SKIP(parser); 02729 SKIP(parser); 02730 02731 } while (--width); 02732 02733 return 1; 02734 } 02735 02736 /* 02737 * Scan a block scalar. 02738 */ 02739 02740 static int 02741 yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token, 02742 int literal) 02743 { 02744 yaml_mark_t start_mark; 02745 yaml_mark_t end_mark; 02746 yaml_string_t string = NULL_STRING; 02747 yaml_string_t leading_break = NULL_STRING; 02748 yaml_string_t trailing_breaks = NULL_STRING; 02749 int chomping = 0; 02750 int increment = 0; 02751 int indent = 0; 02752 int leading_blank = 0; 02753 int trailing_blank = 0; 02754 02755 if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error; 02756 if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error; 02757 if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error; 02758 02759 /* Eat the indicator '|' or '>'. */ 02760 02761 start_mark = parser->mark; 02762 02763 SKIP(parser); 02764 02765 /* Scan the additional block scalar indicators. */ 02766 02767 if (!CACHE(parser, 1)) goto error; 02768 02769 /* Check for a chomping indicator. */ 02770 02771 if (CHECK(parser->buffer, '+') || CHECK(parser->buffer, '-')) 02772 { 02773 /* Set the chomping method and eat the indicator. */ 02774 02775 chomping = CHECK(parser->buffer, '+') ? +1 : -1; 02776 02777 SKIP(parser); 02778 02779 /* Check for an indentation indicator. */ 02780 02781 if (!CACHE(parser, 1)) goto error; 02782 02783 if (IS_DIGIT(parser->buffer)) 02784 { 02785 /* Check that the intendation is greater than 0. 
*/ 02786 02787 if (CHECK(parser->buffer, '0')) { 02788 yaml_parser_set_scanner_error(parser, "while scanning a block scalar", 02789 start_mark, "found an intendation indicator equal to 0"); 02790 goto error; 02791 } 02792 02793 /* Get the intendation level and eat the indicator. */ 02794 02795 increment = AS_DIGIT(parser->buffer); 02796 02797 SKIP(parser); 02798 } 02799 } 02800 02801 /* Do the same as above, but in the opposite order. */ 02802 02803 else if (IS_DIGIT(parser->buffer)) 02804 { 02805 if (CHECK(parser->buffer, '0')) { 02806 yaml_parser_set_scanner_error(parser, "while scanning a block scalar", 02807 start_mark, "found an intendation indicator equal to 0"); 02808 goto error; 02809 } 02810 02811 increment = AS_DIGIT(parser->buffer); 02812 02813 SKIP(parser); 02814 02815 if (!CACHE(parser, 1)) goto error; 02816 02817 if (CHECK(parser->buffer, '+') || CHECK(parser->buffer, '-')) { 02818 chomping = CHECK(parser->buffer, '+') ? +1 : -1; 02819 02820 SKIP(parser); 02821 } 02822 } 02823 02824 /* Eat whitespaces and comments to the end of the line. */ 02825 02826 if (!CACHE(parser, 1)) goto error; 02827 02828 while (IS_BLANK(parser->buffer)) { 02829 SKIP(parser); 02830 if (!CACHE(parser, 1)) goto error; 02831 } 02832 02833 if (CHECK(parser->buffer, '#')) { 02834 while (!IS_BREAKZ(parser->buffer)) { 02835 SKIP(parser); 02836 if (!CACHE(parser, 1)) goto error; 02837 } 02838 } 02839 02840 /* Check if we are at the end of the line. */ 02841 02842 if (!IS_BREAKZ(parser->buffer)) { 02843 yaml_parser_set_scanner_error(parser, "while scanning a block scalar", 02844 start_mark, "did not find expected comment or line break"); 02845 goto error; 02846 } 02847 02848 /* Eat a line break. */ 02849 02850 if (IS_BREAK(parser->buffer)) { 02851 if (!CACHE(parser, 2)) goto error; 02852 SKIP_LINE(parser); 02853 } 02854 02855 end_mark = parser->mark; 02856 02857 /* Set the intendation level if it was specified. */ 02858 02859 if (increment) { 02860 indent = parser->indent >= 0 ? 
parser->indent+increment : increment; 02861 } 02862 02863 /* Scan the leading line breaks and determine the indentation level if needed. */ 02864 02865 if (!yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks, 02866 start_mark, &end_mark)) goto error; 02867 02868 /* Scan the block scalar content. */ 02869 02870 if (!CACHE(parser, 1)) goto error; 02871 02872 while ((int)parser->mark.column == indent && !IS_Z(parser->buffer)) 02873 { 02874 /* 02875 * We are at the beginning of a non-empty line. 02876 */ 02877 02878 /* Is it a trailing whitespace? */ 02879 02880 trailing_blank = IS_BLANK(parser->buffer); 02881 02882 /* Check if we need to fold the leading line break. */ 02883 02884 if (!literal && (*leading_break.start == '\n') 02885 && !leading_blank && !trailing_blank) 02886 { 02887 /* Do we need to join the lines by space? */ 02888 02889 if (*trailing_breaks.start == '\0') { 02890 if (!STRING_EXTEND(parser, string)) goto error; 02891 *(string.pointer ++) = ' '; 02892 } 02893 02894 CLEAR(parser, leading_break); 02895 } 02896 else { 02897 if (!JOIN(parser, string, leading_break)) goto error; 02898 CLEAR(parser, leading_break); 02899 } 02900 02901 /* Append the remaining line breaks. */ 02902 02903 if (!JOIN(parser, string, trailing_breaks)) goto error; 02904 CLEAR(parser, trailing_breaks); 02905 02906 /* Is it a leading whitespace? */ 02907 02908 leading_blank = IS_BLANK(parser->buffer); 02909 02910 /* Consume the current line. */ 02911 02912 while (!IS_BREAKZ(parser->buffer)) { 02913 if (!READ(parser, string)) goto error; 02914 if (!CACHE(parser, 1)) goto error; 02915 } 02916 02917 /* Consume the line break. */ 02918 02919 if (!CACHE(parser, 2)) goto error; 02920 02921 if (!READ_LINE(parser, leading_break)) goto error; 02922 02923 /* Eat the following intendation spaces and line breaks. 
*/ 02924 02925 if (!yaml_parser_scan_block_scalar_breaks(parser, 02926 &indent, &trailing_breaks, start_mark, &end_mark)) goto error; 02927 } 02928 02929 /* Chomp the tail. */ 02930 02931 if (chomping != -1) { 02932 if (!JOIN(parser, string, leading_break)) goto error; 02933 } 02934 if (chomping == 1) { 02935 if (!JOIN(parser, string, trailing_breaks)) goto error; 02936 } 02937 02938 /* Create a token. */ 02939 02940 SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start, 02941 literal ? YAML_LITERAL_SCALAR_STYLE : YAML_FOLDED_SCALAR_STYLE, 02942 start_mark, end_mark); 02943 02944 STRING_DEL(parser, leading_break); 02945 STRING_DEL(parser, trailing_breaks); 02946 02947 return 1; 02948 02949 error: 02950 STRING_DEL(parser, string); 02951 STRING_DEL(parser, leading_break); 02952 STRING_DEL(parser, trailing_breaks); 02953 02954 return 0; 02955 } 02956 02957 /* 02958 * Scan intendation spaces and line breaks for a block scalar. Determine the 02959 * intendation level if needed. 02960 */ 02961 02962 static int 02963 yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser, 02964 int *indent, yaml_string_t *breaks, 02965 yaml_mark_t start_mark, yaml_mark_t *end_mark) 02966 { 02967 int max_indent = 0; 02968 02969 *end_mark = parser->mark; 02970 02971 /* Eat the intendation spaces and line breaks. */ 02972 02973 while (1) 02974 { 02975 /* Eat the intendation spaces. */ 02976 02977 if (!CACHE(parser, 1)) return 0; 02978 02979 while ((!*indent || (int)parser->mark.column < *indent) 02980 && IS_SPACE(parser->buffer)) { 02981 SKIP(parser); 02982 if (!CACHE(parser, 1)) return 0; 02983 } 02984 02985 if ((int)parser->mark.column > max_indent) 02986 max_indent = (int)parser->mark.column; 02987 02988 /* Check for a tab character messing the intendation. 
*/ 02989 02990 if ((!*indent || (int)parser->mark.column < *indent) 02991 && IS_TAB(parser->buffer)) { 02992 return yaml_parser_set_scanner_error(parser, "while scanning a block scalar", 02993 start_mark, "found a tab character where an intendation space is expected"); 02994 } 02995 02996 /* Have we found a non-empty line? */ 02997 02998 if (!IS_BREAK(parser->buffer)) break; 02999 03000 /* Consume the line break. */ 03001 03002 if (!CACHE(parser, 2)) return 0; 03003 if (!READ_LINE(parser, *breaks)) return 0; 03004 *end_mark = parser->mark; 03005 } 03006 03007 /* Determine the indentation level if needed. */ 03008 03009 if (!*indent) { 03010 *indent = max_indent; 03011 if (*indent < parser->indent + 1) 03012 *indent = parser->indent + 1; 03013 if (*indent < 1) 03014 *indent = 1; 03015 } 03016 03017 return 1; 03018 } 03019 03020 /* 03021 * Scan a quoted scalar. 03022 */ 03023 03024 static int 03025 yaml_parser_scan_flow_scalar(yaml_parser_t *parser, yaml_token_t *token, 03026 int single) 03027 { 03028 yaml_mark_t start_mark; 03029 yaml_mark_t end_mark; 03030 yaml_string_t string = NULL_STRING; 03031 yaml_string_t leading_break = NULL_STRING; 03032 yaml_string_t trailing_breaks = NULL_STRING; 03033 yaml_string_t whitespaces = NULL_STRING; 03034 int leading_blanks; 03035 03036 if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error; 03037 if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error; 03038 if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error; 03039 if (!STRING_INIT(parser, whitespaces, INITIAL_STRING_SIZE)) goto error; 03040 03041 /* Eat the left quote. */ 03042 03043 start_mark = parser->mark; 03044 03045 SKIP(parser); 03046 03047 /* Consume the content of the quoted scalar. */ 03048 03049 while (1) 03050 { 03051 /* Check that there are no document indicators at the beginning of the line. 
*/ 03052 03053 if (!CACHE(parser, 4)) goto error; 03054 03055 if (parser->mark.column == 0 && 03056 ((CHECK_AT(parser->buffer, '-', 0) && 03057 CHECK_AT(parser->buffer, '-', 1) && 03058 CHECK_AT(parser->buffer, '-', 2)) || 03059 (CHECK_AT(parser->buffer, '.', 0) && 03060 CHECK_AT(parser->buffer, '.', 1) && 03061 CHECK_AT(parser->buffer, '.', 2))) && 03062 IS_BLANKZ_AT(parser->buffer, 3)) 03063 { 03064 yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar", 03065 start_mark, "found unexpected document indicator"); 03066 goto error; 03067 } 03068 03069 /* Check for EOF. */ 03070 03071 if (IS_Z(parser->buffer)) { 03072 yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar", 03073 start_mark, "found unexpected end of stream"); 03074 goto error; 03075 } 03076 03077 /* Consume non-blank characters. */ 03078 03079 if (!CACHE(parser, 2)) goto error; 03080 03081 leading_blanks = 0; 03082 03083 while (!IS_BLANKZ(parser->buffer)) 03084 { 03085 /* Check for an escaped single quote. */ 03086 03087 if (single && CHECK_AT(parser->buffer, '\'', 0) 03088 && CHECK_AT(parser->buffer, '\'', 1)) 03089 { 03090 if (!STRING_EXTEND(parser, string)) goto error; 03091 *(string.pointer++) = '\''; 03092 SKIP(parser); 03093 SKIP(parser); 03094 } 03095 03096 /* Check for the right quote. */ 03097 03098 else if (CHECK(parser->buffer, single ? '\'' : '"')) 03099 { 03100 break; 03101 } 03102 03103 /* Check for an escaped line break. */ 03104 03105 else if (!single && CHECK(parser->buffer, '\\') 03106 && IS_BREAK_AT(parser->buffer, 1)) 03107 { 03108 if (!CACHE(parser, 3)) goto error; 03109 SKIP(parser); 03110 SKIP_LINE(parser); 03111 leading_blanks = 1; 03112 break; 03113 } 03114 03115 /* Check for an escape sequence. */ 03116 03117 else if (!single && CHECK(parser->buffer, '\\')) 03118 { 03119 size_t code_length = 0; 03120 03121 if (!STRING_EXTEND(parser, string)) goto error; 03122 03123 /* Check the escape character. 
*/ 03124 03125 switch (parser->buffer.pointer[1]) 03126 { 03127 case '0': 03128 *(string.pointer++) = '\0'; 03129 break; 03130 03131 case 'a': 03132 *(string.pointer++) = '\x07'; 03133 break; 03134 03135 case 'b': 03136 *(string.pointer++) = '\x08'; 03137 break; 03138 03139 case 't': 03140 case '\t': 03141 *(string.pointer++) = '\x09'; 03142 break; 03143 03144 case 'n': 03145 *(string.pointer++) = '\x0A'; 03146 break; 03147 03148 case 'v': 03149 *(string.pointer++) = '\x0B'; 03150 break; 03151 03152 case 'f': 03153 *(string.pointer++) = '\x0C'; 03154 break; 03155 03156 case 'r': 03157 *(string.pointer++) = '\x0D'; 03158 break; 03159 03160 case 'e': 03161 *(string.pointer++) = '\x1B'; 03162 break; 03163 03164 case ' ': 03165 *(string.pointer++) = '\x20'; 03166 break; 03167 03168 case '"': 03169 *(string.pointer++) = '"'; 03170 break; 03171 03172 case '\'': 03173 *(string.pointer++) = '\''; 03174 break; 03175 03176 case '\\': 03177 *(string.pointer++) = '\\'; 03178 break; 03179 03180 case 'N': /* NEL (#x85) */ 03181 *(string.pointer++) = '\xC2'; 03182 *(string.pointer++) = '\x85'; 03183 break; 03184 03185 case '_': /* #xA0 */ 03186 *(string.pointer++) = '\xC2'; 03187 *(string.pointer++) = '\xA0'; 03188 break; 03189 03190 case 'L': /* LS (#x2028) */ 03191 *(string.pointer++) = '\xE2'; 03192 *(string.pointer++) = '\x80'; 03193 *(string.pointer++) = '\xA8'; 03194 break; 03195 03196 case 'P': /* PS (#x2029) */ 03197 *(string.pointer++) = '\xE2'; 03198 *(string.pointer++) = '\x80'; 03199 *(string.pointer++) = '\xA9'; 03200 break; 03201 03202 case 'x': 03203 code_length = 2; 03204 break; 03205 03206 case 'u': 03207 code_length = 4; 03208 break; 03209 03210 case 'U': 03211 code_length = 8; 03212 break; 03213 03214 default: 03215 yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar", 03216 start_mark, "found unknown escape character"); 03217 goto error; 03218 } 03219 03220 SKIP(parser); 03221 SKIP(parser); 03222 03223 /* Consume an arbitrary escape code. 
*/ 03224 03225 if (code_length) 03226 { 03227 unsigned int value = 0; 03228 size_t k; 03229 03230 /* Scan the character value. */ 03231 03232 if (!CACHE(parser, code_length)) goto error; 03233 03234 for (k = 0; k < code_length; k ++) { 03235 if (!IS_HEX_AT(parser->buffer, k)) { 03236 yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar", 03237 start_mark, "did not find expected hexdecimal number"); 03238 goto error; 03239 } 03240 value = (value << 4) + AS_HEX_AT(parser->buffer, k); 03241 } 03242 03243 /* Check the value and write the character. */ 03244 03245 if ((value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF) { 03246 yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar", 03247 start_mark, "found invalid Unicode character escape code"); 03248 goto error; 03249 } 03250 03251 if (value <= 0x7F) { 03252 *(string.pointer++) = value; 03253 } 03254 else if (value <= 0x7FF) { 03255 *(string.pointer++) = 0xC0 + (value >> 6); 03256 *(string.pointer++) = 0x80 + (value & 0x3F); 03257 } 03258 else if (value <= 0xFFFF) { 03259 *(string.pointer++) = 0xE0 + (value >> 12); 03260 *(string.pointer++) = 0x80 + ((value >> 6) & 0x3F); 03261 *(string.pointer++) = 0x80 + (value & 0x3F); 03262 } 03263 else { 03264 *(string.pointer++) = 0xF0 + (value >> 18); 03265 *(string.pointer++) = 0x80 + ((value >> 12) & 0x3F); 03266 *(string.pointer++) = 0x80 + ((value >> 6) & 0x3F); 03267 *(string.pointer++) = 0x80 + (value & 0x3F); 03268 } 03269 03270 /* Advance the pointer. */ 03271 03272 for (k = 0; k < code_length; k ++) { 03273 SKIP(parser); 03274 } 03275 } 03276 } 03277 03278 else 03279 { 03280 /* It is a non-escaped non-blank character. */ 03281 03282 if (!READ(parser, string)) goto error; 03283 } 03284 03285 if (!CACHE(parser, 2)) goto error; 03286 } 03287 03288 /* Check if we are at the end of the scalar. */ 03289 03290 if (CHECK(parser->buffer, single ? '\'' : '"')) 03291 break; 03292 03293 /* Consume blank characters. 
*/ 03294 03295 if (!CACHE(parser, 1)) goto error; 03296 03297 while (IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer)) 03298 { 03299 if (IS_BLANK(parser->buffer)) 03300 { 03301 /* Consume a space or a tab character. */ 03302 03303 if (!leading_blanks) { 03304 if (!READ(parser, whitespaces)) goto error; 03305 } 03306 else { 03307 SKIP(parser); 03308 } 03309 } 03310 else 03311 { 03312 if (!CACHE(parser, 2)) goto error; 03313 03314 /* Check if it is a first line break. */ 03315 03316 if (!leading_blanks) 03317 { 03318 CLEAR(parser, whitespaces); 03319 if (!READ_LINE(parser, leading_break)) goto error; 03320 leading_blanks = 1; 03321 } 03322 else 03323 { 03324 if (!READ_LINE(parser, trailing_breaks)) goto error; 03325 } 03326 } 03327 if (!CACHE(parser, 1)) goto error; 03328 } 03329 03330 /* Join the whitespaces or fold line breaks. */ 03331 03332 if (leading_blanks) 03333 { 03334 /* Do we need to fold line breaks? */ 03335 03336 if (leading_break.start[0] == '\n') { 03337 if (trailing_breaks.start[0] == '\0') { 03338 if (!STRING_EXTEND(parser, string)) goto error; 03339 *(string.pointer++) = ' '; 03340 } 03341 else { 03342 if (!JOIN(parser, string, trailing_breaks)) goto error; 03343 CLEAR(parser, trailing_breaks); 03344 } 03345 CLEAR(parser, leading_break); 03346 } 03347 else { 03348 if (!JOIN(parser, string, leading_break)) goto error; 03349 if (!JOIN(parser, string, trailing_breaks)) goto error; 03350 CLEAR(parser, leading_break); 03351 CLEAR(parser, trailing_breaks); 03352 } 03353 } 03354 else 03355 { 03356 if (!JOIN(parser, string, whitespaces)) goto error; 03357 CLEAR(parser, whitespaces); 03358 } 03359 } 03360 03361 /* Eat the right quote. */ 03362 03363 SKIP(parser); 03364 03365 end_mark = parser->mark; 03366 03367 /* Create a token. */ 03368 03369 SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start, 03370 single ? 
YAML_SINGLE_QUOTED_SCALAR_STYLE : YAML_DOUBLE_QUOTED_SCALAR_STYLE, 03371 start_mark, end_mark); 03372 03373 STRING_DEL(parser, leading_break); 03374 STRING_DEL(parser, trailing_breaks); 03375 STRING_DEL(parser, whitespaces); 03376 03377 return 1; 03378 03379 error: 03380 STRING_DEL(parser, string); 03381 STRING_DEL(parser, leading_break); 03382 STRING_DEL(parser, trailing_breaks); 03383 STRING_DEL(parser, whitespaces); 03384 03385 return 0; 03386 } 03387 03388 /* 03389 * Scan a plain scalar. 03390 */ 03391 03392 static int 03393 yaml_parser_scan_plain_scalar(yaml_parser_t *parser, yaml_token_t *token) 03394 { 03395 yaml_mark_t start_mark; 03396 yaml_mark_t end_mark; 03397 yaml_string_t string = NULL_STRING; 03398 yaml_string_t leading_break = NULL_STRING; 03399 yaml_string_t trailing_breaks = NULL_STRING; 03400 yaml_string_t whitespaces = NULL_STRING; 03401 int leading_blanks = 0; 03402 int indent = parser->indent+1; 03403 03404 if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error; 03405 if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error; 03406 if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error; 03407 if (!STRING_INIT(parser, whitespaces, INITIAL_STRING_SIZE)) goto error; 03408 03409 start_mark = end_mark = parser->mark; 03410 03411 /* Consume the content of the plain scalar. */ 03412 03413 while (1) 03414 { 03415 /* Check for a document indicator. */ 03416 03417 if (!CACHE(parser, 4)) goto error; 03418 03419 if (parser->mark.column == 0 && 03420 ((CHECK_AT(parser->buffer, '-', 0) && 03421 CHECK_AT(parser->buffer, '-', 1) && 03422 CHECK_AT(parser->buffer, '-', 2)) || 03423 (CHECK_AT(parser->buffer, '.', 0) && 03424 CHECK_AT(parser->buffer, '.', 1) && 03425 CHECK_AT(parser->buffer, '.', 2))) && 03426 IS_BLANKZ_AT(parser->buffer, 3)) break; 03427 03428 /* Check for a comment. */ 03429 03430 if (CHECK(parser->buffer, '#')) 03431 break; 03432 03433 /* Consume non-blank characters. 
*/ 03434 03435 while (!IS_BLANKZ(parser->buffer)) 03436 { 03437 /* Check for 'x:x' in the flow context. TODO: Fix the test "spec-08-13". */ 03438 03439 if (parser->flow_level 03440 && CHECK(parser->buffer, ':') 03441 && !IS_BLANKZ_AT(parser->buffer, 1)) { 03442 yaml_parser_set_scanner_error(parser, "while scanning a plain scalar", 03443 start_mark, "found unexpected ':'"); 03444 goto error; 03445 } 03446 03447 /* Check for indicators that may end a plain scalar. */ 03448 03449 if ((CHECK(parser->buffer, ':') && IS_BLANKZ_AT(parser->buffer, 1)) 03450 || (parser->flow_level && 03451 (CHECK(parser->buffer, ',') || CHECK(parser->buffer, ':') 03452 || CHECK(parser->buffer, '?') || CHECK(parser->buffer, '[') 03453 || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '{') 03454 || CHECK(parser->buffer, '}')))) 03455 break; 03456 03457 /* Check if we need to join whitespaces and breaks. */ 03458 03459 if (leading_blanks || whitespaces.start != whitespaces.pointer) 03460 { 03461 if (leading_blanks) 03462 { 03463 /* Do we need to fold line breaks? */ 03464 03465 if (leading_break.start[0] == '\n') { 03466 if (trailing_breaks.start[0] == '\0') { 03467 if (!STRING_EXTEND(parser, string)) goto error; 03468 *(string.pointer++) = ' '; 03469 } 03470 else { 03471 if (!JOIN(parser, string, trailing_breaks)) goto error; 03472 CLEAR(parser, trailing_breaks); 03473 } 03474 CLEAR(parser, leading_break); 03475 } 03476 else { 03477 if (!JOIN(parser, string, leading_break)) goto error; 03478 if (!JOIN(parser, string, trailing_breaks)) goto error; 03479 CLEAR(parser, leading_break); 03480 CLEAR(parser, trailing_breaks); 03481 } 03482 03483 leading_blanks = 0; 03484 } 03485 else 03486 { 03487 if (!JOIN(parser, string, whitespaces)) goto error; 03488 CLEAR(parser, whitespaces); 03489 } 03490 } 03491 03492 /* Copy the character. 
*/ 03493 03494 if (!READ(parser, string)) goto error; 03495 03496 end_mark = parser->mark; 03497 03498 if (!CACHE(parser, 2)) goto error; 03499 } 03500 03501 /* Is it the end? */ 03502 03503 if (!(IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer))) 03504 break; 03505 03506 /* Consume blank characters. */ 03507 03508 if (!CACHE(parser, 1)) goto error; 03509 03510 while (IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer)) 03511 { 03512 if (IS_BLANK(parser->buffer)) 03513 { 03514 /* Check for tab character that abuse intendation. */ 03515 03516 if (leading_blanks && (int)parser->mark.column < indent 03517 && IS_TAB(parser->buffer)) { 03518 yaml_parser_set_scanner_error(parser, "while scanning a plain scalar", 03519 start_mark, "found a tab character that violate intendation"); 03520 goto error; 03521 } 03522 03523 /* Consume a space or a tab character. */ 03524 03525 if (!leading_blanks) { 03526 if (!READ(parser, whitespaces)) goto error; 03527 } 03528 else { 03529 SKIP(parser); 03530 } 03531 } 03532 else 03533 { 03534 if (!CACHE(parser, 2)) goto error; 03535 03536 /* Check if it is a first line break. */ 03537 03538 if (!leading_blanks) 03539 { 03540 CLEAR(parser, whitespaces); 03541 if (!READ_LINE(parser, leading_break)) goto error; 03542 leading_blanks = 1; 03543 } 03544 else 03545 { 03546 if (!READ_LINE(parser, trailing_breaks)) goto error; 03547 } 03548 } 03549 if (!CACHE(parser, 1)) goto error; 03550 } 03551 03552 /* Check intendation level. */ 03553 03554 if (!parser->flow_level && (int)parser->mark.column < indent) 03555 break; 03556 } 03557 03558 /* Create a token. */ 03559 03560 SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start, 03561 YAML_PLAIN_SCALAR_STYLE, start_mark, end_mark); 03562 03563 /* Note that we change the 'simple_key_allowed' flag. 
*/ 03564 03565 if (leading_blanks) { 03566 parser->simple_key_allowed = 1; 03567 } 03568 03569 STRING_DEL(parser, leading_break); 03570 STRING_DEL(parser, trailing_breaks); 03571 STRING_DEL(parser, whitespaces); 03572 03573 return 1; 03574 03575 error: 03576 STRING_DEL(parser, string); 03577 STRING_DEL(parser, leading_break); 03578 STRING_DEL(parser, trailing_breaks); 03579 STRING_DEL(parser, whitespaces); 03580 03581 return 0; 03582 } 03583 03584
1.7.6.1