Ruby 2.0.0p481 (2014-05-08 revision 45883)
ext/psych/yaml/scanner.c
Go to the documentation of this file.
00001 
00002 /*
00003  * Introduction
00004  * ************
00005  *
00006  * The following notes assume that you are familiar with the YAML specification
00007  * (http://yaml.org/spec/cvs/current.html).  We mostly follow it, although in
00008  * some cases we are less restrictive than it requires.
00009  *
00010  * The process of transforming a YAML stream into a sequence of events is
00011  * divided into two steps: Scanning and Parsing.
00012  *
00013  * The Scanner transforms the input stream into a sequence of tokens, while the
00014  * parser transforms the sequence of tokens produced by the Scanner into a
00015  * sequence of parsing events.
00016  *
00017  * The Scanner is rather clever and complicated. The Parser, on the contrary,
00018  * is a straightforward implementation of a recursive-descent parser (or,
00019  * LL(1) parser, as it is usually called).
00020  *
00021  * Actually there are two issues of Scanning that might be called "clever", the
00022  * rest is quite straightforward.  The issues are "block collection start" and
00023  * "simple keys".  Both issues are explained below in details.
00024  *
00025  * Here the Scanning step is explained and implemented.  We start with the list
00026  * of all the tokens produced by the Scanner together with short descriptions.
00027  *
00028  * Now, tokens:
00029  *
00030  *      STREAM-START(encoding)          # The stream start.
00031  *      STREAM-END                      # The stream end.
00032  *      VERSION-DIRECTIVE(major,minor)  # The '%YAML' directive.
00033  *      TAG-DIRECTIVE(handle,prefix)    # The '%TAG' directive.
00034  *      DOCUMENT-START                  # '---'
00035  *      DOCUMENT-END                    # '...'
00036  *      BLOCK-SEQUENCE-START            # Indentation increase denoting a block
00037  *      BLOCK-MAPPING-START             # sequence or a block mapping.
00038  *      BLOCK-END                       # Indentation decrease.
00039  *      FLOW-SEQUENCE-START             # '['
00040  *      FLOW-SEQUENCE-END               # ']'
00041  *      FLOW-MAPPING-START              # '{'
00042  *      FLOW-MAPPING-END                # '}'
00043  *      BLOCK-ENTRY                     # '-'
00044  *      FLOW-ENTRY                      # ','
00045  *      KEY                             # '?' or nothing (simple keys).
00046  *      VALUE                           # ':'
00047  *      ALIAS(anchor)                   # '*anchor'
00048  *      ANCHOR(anchor)                  # '&anchor'
00049  *      TAG(handle,suffix)              # '!handle!suffix'
00050  *      SCALAR(value,style)             # A scalar.
00051  *
00052  * The following two tokens are "virtual" tokens denoting the beginning and the
00053  * end of the stream:
00054  *
00055  *      STREAM-START(encoding)
00056  *      STREAM-END
00057  *
00058  * We pass the information about the input stream encoding with the
00059  * STREAM-START token.
00060  *
00061  * The next two tokens are responsible for directives:
00062  *
00063  *      VERSION-DIRECTIVE(major,minor)
00064  *      TAG-DIRECTIVE(handle,prefix)
00065  *
00066  * Example:
00067  *
00068  *      %YAML   1.1
00069  *      %TAG    !   !foo
00070  *      %TAG    !yaml!  tag:yaml.org,2002:
00071  *      ---
00072  *
00073  * The corresponding sequence of tokens:
00074  *
00075  *      STREAM-START(utf-8)
00076  *      VERSION-DIRECTIVE(1,1)
00077  *      TAG-DIRECTIVE("!","!foo")
00078  *      TAG-DIRECTIVE("!yaml!","tag:yaml.org,2002:")
00079  *      DOCUMENT-START
00080  *      STREAM-END
00081  *
00082  * Note that the VERSION-DIRECTIVE and TAG-DIRECTIVE tokens occupy a whole
00083  * line.
00084  *
00085  * The document start and end indicators are represented by:
00086  *
00087  *      DOCUMENT-START
00088  *      DOCUMENT-END
00089  *
00090  * Note that if a YAML stream contains an implicit document (without '---'
00091  * and '...' indicators), no DOCUMENT-START and DOCUMENT-END tokens will be
00092  * produced.
00093  *
00094  * In the following examples, we present whole documents together with the
00095  * produced tokens.
00096  *
00097  *      1. An implicit document:
00098  *
00099  *          'a scalar'
00100  *
00101  *      Tokens:
00102  *
00103  *          STREAM-START(utf-8)
00104  *          SCALAR("a scalar",single-quoted)
00105  *          STREAM-END
00106  *
00107  *      2. An explicit document:
00108  *
00109  *          ---
00110  *          'a scalar'
00111  *          ...
00112  *
00113  *      Tokens:
00114  *
00115  *          STREAM-START(utf-8)
00116  *          DOCUMENT-START
00117  *          SCALAR("a scalar",single-quoted)
00118  *          DOCUMENT-END
00119  *          STREAM-END
00120  *
00121  *      3. Several documents in a stream:
00122  *
00123  *          'a scalar'
00124  *          ---
00125  *          'another scalar'
00126  *          ---
00127  *          'yet another scalar'
00128  *
00129  *      Tokens:
00130  *
00131  *          STREAM-START(utf-8)
00132  *          SCALAR("a scalar",single-quoted)
00133  *          DOCUMENT-START
00134  *          SCALAR("another scalar",single-quoted)
00135  *          DOCUMENT-START
00136  *          SCALAR("yet another scalar",single-quoted)
00137  *          STREAM-END
00138  *
00139  * We have already introduced the SCALAR token above.  The following tokens are
00140  * used to describe aliases, anchors, tags, and scalars:
00141  *
00142  *      ALIAS(anchor)
00143  *      ANCHOR(anchor)
00144  *      TAG(handle,suffix)
00145  *      SCALAR(value,style)
00146  *
00147  * The following series of examples illustrate the usage of these tokens:
00148  *
00149  *      1. A recursive sequence:
00150  *
00151  *          &A [ *A ]
00152  *
00153  *      Tokens:
00154  *
00155  *          STREAM-START(utf-8)
00156  *          ANCHOR("A")
00157  *          FLOW-SEQUENCE-START
00158  *          ALIAS("A")
00159  *          FLOW-SEQUENCE-END
00160  *          STREAM-END
00161  *
00162  *      2. A tagged scalar:
00163  *
00164  *          !!float "3.14"  # A good approximation.
00165  *
00166  *      Tokens:
00167  *
00168  *          STREAM-START(utf-8)
00169  *          TAG("!!","float")
00170  *          SCALAR("3.14",double-quoted)
00171  *          STREAM-END
00172  *
00173  *      3. Various scalar styles:
00174  *
00175  *          --- # Implicit empty plain scalars do not produce tokens.
00176  *          --- a plain scalar
00177  *          --- 'a single-quoted scalar'
00178  *          --- "a double-quoted scalar"
00179  *          --- |-
00180  *            a literal scalar
00181  *          --- >-
00182  *            a folded
00183  *            scalar
00184  *
00185  *      Tokens:
00186  *
00187  *          STREAM-START(utf-8)
00188  *          DOCUMENT-START
00189  *          DOCUMENT-START
00190  *          SCALAR("a plain scalar",plain)
00191  *          DOCUMENT-START
00192  *          SCALAR("a single-quoted scalar",single-quoted)
00193  *          DOCUMENT-START
00194  *          SCALAR("a double-quoted scalar",double-quoted)
00195  *          DOCUMENT-START
00196  *          SCALAR("a literal scalar",literal)
00197  *          DOCUMENT-START
00198  *          SCALAR("a folded scalar",folded)
00199  *          STREAM-END
00200  *
00201  * Now it's time to review collection-related tokens. We will start with
00202  * flow collections:
00203  *
00204  *      FLOW-SEQUENCE-START
00205  *      FLOW-SEQUENCE-END
00206  *      FLOW-MAPPING-START
00207  *      FLOW-MAPPING-END
00208  *      FLOW-ENTRY
00209  *      KEY
00210  *      VALUE
00211  *
00212  * The tokens FLOW-SEQUENCE-START, FLOW-SEQUENCE-END, FLOW-MAPPING-START, and
00213  * FLOW-MAPPING-END represent the indicators '[', ']', '{', and '}'
00214  * correspondingly.  FLOW-ENTRY represents the ',' indicator.  Finally the
00215  * indicators '?' and ':', which are used for denoting mapping keys and values,
00216  * are represented by the KEY and VALUE tokens.
00217  *
00218  * The following examples show flow collections:
00219  *
00220  *      1. A flow sequence:
00221  *
00222  *          [item 1, item 2, item 3]
00223  *
00224  *      Tokens:
00225  *
00226  *          STREAM-START(utf-8)
00227  *          FLOW-SEQUENCE-START
00228  *          SCALAR("item 1",plain)
00229  *          FLOW-ENTRY
00230  *          SCALAR("item 2",plain)
00231  *          FLOW-ENTRY
00232  *          SCALAR("item 3",plain)
00233  *          FLOW-SEQUENCE-END
00234  *          STREAM-END
00235  *
00236  *      2. A flow mapping:
00237  *
00238  *          {
00239  *              a simple key: a value,  # Note that the KEY token is produced.
00240  *              ? a complex key: another value,
00241  *          }
00242  *
00243  *      Tokens:
00244  *
00245  *          STREAM-START(utf-8)
00246  *          FLOW-MAPPING-START
00247  *          KEY
00248  *          SCALAR("a simple key",plain)
00249  *          VALUE
00250  *          SCALAR("a value",plain)
00251  *          FLOW-ENTRY
00252  *          KEY
00253  *          SCALAR("a complex key",plain)
00254  *          VALUE
00255  *          SCALAR("another value",plain)
00256  *          FLOW-ENTRY
00257  *          FLOW-MAPPING-END
00258  *          STREAM-END
00259  *
00260  * A simple key is a key which is not denoted by the '?' indicator.  Note that
00261  * the Scanner still produces the KEY token whenever it encounters a simple key.
00262  *
00263  * For scanning block collections, the following tokens are used (note that we
00264  * repeat KEY and VALUE here):
00265  *
00266  *      BLOCK-SEQUENCE-START
00267  *      BLOCK-MAPPING-START
00268  *      BLOCK-END
00269  *      BLOCK-ENTRY
00270  *      KEY
00271  *      VALUE
00272  *
00273  * The tokens BLOCK-SEQUENCE-START and BLOCK-MAPPING-START denote indentation
00274  * increase that precedes a block collection (cf. the INDENT token in Python).
00275  * The token BLOCK-END denotes indentation decrease that ends a block collection
00276  * (cf. the DEDENT token in Python).  However, YAML has some syntax peculiarities
00277  * that make detection of these tokens more complex.
00278  *
00279  * The tokens BLOCK-ENTRY, KEY, and VALUE are used to represent the indicators
00280  * '-', '?', and ':' correspondingly.
00281  *
00282  * The following examples show how the tokens BLOCK-SEQUENCE-START,
00283  * BLOCK-MAPPING-START, and BLOCK-END are emitted by the Scanner:
00284  *
00285  *      1. Block sequences:
00286  *
00287  *          - item 1
00288  *          - item 2
00289  *          -
00290  *            - item 3.1
00291  *            - item 3.2
00292  *          -
00293  *            key 1: value 1
00294  *            key 2: value 2
00295  *
00296  *      Tokens:
00297  *
00298  *          STREAM-START(utf-8)
00299  *          BLOCK-SEQUENCE-START
00300  *          BLOCK-ENTRY
00301  *          SCALAR("item 1",plain)
00302  *          BLOCK-ENTRY
00303  *          SCALAR("item 2",plain)
00304  *          BLOCK-ENTRY
00305  *          BLOCK-SEQUENCE-START
00306  *          BLOCK-ENTRY
00307  *          SCALAR("item 3.1",plain)
00308  *          BLOCK-ENTRY
00309  *          SCALAR("item 3.2",plain)
00310  *          BLOCK-END
00311  *          BLOCK-ENTRY
00312  *          BLOCK-MAPPING-START
00313  *          KEY
00314  *          SCALAR("key 1",plain)
00315  *          VALUE
00316  *          SCALAR("value 1",plain)
00317  *          KEY
00318  *          SCALAR("key 2",plain)
00319  *          VALUE
00320  *          SCALAR("value 2",plain)
00321  *          BLOCK-END
00322  *          BLOCK-END
00323  *          STREAM-END
00324  *
00325  *      2. Block mappings:
00326  *
00327  *          a simple key: a value   # The KEY token is produced here.
00328  *          ? a complex key
00329  *          : another value
00330  *          a mapping:
00331  *            key 1: value 1
00332  *            key 2: value 2
00333  *          a sequence:
00334  *            - item 1
00335  *            - item 2
00336  *
00337  *      Tokens:
00338  *
00339  *          STREAM-START(utf-8)
00340  *          BLOCK-MAPPING-START
00341  *          KEY
00342  *          SCALAR("a simple key",plain)
00343  *          VALUE
00344  *          SCALAR("a value",plain)
00345  *          KEY
00346  *          SCALAR("a complex key",plain)
00347  *          VALUE
00348  *          SCALAR("another value",plain)
00349  *          KEY
00350  *          SCALAR("a mapping",plain)
00351  *          BLOCK-MAPPING-START
00352  *          KEY
00353  *          SCALAR("key 1",plain)
00354  *          VALUE
00355  *          SCALAR("value 1",plain)
00356  *          KEY
00357  *          SCALAR("key 2",plain)
00358  *          VALUE
00359  *          SCALAR("value 2",plain)
00360  *          BLOCK-END
00361  *          KEY
00362  *          SCALAR("a sequence",plain)
00363  *          VALUE
00364  *          BLOCK-SEQUENCE-START
00365  *          BLOCK-ENTRY
00366  *          SCALAR("item 1",plain)
00367  *          BLOCK-ENTRY
00368  *          SCALAR("item 2",plain)
00369  *          BLOCK-END
00370  *          BLOCK-END
00371  *          STREAM-END
00372  *
00373  * YAML does not always require a new block collection to start on a new
00374  * line.  If the current line contains only '-', '?', and ':' indicators, a new
00375  * block collection may start at the current line.  The following examples
00376  * illustrate this case:
00377  *
00378  *      1. Collections in a sequence:
00379  *
00380  *          - - item 1
00381  *            - item 2
00382  *          - key 1: value 1
00383  *            key 2: value 2
00384  *          - ? complex key
00385  *            : complex value
00386  *
00387  *      Tokens:
00388  *
00389  *          STREAM-START(utf-8)
00390  *          BLOCK-SEQUENCE-START
00391  *          BLOCK-ENTRY
00392  *          BLOCK-SEQUENCE-START
00393  *          BLOCK-ENTRY
00394  *          SCALAR("item 1",plain)
00395  *          BLOCK-ENTRY
00396  *          SCALAR("item 2",plain)
00397  *          BLOCK-END
00398  *          BLOCK-ENTRY
00399  *          BLOCK-MAPPING-START
00400  *          KEY
00401  *          SCALAR("key 1",plain)
00402  *          VALUE
00403  *          SCALAR("value 1",plain)
00404  *          KEY
00405  *          SCALAR("key 2",plain)
00406  *          VALUE
00407  *          SCALAR("value 2",plain)
00408  *          BLOCK-END
00409  *          BLOCK-ENTRY
00410  *          BLOCK-MAPPING-START
00411  *          KEY
00412  *          SCALAR("complex key",plain)
00413  *          VALUE
00414  *          SCALAR("complex value",plain)
00415  *          BLOCK-END
00416  *          BLOCK-END
00417  *          STREAM-END
00418  *
00419  *      2. Collections in a mapping:
00420  *
00421  *          ? a sequence
00422  *          : - item 1
00423  *            - item 2
00424  *          ? a mapping
00425  *          : key 1: value 1
00426  *            key 2: value 2
00427  *
00428  *      Tokens:
00429  *
00430  *          STREAM-START(utf-8)
00431  *          BLOCK-MAPPING-START
00432  *          KEY
00433  *          SCALAR("a sequence",plain)
00434  *          VALUE
00435  *          BLOCK-SEQUENCE-START
00436  *          BLOCK-ENTRY
00437  *          SCALAR("item 1",plain)
00438  *          BLOCK-ENTRY
00439  *          SCALAR("item 2",plain)
00440  *          BLOCK-END
00441  *          KEY
00442  *          SCALAR("a mapping",plain)
00443  *          VALUE
00444  *          BLOCK-MAPPING-START
00445  *          KEY
00446  *          SCALAR("key 1",plain)
00447  *          VALUE
00448  *          SCALAR("value 1",plain)
00449  *          KEY
00450  *          SCALAR("key 2",plain)
00451  *          VALUE
00452  *          SCALAR("value 2",plain)
00453  *          BLOCK-END
00454  *          BLOCK-END
00455  *          STREAM-END
00456  *
00457  * YAML also permits non-indented sequences if they are included into a block
00458  * mapping.  In this case, the token BLOCK-SEQUENCE-START is not produced:
00459  *
00460  *      key:
00461  *      - item 1    # BLOCK-SEQUENCE-START is NOT produced here.
00462  *      - item 2
00463  *
00464  * Tokens:
00465  *
00466  *      STREAM-START(utf-8)
00467  *      BLOCK-MAPPING-START
00468  *      KEY
00469  *      SCALAR("key",plain)
00470  *      VALUE
00471  *      BLOCK-ENTRY
00472  *      SCALAR("item 1",plain)
00473  *      BLOCK-ENTRY
00474  *      SCALAR("item 2",plain)
00475  *      BLOCK-END
00476  */
00477 
00478 #include "yaml_private.h"
00479 
/*
 * Ensure that the buffer contains the required number of characters.
 * Return 1 on success, 0 on failure (reader error or memory error).
 *
 * If `parser->unread` already covers `length`, the macro yields 1 without
 * touching the reader; otherwise it yields whatever
 * yaml_parser_update_buffer() returns.
 *
 * `parser` is parenthesized in the expansion, so any pointer-valued
 * expression (e.g. `&obj`) may be passed.  `length` is evaluated twice when
 * a refill is needed, so it must not have side effects.
 */

#define CACHE(parser,length)                                                    \
    ((parser)->unread >= (length)                                               \
        ? 1                                                                     \
        : yaml_parser_update_buffer((parser), (length)))
00489 
/*
 * Advance the buffer pointer past one character.
 *
 * Bumps mark.index and mark.column by one, decrements the unread counter by
 * one, and moves buffer.pointer forward by WIDTH() of the current character.
 * The value of the expression is the updated buffer pointer.
 *
 * `parser` is parenthesized in the expansion but is evaluated several
 * times, so it must not have side effects.
 */

#define SKIP(parser)                                                            \
     ((parser)->mark.index ++,                                                  \
      (parser)->mark.column ++,                                                 \
      (parser)->unread --,                                                      \
      (parser)->buffer.pointer += WIDTH((parser)->buffer))
00499 
/*
 * Advance the buffer pointer past one line break, if there is one.
 *
 *  - CR LF: index and unread move by 2, the pointer moves by 2 bytes.
 *  - any other break recognised by IS_BREAK(): index and unread move by 1,
 *    the pointer moves by WIDTH() of the break character.
 *  - anything else: nothing is changed and the expression yields 0.
 *
 * In both break cases mark.column is reset to 0 and mark.line is incremented.
 *
 * `parser` is parenthesized in the expansion but is evaluated several
 * times, so it must not have side effects.
 */

#define SKIP_LINE(parser)                                                       \
     (IS_CRLF((parser)->buffer) ?                                               \
      ((parser)->mark.index += 2,                                               \
       (parser)->mark.column = 0,                                               \
       (parser)->mark.line ++,                                                  \
       (parser)->unread -= 2,                                                   \
       (parser)->buffer.pointer += 2) :                                         \
      IS_BREAK((parser)->buffer) ?                                              \
      ((parser)->mark.index ++,                                                 \
       (parser)->mark.column = 0,                                               \
       (parser)->mark.line ++,                                                  \
       (parser)->unread --,                                                     \
       (parser)->buffer.pointer += WIDTH((parser)->buffer)) : 0)
00513 
/*
 * Copy a character to a string buffer and advance pointers.
 *
 * STRING_EXTEND() is asked to make room in `string` first; if it fails the
 * macro yields 0 and nothing else is touched.  Otherwise COPY() transfers
 * the current character from the parser buffer into `string`, mark.index
 * and mark.column are incremented, unread is decremented, and the macro
 * yields 1.
 *
 * `parser` is parenthesized in the expansion but is evaluated several
 * times, so it must not have side effects.
 */

#define READ(parser,string)                                                     \
     (STRING_EXTEND(parser,string) ?                                            \
         (COPY(string,(parser)->buffer),                                        \
          (parser)->mark.index ++,                                              \
          (parser)->mark.column ++,                                             \
          (parser)->unread --,                                                  \
          1) : 0)
00525 
/*
 * Copy a line break character to a string buffer and advance pointers.
 *
 * STRING_EXTEND() is asked to make room in `string` first; if it fails the
 * macro yields 0.  Otherwise the break at the current buffer position is
 * consumed and the macro yields 1:
 *
 *  - CR LF          -> a single LF is stored; 2 bytes, 2 characters consumed.
 *  - CR or LF       -> a single LF is stored; 1 byte, 1 character consumed.
 *  - NEL (C2 85)    -> a single LF is stored; 2 bytes, 1 character consumed.
 *  - LS/PS (E2 80 A8 / E2 80 A9) -> the 3 bytes are copied verbatim;
 *                      1 character consumed.
 *  - anything else  -> nothing is consumed (the macro still yields 1).
 *
 * For every recognised break mark.column is reset to 0 and mark.line is
 * incremented.
 *
 * `parser` is parenthesized in the expansion but is evaluated several
 * times, so it must not have side effects.
 */

#define READ_LINE(parser,string)                                                \
    (STRING_EXTEND(parser,string) ?                                             \
    (((CHECK_AT((parser)->buffer,'\r',0)                                        \
       && CHECK_AT((parser)->buffer,'\n',1)) ?      /* CR LF -> LF */           \
     (*((string).pointer++) = (yaml_char_t) '\n',                               \
      (parser)->buffer.pointer += 2,                                            \
      (parser)->mark.index += 2,                                                \
      (parser)->mark.column = 0,                                                \
      (parser)->mark.line ++,                                                   \
      (parser)->unread -= 2) :                                                  \
     (CHECK_AT((parser)->buffer,'\r',0)                                         \
      || CHECK_AT((parser)->buffer,'\n',0)) ?       /* CR|LF -> LF */           \
     (*((string).pointer++) = (yaml_char_t) '\n',                               \
      (parser)->buffer.pointer ++,                                              \
      (parser)->mark.index ++,                                                  \
      (parser)->mark.column = 0,                                                \
      (parser)->mark.line ++,                                                   \
      (parser)->unread --) :                                                    \
     (CHECK_AT((parser)->buffer,'\xC2',0)                                       \
      && CHECK_AT((parser)->buffer,'\x85',1)) ?     /* NEL -> LF */             \
     (*((string).pointer++) = (yaml_char_t) '\n',                               \
      (parser)->buffer.pointer += 2,                                            \
      (parser)->mark.index ++,                                                  \
      (parser)->mark.column = 0,                                                \
      (parser)->mark.line ++,                                                   \
      (parser)->unread --) :                                                    \
     (CHECK_AT((parser)->buffer,'\xE2',0) &&                                    \
      CHECK_AT((parser)->buffer,'\x80',1) &&                                    \
      (CHECK_AT((parser)->buffer,'\xA8',2) ||                                   \
       CHECK_AT((parser)->buffer,'\xA9',2))) ?      /* LS|PS -> LS|PS */        \
     (*((string).pointer++) = *((parser)->buffer.pointer++),                    \
      *((string).pointer++) = *((parser)->buffer.pointer++),                    \
      *((string).pointer++) = *((parser)->buffer.pointer++),                    \
      (parser)->mark.index ++,                                                  \
      (parser)->mark.column = 0,                                                \
      (parser)->mark.line ++,                                                   \
      (parser)->unread --) : 0),                                                \
    1) : 0)
00568 
00569 /*
00570  * Public API declarations.
00571  */
00572 
00573 YAML_DECLARE(int)
00574 yaml_parser_scan(yaml_parser_t *parser, yaml_token_t *token);
00575 
00576 /*
00577  * Error handling.
00578  */
00579 
00580 static int
00581 yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context,
00582         yaml_mark_t context_mark, const char *problem);
00583 
00584 /*
00585  * High-level token API.
00586  */
00587 
00588 YAML_DECLARE(int)
00589 yaml_parser_fetch_more_tokens(yaml_parser_t *parser);
00590 
00591 static int
00592 yaml_parser_fetch_next_token(yaml_parser_t *parser);
00593 
00594 /*
00595  * Potential simple keys.
00596  */
00597 
00598 static int
00599 yaml_parser_stale_simple_keys(yaml_parser_t *parser);
00600 
00601 static int
00602 yaml_parser_save_simple_key(yaml_parser_t *parser);
00603 
00604 static int
00605 yaml_parser_remove_simple_key(yaml_parser_t *parser);
00606 
00607 static int
00608 yaml_parser_increase_flow_level(yaml_parser_t *parser);
00609 
00610 static int
00611 yaml_parser_decrease_flow_level(yaml_parser_t *parser);
00612 
00613 /*
00614  * Indentation treatment.
00615  */
00616 
00617 static int
00618 yaml_parser_roll_indent(yaml_parser_t *parser, ptrdiff_t column,
00619         ptrdiff_t number, yaml_token_type_t type, yaml_mark_t mark);
00620 
00621 static int
00622 yaml_parser_unroll_indent(yaml_parser_t *parser, ptrdiff_t column);
00623 
00624 /*
00625  * Token fetchers.
00626  */
00627 
00628 static int
00629 yaml_parser_fetch_stream_start(yaml_parser_t *parser);
00630 
00631 static int
00632 yaml_parser_fetch_stream_end(yaml_parser_t *parser);
00633 
00634 static int
00635 yaml_parser_fetch_directive(yaml_parser_t *parser);
00636 
00637 static int
00638 yaml_parser_fetch_document_indicator(yaml_parser_t *parser,
00639         yaml_token_type_t type);
00640 
00641 static int
00642 yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser,
00643         yaml_token_type_t type);
00644 
00645 static int
00646 yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser,
00647         yaml_token_type_t type);
00648 
00649 static int
00650 yaml_parser_fetch_flow_entry(yaml_parser_t *parser);
00651 
00652 static int
00653 yaml_parser_fetch_block_entry(yaml_parser_t *parser);
00654 
00655 static int
00656 yaml_parser_fetch_key(yaml_parser_t *parser);
00657 
00658 static int
00659 yaml_parser_fetch_value(yaml_parser_t *parser);
00660 
00661 static int
00662 yaml_parser_fetch_anchor(yaml_parser_t *parser, yaml_token_type_t type);
00663 
00664 static int
00665 yaml_parser_fetch_tag(yaml_parser_t *parser);
00666 
00667 static int
00668 yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal);
00669 
00670 static int
00671 yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single);
00672 
00673 static int
00674 yaml_parser_fetch_plain_scalar(yaml_parser_t *parser);
00675 
00676 /*
00677  * Token scanners.
00678  */
00679 
00680 static int
00681 yaml_parser_scan_to_next_token(yaml_parser_t *parser);
00682 
00683 static int
00684 yaml_parser_scan_directive(yaml_parser_t *parser, yaml_token_t *token);
00685 
00686 static int
00687 yaml_parser_scan_directive_name(yaml_parser_t *parser,
00688         yaml_mark_t start_mark, yaml_char_t **name);
00689 
00690 static int
00691 yaml_parser_scan_version_directive_value(yaml_parser_t *parser,
00692         yaml_mark_t start_mark, int *major, int *minor);
00693 
00694 static int
00695 yaml_parser_scan_version_directive_number(yaml_parser_t *parser,
00696         yaml_mark_t start_mark, int *number);
00697 
00698 static int
00699 yaml_parser_scan_tag_directive_value(yaml_parser_t *parser,
00700         yaml_mark_t mark, yaml_char_t **handle, yaml_char_t **prefix);
00701 
00702 static int
00703 yaml_parser_scan_anchor(yaml_parser_t *parser, yaml_token_t *token,
00704         yaml_token_type_t type);
00705 
00706 static int
00707 yaml_parser_scan_tag(yaml_parser_t *parser, yaml_token_t *token);
00708 
00709 static int
00710 yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive,
00711         yaml_mark_t start_mark, yaml_char_t **handle);
00712 
00713 static int
00714 yaml_parser_scan_tag_uri(yaml_parser_t *parser, int directive,
00715         yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri);
00716 
00717 static int
00718 yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive,
00719         yaml_mark_t start_mark, yaml_string_t *string);
00720 
00721 static int
00722 yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token,
00723         int literal);
00724 
00725 static int
00726 yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser,
00727         int *indent, yaml_string_t *breaks,
00728         yaml_mark_t start_mark, yaml_mark_t *end_mark);
00729 
00730 static int
00731 yaml_parser_scan_flow_scalar(yaml_parser_t *parser, yaml_token_t *token,
00732         int single);
00733 
00734 static int
00735 yaml_parser_scan_plain_scalar(yaml_parser_t *parser, yaml_token_t *token);
00736 
00737 /*
00738  * Get the next token.
00739  */
00740 
00741 YAML_DECLARE(int)
00742 yaml_parser_scan(yaml_parser_t *parser, yaml_token_t *token)
00743 {
00744     assert(parser); /* Non-NULL parser object is expected. */
00745     assert(token);  /* Non-NULL token object is expected. */
00746 
00747     /* Erase the token object. */
00748 
00749     memset(token, 0, sizeof(yaml_token_t));
00750 
00751     /* No tokens after STREAM-END or error. */
00752 
00753     if (parser->stream_end_produced || parser->error) {
00754         return 1;
00755     }
00756 
00757     /* Ensure that the tokens queue contains enough tokens. */
00758 
00759     if (!parser->token_available) {
00760         if (!yaml_parser_fetch_more_tokens(parser))
00761             return 0;
00762     }
00763 
00764     /* Fetch the next token from the queue. */
00765 
00766     *token = DEQUEUE(parser, parser->tokens);
00767     parser->token_available = 0;
00768     parser->tokens_parsed ++;
00769 
00770     if (token->type == YAML_STREAM_END_TOKEN) {
00771         parser->stream_end_produced = 1;
00772     }
00773 
00774     return 1;
00775 }
00776 
00777 /*
00778  * Set the scanner error and return 0.
00779  */
00780 
00781 static int
00782 yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context,
00783         yaml_mark_t context_mark, const char *problem)
00784 {
00785     parser->error = YAML_SCANNER_ERROR;
00786     parser->context = context;
00787     parser->context_mark = context_mark;
00788     parser->problem = problem;
00789     parser->problem_mark = parser->mark;
00790 
00791     return 0;
00792 }
00793 
00794 /*
00795  * Ensure that the tokens queue contains at least one token which can be
00796  * returned to the Parser.
00797  */
00798 
00799 YAML_DECLARE(int)
00800 yaml_parser_fetch_more_tokens(yaml_parser_t *parser)
00801 {
00802     int need_more_tokens;
00803 
00804     /* While we need more tokens to fetch, do it. */
00805 
00806     while (1)
00807     {
00808         /*
00809          * Check if we really need to fetch more tokens.
00810          */
00811 
00812         need_more_tokens = 0;
00813 
00814         if (parser->tokens.head == parser->tokens.tail)
00815         {
00816             /* Queue is empty. */
00817 
00818             need_more_tokens = 1;
00819         }
00820         else
00821         {
00822             yaml_simple_key_t *simple_key;
00823 
00824             /* Check if any potential simple key may occupy the head position. */
00825 
00826             if (!yaml_parser_stale_simple_keys(parser))
00827                 return 0;
00828 
00829             for (simple_key = parser->simple_keys.start;
00830                     simple_key != parser->simple_keys.top; simple_key++) {
00831                 if (simple_key->possible
00832                         && simple_key->token_number == parser->tokens_parsed) {
00833                     need_more_tokens = 1;
00834                     break;
00835                 }
00836             }
00837         }
00838 
00839         /* We are finished. */
00840 
00841         if (!need_more_tokens)
00842             break;
00843 
00844         /* Fetch the next token. */
00845 
00846         if (!yaml_parser_fetch_next_token(parser))
00847             return 0;
00848     }
00849 
00850     parser->token_available = 1;
00851 
00852     return 1;
00853 }
00854 
00855 /*
00856  * The dispatcher for token fetchers.
00857  */
00858 
00859 static int
00860 yaml_parser_fetch_next_token(yaml_parser_t *parser)
00861 {
00862     /* Ensure that the buffer is initialized. */
00863 
00864     if (!CACHE(parser, 1))
00865         return 0;
00866 
00867     /* Check if we just started scanning.  Fetch STREAM-START then. */
00868 
00869     if (!parser->stream_start_produced)
00870         return yaml_parser_fetch_stream_start(parser);
00871 
00872     /* Eat whitespaces and comments until we reach the next token. */
00873 
00874     if (!yaml_parser_scan_to_next_token(parser))
00875         return 0;
00876 
00877     /* Remove obsolete potential simple keys. */
00878 
00879     if (!yaml_parser_stale_simple_keys(parser))
00880         return 0;
00881 
00882     /* Check the indentation level against the current column. */
00883 
00884     if (!yaml_parser_unroll_indent(parser, parser->mark.column))
00885         return 0;
00886 
00887     /*
00888      * Ensure that the buffer contains at least 4 characters.  4 is the length
00889      * of the longest indicators ('--- ' and '... ').
00890      */
00891 
00892     if (!CACHE(parser, 4))
00893         return 0;
00894 
00895     /* Is it the end of the stream? */
00896 
00897     if (IS_Z(parser->buffer))
00898         return yaml_parser_fetch_stream_end(parser);
00899 
00900     /* Is it a directive? */
00901 
00902     if (parser->mark.column == 0 && CHECK(parser->buffer, '%'))
00903         return yaml_parser_fetch_directive(parser);
00904 
00905     /* Is it the document start indicator? */
00906 
00907     if (parser->mark.column == 0
00908             && CHECK_AT(parser->buffer, '-', 0)
00909             && CHECK_AT(parser->buffer, '-', 1)
00910             && CHECK_AT(parser->buffer, '-', 2)
00911             && IS_BLANKZ_AT(parser->buffer, 3))
00912         return yaml_parser_fetch_document_indicator(parser,
00913                 YAML_DOCUMENT_START_TOKEN);
00914 
00915     /* Is it the document end indicator? */
00916 
00917     if (parser->mark.column == 0
00918             && CHECK_AT(parser->buffer, '.', 0)
00919             && CHECK_AT(parser->buffer, '.', 1)
00920             && CHECK_AT(parser->buffer, '.', 2)
00921             && IS_BLANKZ_AT(parser->buffer, 3))
00922         return yaml_parser_fetch_document_indicator(parser,
00923                 YAML_DOCUMENT_END_TOKEN);
00924 
00925     /* Is it the flow sequence start indicator? */
00926 
00927     if (CHECK(parser->buffer, '['))
00928         return yaml_parser_fetch_flow_collection_start(parser,
00929                 YAML_FLOW_SEQUENCE_START_TOKEN);
00930 
00931     /* Is it the flow mapping start indicator? */
00932 
00933     if (CHECK(parser->buffer, '{'))
00934         return yaml_parser_fetch_flow_collection_start(parser,
00935                 YAML_FLOW_MAPPING_START_TOKEN);
00936 
00937     /* Is it the flow sequence end indicator? */
00938 
00939     if (CHECK(parser->buffer, ']'))
00940         return yaml_parser_fetch_flow_collection_end(parser,
00941                 YAML_FLOW_SEQUENCE_END_TOKEN);
00942 
00943     /* Is it the flow mapping end indicator? */
00944 
00945     if (CHECK(parser->buffer, '}'))
00946         return yaml_parser_fetch_flow_collection_end(parser,
00947                 YAML_FLOW_MAPPING_END_TOKEN);
00948 
00949     /* Is it the flow entry indicator? */
00950 
00951     if (CHECK(parser->buffer, ','))
00952         return yaml_parser_fetch_flow_entry(parser);
00953 
00954     /* Is it the block entry indicator? */
00955 
00956     if (CHECK(parser->buffer, '-') && IS_BLANKZ_AT(parser->buffer, 1))
00957         return yaml_parser_fetch_block_entry(parser);
00958 
00959     /* Is it the key indicator? */
00960 
00961     if (CHECK(parser->buffer, '?')
00962             && (parser->flow_level || IS_BLANKZ_AT(parser->buffer, 1)))
00963         return yaml_parser_fetch_key(parser);
00964 
00965     /* Is it the value indicator? */
00966 
00967     if (CHECK(parser->buffer, ':')
00968             && (parser->flow_level || IS_BLANKZ_AT(parser->buffer, 1)))
00969         return yaml_parser_fetch_value(parser);
00970 
00971     /* Is it an alias? */
00972 
00973     if (CHECK(parser->buffer, '*'))
00974         return yaml_parser_fetch_anchor(parser, YAML_ALIAS_TOKEN);
00975 
00976     /* Is it an anchor? */
00977 
00978     if (CHECK(parser->buffer, '&'))
00979         return yaml_parser_fetch_anchor(parser, YAML_ANCHOR_TOKEN);
00980 
00981     /* Is it a tag? */
00982 
00983     if (CHECK(parser->buffer, '!'))
00984         return yaml_parser_fetch_tag(parser);
00985 
00986     /* Is it a literal scalar? */
00987 
00988     if (CHECK(parser->buffer, '|') && !parser->flow_level)
00989         return yaml_parser_fetch_block_scalar(parser, 1);
00990 
00991     /* Is it a folded scalar? */
00992 
00993     if (CHECK(parser->buffer, '>') && !parser->flow_level)
00994         return yaml_parser_fetch_block_scalar(parser, 0);
00995 
00996     /* Is it a single-quoted scalar? */
00997 
00998     if (CHECK(parser->buffer, '\''))
00999         return yaml_parser_fetch_flow_scalar(parser, 1);
01000 
01001     /* Is it a double-quoted scalar? */
01002 
01003     if (CHECK(parser->buffer, '"'))
01004         return yaml_parser_fetch_flow_scalar(parser, 0);
01005 
01006     /*
01007      * Is it a plain scalar?
01008      *
01009      * A plain scalar may start with any non-blank characters except
01010      *
01011      *      '-', '?', ':', ',', '[', ']', '{', '}',
01012      *      '#', '&', '*', '!', '|', '>', '\'', '\"',
01013      *      '%', '@', '`'.
01014      *
01015      * In the block context (and, for the '-' indicator, in the flow context
01016      * too), it may also start with the characters
01017      *
01018      *      '-', '?', ':'
01019      *
01020      * if it is followed by a non-space character.
01021      *
01022      * The last rule is more restrictive than the specification requires.
01023      */
01024 
01025     if (!(IS_BLANKZ(parser->buffer) || CHECK(parser->buffer, '-')
01026                 || CHECK(parser->buffer, '?') || CHECK(parser->buffer, ':')
01027                 || CHECK(parser->buffer, ',') || CHECK(parser->buffer, '[')
01028                 || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '{')
01029                 || CHECK(parser->buffer, '}') || CHECK(parser->buffer, '#')
01030                 || CHECK(parser->buffer, '&') || CHECK(parser->buffer, '*')
01031                 || CHECK(parser->buffer, '!') || CHECK(parser->buffer, '|')
01032                 || CHECK(parser->buffer, '>') || CHECK(parser->buffer, '\'')
01033                 || CHECK(parser->buffer, '"') || CHECK(parser->buffer, '%')
01034                 || CHECK(parser->buffer, '@') || CHECK(parser->buffer, '`')) ||
01035             (CHECK(parser->buffer, '-') && !IS_BLANK_AT(parser->buffer, 1)) ||
01036             (!parser->flow_level &&
01037              (CHECK(parser->buffer, '?') || CHECK(parser->buffer, ':'))
01038              && !IS_BLANKZ_AT(parser->buffer, 1)))
01039         return yaml_parser_fetch_plain_scalar(parser);
01040 
01041     /*
01042      * If we don't determine the token type so far, it is an error.
01043      */
01044 
01045     return yaml_parser_set_scanner_error(parser,
01046             "while scanning for the next token", parser->mark,
01047             "found character that cannot start any token");
01048 }
01049 
01050 /*
01051  * Check the list of potential simple keys and remove the positions that
01052  * cannot contain simple keys anymore.
01053  */
01054 
01055 static int
01056 yaml_parser_stale_simple_keys(yaml_parser_t *parser)
01057 {
01058     yaml_simple_key_t *simple_key;
01059 
01060     /* Check for a potential simple key for each flow level. */
01061 
01062     for (simple_key = parser->simple_keys.start;
01063             simple_key != parser->simple_keys.top; simple_key ++)
01064     {
01065         /*
01066          * The specification requires that a simple key
01067          *
01068          *  - is limited to a single line,
01069          *  - is shorter than 1024 characters.
01070          */
01071 
01072         if (simple_key->possible
01073                 && (simple_key->mark.line < parser->mark.line
01074                     || simple_key->mark.index+1024 < parser->mark.index)) {
01075 
01076             /* Check if the potential simple key to be removed is required. */
01077 
01078             if (simple_key->required) {
01079                 return yaml_parser_set_scanner_error(parser,
01080                         "while scanning a simple key", simple_key->mark,
01081                         "could not find expected ':'");
01082             }
01083 
01084             simple_key->possible = 0;
01085         }
01086     }
01087 
01088     return 1;
01089 }
01090 
01091 /*
01092  * Check if a simple key may start at the current position and add it if
01093  * needed.
01094  */
01095 
01096 static int
01097 yaml_parser_save_simple_key(yaml_parser_t *parser)
01098 {
01099     /*
01100      * A simple key is required at the current position if the scanner is in
01101      * the block context and the current column coincides with the indentation
01102      * level.
01103      */
01104 
01105     int required = (!parser->flow_level
01106             && parser->indent == (ptrdiff_t)parser->mark.column);
01107 
01108     /*
01109      * A simple key is required only when it is the first token in the current
01110      * line.  Therefore it is always allowed.  But we add a check anyway.
01111      */
01112 
01113     assert(parser->simple_key_allowed || !required);    /* Impossible. */
01114 
01115     /*
01116      * If the current position may start a simple key, save it.
01117      */
01118 
01119     if (parser->simple_key_allowed)
01120     {
01121         yaml_simple_key_t simple_key;
01122         simple_key.possible = 1;
01123         simple_key.required = required;
01124         simple_key.token_number =
01125             parser->tokens_parsed + (parser->tokens.tail - parser->tokens.head);
01126         simple_key.mark = parser->mark;
01127 
01128         if (!yaml_parser_remove_simple_key(parser)) return 0;
01129 
01130         *(parser->simple_keys.top-1) = simple_key;
01131     }
01132 
01133     return 1;
01134 }
01135 
01136 /*
01137  * Remove a potential simple key at the current flow level.
01138  */
01139 
01140 static int
01141 yaml_parser_remove_simple_key(yaml_parser_t *parser)
01142 {
01143     yaml_simple_key_t *simple_key = parser->simple_keys.top-1;
01144 
01145     if (simple_key->possible)
01146     {
01147         /* If the key is required, it is an error. */
01148 
01149         if (simple_key->required) {
01150             return yaml_parser_set_scanner_error(parser,
01151                     "while scanning a simple key", simple_key->mark,
01152                     "could not find expected ':'");
01153         }
01154     }
01155 
01156     /* Remove the key from the stack. */
01157 
01158     simple_key->possible = 0;
01159 
01160     return 1;
01161 }
01162 
01163 /*
01164  * Increase the flow level and resize the simple key list if needed.
01165  */
01166 
01167 static int
01168 yaml_parser_increase_flow_level(yaml_parser_t *parser)
01169 {
01170     yaml_simple_key_t empty_simple_key = { 0, 0, 0, { 0, 0, 0 } };
01171 
01172     /* Reset the simple key on the next level. */
01173 
01174     if (!PUSH(parser, parser->simple_keys, empty_simple_key))
01175         return 0;
01176 
01177     /* Increase the flow level. */
01178 
01179     if (parser->flow_level == INT_MAX) {
01180         parser->error = YAML_MEMORY_ERROR;
01181         return 0;
01182     }
01183 
01184     parser->flow_level++;
01185 
01186     return 1;
01187 }
01188 
01189 /*
01190  * Decrease the flow level.
01191  */
01192 
01193 static int
01194 yaml_parser_decrease_flow_level(yaml_parser_t *parser)
01195 {
01196     if (parser->flow_level) {
01197         parser->flow_level --;
01198         (void)POP(parser, parser->simple_keys);
01199     }
01200 
01201     return 1;
01202 }
01203 
01204 /*
01205  * Push the current indentation level to the stack and set the new level
01206  * the current column is greater than the indentation level.  In this case,
01207  * append or insert the specified token into the token queue.
01208  *
01209  */
01210 
01211 static int
01212 yaml_parser_roll_indent(yaml_parser_t *parser, ptrdiff_t column,
01213         ptrdiff_t number, yaml_token_type_t type, yaml_mark_t mark)
01214 {
01215     yaml_token_t token;
01216 
01217     /* In the flow context, do nothing. */
01218 
01219     if (parser->flow_level)
01220         return 1;
01221 
01222     if (parser->indent < column)
01223     {
01224         /*
01225          * Push the current indentation level to the stack and set the new
01226          * indentation level.
01227          */
01228 
01229         if (!PUSH(parser, parser->indents, parser->indent))
01230             return 0;
01231 
01232 #if PTRDIFF_MAX > INT_MAX
01233         if (column > INT_MAX) {
01234             parser->error = YAML_MEMORY_ERROR;
01235             return 0;
01236         }
01237 #endif
01238 
01239         parser->indent = (int)column;
01240 
01241         /* Create a token and insert it into the queue. */
01242 
01243         TOKEN_INIT(token, type, mark, mark);
01244 
01245         if (number == -1) {
01246             if (!ENQUEUE(parser, parser->tokens, token))
01247                 return 0;
01248         }
01249         else {
01250             if (!QUEUE_INSERT(parser,
01251                         parser->tokens, number - parser->tokens_parsed, token))
01252                 return 0;
01253         }
01254     }
01255 
01256     return 1;
01257 }
01258 
01259 /*
01260  * Pop indentation levels from the indents stack until the current level
01261  * becomes less or equal to the column.  For each intendation level, append
01262  * the BLOCK-END token.
01263  */
01264 
01265 
01266 static int
01267 yaml_parser_unroll_indent(yaml_parser_t *parser, ptrdiff_t column)
01268 {
01269     yaml_token_t token;
01270 
01271     /* In the flow context, do nothing. */
01272 
01273     if (parser->flow_level)
01274         return 1;
01275 
01276     /* Loop through the intendation levels in the stack. */
01277 
01278     while (parser->indent > column)
01279     {
01280         /* Create a token and append it to the queue. */
01281 
01282         TOKEN_INIT(token, YAML_BLOCK_END_TOKEN, parser->mark, parser->mark);
01283 
01284         if (!ENQUEUE(parser, parser->tokens, token))
01285             return 0;
01286 
01287         /* Pop the indentation level. */
01288 
01289         parser->indent = POP(parser, parser->indents);
01290     }
01291 
01292     return 1;
01293 }
01294 
01295 /*
01296  * Initialize the scanner and produce the STREAM-START token.
01297  */
01298 
01299 static int
01300 yaml_parser_fetch_stream_start(yaml_parser_t *parser)
01301 {
01302     yaml_simple_key_t simple_key = { 0, 0, 0, { 0, 0, 0 } };
01303     yaml_token_t token;
01304 
01305     /* Set the initial indentation. */
01306 
01307     parser->indent = -1;
01308 
01309     /* Initialize the simple key stack. */
01310 
01311     if (!PUSH(parser, parser->simple_keys, simple_key))
01312         return 0;
01313 
01314     /* A simple key is allowed at the beginning of the stream. */
01315 
01316     parser->simple_key_allowed = 1;
01317 
01318     /* We have started. */
01319 
01320     parser->stream_start_produced = 1;
01321 
01322     /* Create the STREAM-START token and append it to the queue. */
01323 
01324     STREAM_START_TOKEN_INIT(token, parser->encoding,
01325             parser->mark, parser->mark);
01326 
01327     if (!ENQUEUE(parser, parser->tokens, token))
01328         return 0;
01329 
01330     return 1;
01331 }
01332 
01333 /*
01334  * Produce the STREAM-END token and shut down the scanner.
01335  */
01336 
01337 static int
01338 yaml_parser_fetch_stream_end(yaml_parser_t *parser)
01339 {
01340     yaml_token_t token;
01341 
01342     /* Force new line. */
01343 
01344     if (parser->mark.column != 0) {
01345         parser->mark.column = 0;
01346         parser->mark.line ++;
01347     }
01348 
01349     /* Reset the indentation level. */
01350 
01351     if (!yaml_parser_unroll_indent(parser, -1))
01352         return 0;
01353 
01354     /* Reset simple keys. */
01355 
01356     if (!yaml_parser_remove_simple_key(parser))
01357         return 0;
01358 
01359     parser->simple_key_allowed = 0;
01360 
01361     /* Create the STREAM-END token and append it to the queue. */
01362 
01363     STREAM_END_TOKEN_INIT(token, parser->mark, parser->mark);
01364 
01365     if (!ENQUEUE(parser, parser->tokens, token))
01366         return 0;
01367 
01368     return 1;
01369 }
01370 
01371 /*
01372  * Produce a VERSION-DIRECTIVE or TAG-DIRECTIVE token.
01373  */
01374 
01375 static int
01376 yaml_parser_fetch_directive(yaml_parser_t *parser)
01377 {
01378     yaml_token_t token;
01379 
01380     /* Reset the indentation level. */
01381 
01382     if (!yaml_parser_unroll_indent(parser, -1))
01383         return 0;
01384 
01385     /* Reset simple keys. */
01386 
01387     if (!yaml_parser_remove_simple_key(parser))
01388         return 0;
01389 
01390     parser->simple_key_allowed = 0;
01391 
01392     /* Create the YAML-DIRECTIVE or TAG-DIRECTIVE token. */
01393 
01394     if (!yaml_parser_scan_directive(parser, &token))
01395         return 0;
01396 
01397     /* Append the token to the queue. */
01398 
01399     if (!ENQUEUE(parser, parser->tokens, token)) {
01400         yaml_token_delete(&token);
01401         return 0;
01402     }
01403 
01404     return 1;
01405 }
01406 
01407 /*
01408  * Produce the DOCUMENT-START or DOCUMENT-END token.
01409  */
01410 
01411 static int
01412 yaml_parser_fetch_document_indicator(yaml_parser_t *parser,
01413         yaml_token_type_t type)
01414 {
01415     yaml_mark_t start_mark, end_mark;
01416     yaml_token_t token;
01417 
01418     /* Reset the indentation level. */
01419 
01420     if (!yaml_parser_unroll_indent(parser, -1))
01421         return 0;
01422 
01423     /* Reset simple keys. */
01424 
01425     if (!yaml_parser_remove_simple_key(parser))
01426         return 0;
01427 
01428     parser->simple_key_allowed = 0;
01429 
01430     /* Consume the token. */
01431 
01432     start_mark = parser->mark;
01433 
01434     SKIP(parser);
01435     SKIP(parser);
01436     SKIP(parser);
01437 
01438     end_mark = parser->mark;
01439 
01440     /* Create the DOCUMENT-START or DOCUMENT-END token. */
01441 
01442     TOKEN_INIT(token, type, start_mark, end_mark);
01443 
01444     /* Append the token to the queue. */
01445 
01446     if (!ENQUEUE(parser, parser->tokens, token))
01447         return 0;
01448 
01449     return 1;
01450 }
01451 
01452 /*
01453  * Produce the FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
01454  */
01455 
01456 static int
01457 yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser,
01458         yaml_token_type_t type)
01459 {
01460     yaml_mark_t start_mark, end_mark;
01461     yaml_token_t token;
01462 
01463     /* The indicators '[' and '{' may start a simple key. */
01464 
01465     if (!yaml_parser_save_simple_key(parser))
01466         return 0;
01467 
01468     /* Increase the flow level. */
01469 
01470     if (!yaml_parser_increase_flow_level(parser))
01471         return 0;
01472 
01473     /* A simple key may follow the indicators '[' and '{'. */
01474 
01475     parser->simple_key_allowed = 1;
01476 
01477     /* Consume the token. */
01478 
01479     start_mark = parser->mark;
01480     SKIP(parser);
01481     end_mark = parser->mark;
01482 
01483     /* Create the FLOW-SEQUENCE-START of FLOW-MAPPING-START token. */
01484 
01485     TOKEN_INIT(token, type, start_mark, end_mark);
01486 
01487     /* Append the token to the queue. */
01488 
01489     if (!ENQUEUE(parser, parser->tokens, token))
01490         return 0;
01491 
01492     return 1;
01493 }
01494 
01495 /*
01496  * Produce the FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
01497  */
01498 
01499 static int
01500 yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser,
01501         yaml_token_type_t type)
01502 {
01503     yaml_mark_t start_mark, end_mark;
01504     yaml_token_t token;
01505 
01506     /* Reset any potential simple key on the current flow level. */
01507 
01508     if (!yaml_parser_remove_simple_key(parser))
01509         return 0;
01510 
01511     /* Decrease the flow level. */
01512 
01513     if (!yaml_parser_decrease_flow_level(parser))
01514         return 0;
01515 
01516     /* No simple keys after the indicators ']' and '}'. */
01517 
01518     parser->simple_key_allowed = 0;
01519 
01520     /* Consume the token. */
01521 
01522     start_mark = parser->mark;
01523     SKIP(parser);
01524     end_mark = parser->mark;
01525 
01526     /* Create the FLOW-SEQUENCE-END of FLOW-MAPPING-END token. */
01527 
01528     TOKEN_INIT(token, type, start_mark, end_mark);
01529 
01530     /* Append the token to the queue. */
01531 
01532     if (!ENQUEUE(parser, parser->tokens, token))
01533         return 0;
01534 
01535     return 1;
01536 }
01537 
01538 /*
01539  * Produce the FLOW-ENTRY token.
01540  */
01541 
01542 static int
01543 yaml_parser_fetch_flow_entry(yaml_parser_t *parser)
01544 {
01545     yaml_mark_t start_mark, end_mark;
01546     yaml_token_t token;
01547 
01548     /* Reset any potential simple keys on the current flow level. */
01549 
01550     if (!yaml_parser_remove_simple_key(parser))
01551         return 0;
01552 
01553     /* Simple keys are allowed after ','. */
01554 
01555     parser->simple_key_allowed = 1;
01556 
01557     /* Consume the token. */
01558 
01559     start_mark = parser->mark;
01560     SKIP(parser);
01561     end_mark = parser->mark;
01562 
01563     /* Create the FLOW-ENTRY token and append it to the queue. */
01564 
01565     TOKEN_INIT(token, YAML_FLOW_ENTRY_TOKEN, start_mark, end_mark);
01566 
01567     if (!ENQUEUE(parser, parser->tokens, token))
01568         return 0;
01569 
01570     return 1;
01571 }
01572 
01573 /*
01574  * Produce the BLOCK-ENTRY token.
01575  */
01576 
01577 static int
01578 yaml_parser_fetch_block_entry(yaml_parser_t *parser)
01579 {
01580     yaml_mark_t start_mark, end_mark;
01581     yaml_token_t token;
01582 
01583     /* Check if the scanner is in the block context. */
01584 
01585     if (!parser->flow_level)
01586     {
01587         /* Check if we are allowed to start a new entry. */
01588 
01589         if (!parser->simple_key_allowed) {
01590             return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
01591                     "block sequence entries are not allowed in this context");
01592         }
01593 
01594         /* Add the BLOCK-SEQUENCE-START token if needed. */
01595 
01596         if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
01597                     YAML_BLOCK_SEQUENCE_START_TOKEN, parser->mark))
01598             return 0;
01599     }
01600     else
01601     {
01602         /*
01603          * It is an error for the '-' indicator to occur in the flow context,
01604          * but we let the Parser detect and report about it because the Parser
01605          * is able to point to the context.
01606          */
01607     }
01608 
01609     /* Reset any potential simple keys on the current flow level. */
01610 
01611     if (!yaml_parser_remove_simple_key(parser))
01612         return 0;
01613 
01614     /* Simple keys are allowed after '-'. */
01615 
01616     parser->simple_key_allowed = 1;
01617 
01618     /* Consume the token. */
01619 
01620     start_mark = parser->mark;
01621     SKIP(parser);
01622     end_mark = parser->mark;
01623 
01624     /* Create the BLOCK-ENTRY token and append it to the queue. */
01625 
01626     TOKEN_INIT(token, YAML_BLOCK_ENTRY_TOKEN, start_mark, end_mark);
01627 
01628     if (!ENQUEUE(parser, parser->tokens, token))
01629         return 0;
01630 
01631     return 1;
01632 }
01633 
01634 /*
01635  * Produce the KEY token.
01636  */
01637 
01638 static int
01639 yaml_parser_fetch_key(yaml_parser_t *parser)
01640 {
01641     yaml_mark_t start_mark, end_mark;
01642     yaml_token_t token;
01643 
01644     /* In the block context, additional checks are required. */
01645 
01646     if (!parser->flow_level)
01647     {
01648         /* Check if we are allowed to start a new key (not nessesary simple). */
01649 
01650         if (!parser->simple_key_allowed) {
01651             return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
01652                     "mapping keys are not allowed in this context");
01653         }
01654 
01655         /* Add the BLOCK-MAPPING-START token if needed. */
01656 
01657         if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
01658                     YAML_BLOCK_MAPPING_START_TOKEN, parser->mark))
01659             return 0;
01660     }
01661 
01662     /* Reset any potential simple keys on the current flow level. */
01663 
01664     if (!yaml_parser_remove_simple_key(parser))
01665         return 0;
01666 
01667     /* Simple keys are allowed after '?' in the block context. */
01668 
01669     parser->simple_key_allowed = (!parser->flow_level);
01670 
01671     /* Consume the token. */
01672 
01673     start_mark = parser->mark;
01674     SKIP(parser);
01675     end_mark = parser->mark;
01676 
01677     /* Create the KEY token and append it to the queue. */
01678 
01679     TOKEN_INIT(token, YAML_KEY_TOKEN, start_mark, end_mark);
01680 
01681     if (!ENQUEUE(parser, parser->tokens, token))
01682         return 0;
01683 
01684     return 1;
01685 }
01686 
01687 /*
01688  * Produce the VALUE token.
01689  */
01690 
01691 static int
01692 yaml_parser_fetch_value(yaml_parser_t *parser)
01693 {
01694     yaml_mark_t start_mark, end_mark;
01695     yaml_token_t token;
01696     yaml_simple_key_t *simple_key = parser->simple_keys.top-1;
01697 
01698     /* Have we found a simple key? */
01699 
01700     if (simple_key->possible)
01701     {
01702 
01703         /* Create the KEY token and insert it into the queue. */
01704 
01705         TOKEN_INIT(token, YAML_KEY_TOKEN, simple_key->mark, simple_key->mark);
01706 
01707         if (!QUEUE_INSERT(parser, parser->tokens,
01708                     simple_key->token_number - parser->tokens_parsed, token))
01709             return 0;
01710 
01711         /* In the block context, we may need to add the BLOCK-MAPPING-START token. */
01712 
01713         if (!yaml_parser_roll_indent(parser, simple_key->mark.column,
01714                     simple_key->token_number,
01715                     YAML_BLOCK_MAPPING_START_TOKEN, simple_key->mark))
01716             return 0;
01717 
01718         /* Remove the simple key. */
01719 
01720         simple_key->possible = 0;
01721 
01722         /* A simple key cannot follow another simple key. */
01723 
01724         parser->simple_key_allowed = 0;
01725     }
01726     else
01727     {
01728         /* The ':' indicator follows a complex key. */
01729 
01730         /* In the block context, extra checks are required. */
01731 
01732         if (!parser->flow_level)
01733         {
01734             /* Check if we are allowed to start a complex value. */
01735 
01736             if (!parser->simple_key_allowed) {
01737                 return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
01738                         "mapping values are not allowed in this context");
01739             }
01740 
01741             /* Add the BLOCK-MAPPING-START token if needed. */
01742 
01743             if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
01744                         YAML_BLOCK_MAPPING_START_TOKEN, parser->mark))
01745                 return 0;
01746         }
01747 
01748         /* Simple keys after ':' are allowed in the block context. */
01749 
01750         parser->simple_key_allowed = (!parser->flow_level);
01751     }
01752 
01753     /* Consume the token. */
01754 
01755     start_mark = parser->mark;
01756     SKIP(parser);
01757     end_mark = parser->mark;
01758 
01759     /* Create the VALUE token and append it to the queue. */
01760 
01761     TOKEN_INIT(token, YAML_VALUE_TOKEN, start_mark, end_mark);
01762 
01763     if (!ENQUEUE(parser, parser->tokens, token))
01764         return 0;
01765 
01766     return 1;
01767 }
01768 
01769 /*
01770  * Produce the ALIAS or ANCHOR token.
01771  */
01772 
01773 static int
01774 yaml_parser_fetch_anchor(yaml_parser_t *parser, yaml_token_type_t type)
01775 {
01776     yaml_token_t token;
01777 
01778     /* An anchor or an alias could be a simple key. */
01779 
01780     if (!yaml_parser_save_simple_key(parser))
01781         return 0;
01782 
01783     /* A simple key cannot follow an anchor or an alias. */
01784 
01785     parser->simple_key_allowed = 0;
01786 
01787     /* Create the ALIAS or ANCHOR token and append it to the queue. */
01788 
01789     if (!yaml_parser_scan_anchor(parser, &token, type))
01790         return 0;
01791 
01792     if (!ENQUEUE(parser, parser->tokens, token)) {
01793         yaml_token_delete(&token);
01794         return 0;
01795     }
01796     return 1;
01797 }
01798 
01799 /*
01800  * Produce the TAG token.
01801  */
01802 
01803 static int
01804 yaml_parser_fetch_tag(yaml_parser_t *parser)
01805 {
01806     yaml_token_t token;
01807 
01808     /* A tag could be a simple key. */
01809 
01810     if (!yaml_parser_save_simple_key(parser))
01811         return 0;
01812 
01813     /* A simple key cannot follow a tag. */
01814 
01815     parser->simple_key_allowed = 0;
01816 
01817     /* Create the TAG token and append it to the queue. */
01818 
01819     if (!yaml_parser_scan_tag(parser, &token))
01820         return 0;
01821 
01822     if (!ENQUEUE(parser, parser->tokens, token)) {
01823         yaml_token_delete(&token);
01824         return 0;
01825     }
01826 
01827     return 1;
01828 }
01829 
01830 /*
01831  * Produce the SCALAR(...,literal) or SCALAR(...,folded) tokens.
01832  */
01833 
01834 static int
01835 yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal)
01836 {
01837     yaml_token_t token;
01838 
01839     /* Remove any potential simple keys. */
01840 
01841     if (!yaml_parser_remove_simple_key(parser))
01842         return 0;
01843 
01844     /* A simple key may follow a block scalar. */
01845 
01846     parser->simple_key_allowed = 1;
01847 
01848     /* Create the SCALAR token and append it to the queue. */
01849 
01850     if (!yaml_parser_scan_block_scalar(parser, &token, literal))
01851         return 0;
01852 
01853     if (!ENQUEUE(parser, parser->tokens, token)) {
01854         yaml_token_delete(&token);
01855         return 0;
01856     }
01857 
01858     return 1;
01859 }
01860 
01861 /*
01862  * Produce the SCALAR(...,single-quoted) or SCALAR(...,double-quoted) tokens.
01863  */
01864 
01865 static int
01866 yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single)
01867 {
01868     yaml_token_t token;
01869 
01870     /* A plain scalar could be a simple key. */
01871 
01872     if (!yaml_parser_save_simple_key(parser))
01873         return 0;
01874 
01875     /* A simple key cannot follow a flow scalar. */
01876 
01877     parser->simple_key_allowed = 0;
01878 
01879     /* Create the SCALAR token and append it to the queue. */
01880 
01881     if (!yaml_parser_scan_flow_scalar(parser, &token, single))
01882         return 0;
01883 
01884     if (!ENQUEUE(parser, parser->tokens, token)) {
01885         yaml_token_delete(&token);
01886         return 0;
01887     }
01888 
01889     return 1;
01890 }
01891 
01892 /*
01893  * Produce the SCALAR(...,plain) token.
01894  */
01895 
01896 static int
01897 yaml_parser_fetch_plain_scalar(yaml_parser_t *parser)
01898 {
01899     yaml_token_t token;
01900 
01901     /* A plain scalar could be a simple key. */
01902 
01903     if (!yaml_parser_save_simple_key(parser))
01904         return 0;
01905 
01906     /* A simple key cannot follow a flow scalar. */
01907 
01908     parser->simple_key_allowed = 0;
01909 
01910     /* Create the SCALAR token and append it to the queue. */
01911 
01912     if (!yaml_parser_scan_plain_scalar(parser, &token))
01913         return 0;
01914 
01915     if (!ENQUEUE(parser, parser->tokens, token)) {
01916         yaml_token_delete(&token);
01917         return 0;
01918     }
01919 
01920     return 1;
01921 }
01922 
01923 /*
01924  * Eat whitespaces and comments until the next token is found.
01925  */
01926 
01927 static int
01928 yaml_parser_scan_to_next_token(yaml_parser_t *parser)
01929 {
01930     /* Until the next token is not found. */
01931 
01932     while (1)
01933     {
01934         /* Allow the BOM mark to start a line. */
01935 
01936         if (!CACHE(parser, 1)) return 0;
01937 
01938         if (parser->mark.column == 0 && IS_BOM(parser->buffer))
01939             SKIP(parser);
01940 
01941         /*
01942          * Eat whitespaces.
01943          *
01944          * Tabs are allowed:
01945          *
01946          *  - in the flow context;
01947          *  - in the block context, but not at the beginning of the line or
01948          *  after '-', '?', or ':' (complex value).
01949          */
01950 
01951         if (!CACHE(parser, 1)) return 0;
01952 
01953         while (CHECK(parser->buffer,' ') ||
01954                 ((parser->flow_level || !parser->simple_key_allowed) &&
01955                  CHECK(parser->buffer, '\t'))) {
01956             SKIP(parser);
01957             if (!CACHE(parser, 1)) return 0;
01958         }
01959 
01960         /* Eat a comment until a line break. */
01961 
01962         if (CHECK(parser->buffer, '#')) {
01963             while (!IS_BREAKZ(parser->buffer)) {
01964                 SKIP(parser);
01965                 if (!CACHE(parser, 1)) return 0;
01966             }
01967         }
01968 
01969         /* If it is a line break, eat it. */
01970 
01971         if (IS_BREAK(parser->buffer))
01972         {
01973             if (!CACHE(parser, 2)) return 0;
01974             SKIP_LINE(parser);
01975 
01976             /* In the block context, a new line may start a simple key. */
01977 
01978             if (!parser->flow_level) {
01979                 parser->simple_key_allowed = 1;
01980             }
01981         }
01982         else
01983         {
01984             /* We have found a token. */
01985 
01986             break;
01987         }
01988     }
01989 
01990     return 1;
01991 }
01992 
01993 /*
01994  * Scan a YAML-DIRECTIVE or TAG-DIRECTIVE token.
01995  *
01996  * Scope:
01997  *      %YAML    1.1    # a comment \n
01998  *      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
01999  *      %TAG    !yaml!  tag:yaml.org,2002:  \n
02000  *      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
02001  */
02002 
02003 int
02004 yaml_parser_scan_directive(yaml_parser_t *parser, yaml_token_t *token)
02005 {
02006     yaml_mark_t start_mark, end_mark;
02007     yaml_char_t *name = NULL;
02008     int major, minor;
02009     yaml_char_t *handle = NULL, *prefix = NULL;
02010 
02011     /* Eat '%'. */
02012 
02013     start_mark = parser->mark;
02014 
02015     SKIP(parser);
02016 
02017     /* Scan the directive name. */
02018 
02019     if (!yaml_parser_scan_directive_name(parser, start_mark, &name))
02020         goto error;
02021 
02022     /* Is it a YAML directive? */
02023 
02024     if (strcmp((char *)name, "YAML") == 0)
02025     {
02026         /* Scan the VERSION directive value. */
02027 
02028         if (!yaml_parser_scan_version_directive_value(parser, start_mark,
02029                     &major, &minor))
02030             goto error;
02031 
02032         end_mark = parser->mark;
02033 
02034         /* Create a VERSION-DIRECTIVE token. */
02035 
02036         VERSION_DIRECTIVE_TOKEN_INIT(*token, major, minor,
02037                 start_mark, end_mark);
02038     }
02039 
02040     /* Is it a TAG directive? */
02041 
02042     else if (strcmp((char *)name, "TAG") == 0)
02043     {
02044         /* Scan the TAG directive value. */
02045 
02046         if (!yaml_parser_scan_tag_directive_value(parser, start_mark,
02047                     &handle, &prefix))
02048             goto error;
02049 
02050         end_mark = parser->mark;
02051 
02052         /* Create a TAG-DIRECTIVE token. */
02053 
02054         TAG_DIRECTIVE_TOKEN_INIT(*token, handle, prefix,
02055                 start_mark, end_mark);
02056     }
02057 
02058     /* Unknown directive. */
02059 
02060     else
02061     {
02062         yaml_parser_set_scanner_error(parser, "while scanning a directive",
02063                 start_mark, "found uknown directive name");
02064         goto error;
02065     }
02066 
02067     /* Eat the rest of the line including any comments. */
02068 
02069     if (!CACHE(parser, 1)) goto error;
02070 
02071     while (IS_BLANK(parser->buffer)) {
02072         SKIP(parser);
02073         if (!CACHE(parser, 1)) goto error;
02074     }
02075 
02076     if (CHECK(parser->buffer, '#')) {
02077         while (!IS_BREAKZ(parser->buffer)) {
02078             SKIP(parser);
02079             if (!CACHE(parser, 1)) goto error;
02080         }
02081     }
02082 
02083     /* Check if we are at the end of the line. */
02084 
02085     if (!IS_BREAKZ(parser->buffer)) {
02086         yaml_parser_set_scanner_error(parser, "while scanning a directive",
02087                 start_mark, "did not find expected comment or line break");
02088         goto error;
02089     }
02090 
02091     /* Eat a line break. */
02092 
02093     if (IS_BREAK(parser->buffer)) {
02094         if (!CACHE(parser, 2)) goto error;
02095         SKIP_LINE(parser);
02096     }
02097 
02098     yaml_free(name);
02099 
02100     return 1;
02101 
02102 error:
02103     yaml_free(prefix);
02104     yaml_free(handle);
02105     yaml_free(name);
02106     return 0;
02107 }
02108 
02109 /*
02110  * Scan the directive name.
02111  *
02112  * Scope:
02113  *      %YAML   1.1     # a comment \n
02114  *       ^^^^
02115  *      %TAG    !yaml!  tag:yaml.org,2002:  \n
02116  *       ^^^
02117  */
02118 
02119 static int
02120 yaml_parser_scan_directive_name(yaml_parser_t *parser,
02121         yaml_mark_t start_mark, yaml_char_t **name)
02122 {
02123     yaml_string_t string = NULL_STRING;
02124 
02125     if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
02126 
02127     /* Consume the directive name. */
02128 
02129     if (!CACHE(parser, 1)) goto error;
02130 
02131     while (IS_ALPHA(parser->buffer))
02132     {
02133         if (!READ(parser, string)) goto error;
02134         if (!CACHE(parser, 1)) goto error;
02135     }
02136 
02137     /* Check if the name is empty. */
02138 
02139     if (string.start == string.pointer) {
02140         yaml_parser_set_scanner_error(parser, "while scanning a directive",
02141                 start_mark, "could not find expected directive name");
02142         goto error;
02143     }
02144 
02145     /* Check for an blank character after the name. */
02146 
02147     if (!IS_BLANKZ(parser->buffer)) {
02148         yaml_parser_set_scanner_error(parser, "while scanning a directive",
02149                 start_mark, "found unexpected non-alphabetical character");
02150         goto error;
02151     }
02152 
02153     *name = string.start;
02154 
02155     return 1;
02156 
02157 error:
02158     STRING_DEL(parser, string);
02159     return 0;
02160 }
02161 
02162 /*
02163  * Scan the value of VERSION-DIRECTIVE.
02164  *
02165  * Scope:
02166  *      %YAML   1.1     # a comment \n
02167  *           ^^^^^^
02168  */
02169 
02170 static int
02171 yaml_parser_scan_version_directive_value(yaml_parser_t *parser,
02172         yaml_mark_t start_mark, int *major, int *minor)
02173 {
02174     /* Eat whitespaces. */
02175 
02176     if (!CACHE(parser, 1)) return 0;
02177 
02178     while (IS_BLANK(parser->buffer)) {
02179         SKIP(parser);
02180         if (!CACHE(parser, 1)) return 0;
02181     }
02182 
02183     /* Consume the major version number. */
02184 
02185     if (!yaml_parser_scan_version_directive_number(parser, start_mark, major))
02186         return 0;
02187 
02188     /* Eat '.'. */
02189 
02190     if (!CHECK(parser->buffer, '.')) {
02191         return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
02192                 start_mark, "did not find expected digit or '.' character");
02193     }
02194 
02195     SKIP(parser);
02196 
02197     /* Consume the minor version number. */
02198 
02199     if (!yaml_parser_scan_version_directive_number(parser, start_mark, minor))
02200         return 0;
02201 
02202     return 1;
02203 }
02204 
02205 #define MAX_NUMBER_LENGTH   9
02206 
02207 /*
02208  * Scan the version number of VERSION-DIRECTIVE.
02209  *
02210  * Scope:
02211  *      %YAML   1.1     # a comment \n
02212  *              ^
02213  *      %YAML   1.1     # a comment \n
02214  *                ^
02215  */
02216 
02217 static int
02218 yaml_parser_scan_version_directive_number(yaml_parser_t *parser,
02219         yaml_mark_t start_mark, int *number)
02220 {
02221     int value = 0;
02222     size_t length = 0;
02223 
02224     /* Repeat while the next character is digit. */
02225 
02226     if (!CACHE(parser, 1)) return 0;
02227 
02228     while (IS_DIGIT(parser->buffer))
02229     {
02230         /* Check if the number is too long. */
02231 
02232         if (++length > MAX_NUMBER_LENGTH) {
02233             return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
02234                     start_mark, "found extremely long version number");
02235         }
02236 
02237         value = value*10 + AS_DIGIT(parser->buffer);
02238 
02239         SKIP(parser);
02240 
02241         if (!CACHE(parser, 1)) return 0;
02242     }
02243 
02244     /* Check if the number was present. */
02245 
02246     if (!length) {
02247         return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
02248                 start_mark, "did not find expected version number");
02249     }
02250 
02251     *number = value;
02252 
02253     return 1;
02254 }
02255 
02256 /*
02257  * Scan the value of a TAG-DIRECTIVE token.
02258  *
02259  * Scope:
02260  *      %TAG    !yaml!  tag:yaml.org,2002:  \n
02261  *          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
02262  */
02263 
02264 static int
02265 yaml_parser_scan_tag_directive_value(yaml_parser_t *parser,
02266         yaml_mark_t start_mark, yaml_char_t **handle, yaml_char_t **prefix)
02267 {
02268     yaml_char_t *handle_value = NULL;
02269     yaml_char_t *prefix_value = NULL;
02270 
02271     /* Eat whitespaces. */
02272 
02273     if (!CACHE(parser, 1)) goto error;
02274 
02275     while (IS_BLANK(parser->buffer)) {
02276         SKIP(parser);
02277         if (!CACHE(parser, 1)) goto error;
02278     }
02279 
02280     /* Scan a handle. */
02281 
02282     if (!yaml_parser_scan_tag_handle(parser, 1, start_mark, &handle_value))
02283         goto error;
02284 
02285     /* Expect a whitespace. */
02286 
02287     if (!CACHE(parser, 1)) goto error;
02288 
02289     if (!IS_BLANK(parser->buffer)) {
02290         yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
02291                 start_mark, "did not find expected whitespace");
02292         goto error;
02293     }
02294 
02295     /* Eat whitespaces. */
02296 
02297     while (IS_BLANK(parser->buffer)) {
02298         SKIP(parser);
02299         if (!CACHE(parser, 1)) goto error;
02300     }
02301 
02302     /* Scan a prefix. */
02303 
02304     if (!yaml_parser_scan_tag_uri(parser, 1, NULL, start_mark, &prefix_value))
02305         goto error;
02306 
02307     /* Expect a whitespace or line break. */
02308 
02309     if (!CACHE(parser, 1)) goto error;
02310 
02311     if (!IS_BLANKZ(parser->buffer)) {
02312         yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
02313                 start_mark, "did not find expected whitespace or line break");
02314         goto error;
02315     }
02316 
02317     *handle = handle_value;
02318     *prefix = prefix_value;
02319 
02320     return 1;
02321 
02322 error:
02323     yaml_free(handle_value);
02324     yaml_free(prefix_value);
02325     return 0;
02326 }
02327 
02328 static int
02329 yaml_parser_scan_anchor(yaml_parser_t *parser, yaml_token_t *token,
02330         yaml_token_type_t type)
02331 {
02332     int length = 0;
02333     yaml_mark_t start_mark, end_mark;
02334     yaml_string_t string = NULL_STRING;
02335 
02336     if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
02337 
02338     /* Eat the indicator character. */
02339 
02340     start_mark = parser->mark;
02341 
02342     SKIP(parser);
02343 
02344     /* Consume the value. */
02345 
02346     if (!CACHE(parser, 1)) goto error;
02347 
02348     while (IS_ALPHA(parser->buffer)) {
02349         if (!READ(parser, string)) goto error;
02350         if (!CACHE(parser, 1)) goto error;
02351         length ++;
02352     }
02353 
02354     end_mark = parser->mark;
02355 
02356     /*
02357      * Check if length of the anchor is greater than 0 and it is followed by
02358      * a whitespace character or one of the indicators:
02359      *
02360      *      '?', ':', ',', ']', '}', '%', '@', '`'.
02361      */
02362 
02363     if (!length || !(IS_BLANKZ(parser->buffer) || CHECK(parser->buffer, '?')
02364                 || CHECK(parser->buffer, ':') || CHECK(parser->buffer, ',')
02365                 || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '}')
02366                 || CHECK(parser->buffer, '%') || CHECK(parser->buffer, '@')
02367                 || CHECK(parser->buffer, '`'))) {
02368         yaml_parser_set_scanner_error(parser, type == YAML_ANCHOR_TOKEN ?
02369                 "while scanning an anchor" : "while scanning an alias", start_mark,
02370                 "did not find expected alphabetic or numeric character");
02371         goto error;
02372     }
02373 
02374     /* Create a token. */
02375 
02376     if (type == YAML_ANCHOR_TOKEN) {
02377         ANCHOR_TOKEN_INIT(*token, string.start, start_mark, end_mark);
02378     }
02379     else {
02380         ALIAS_TOKEN_INIT(*token, string.start, start_mark, end_mark);
02381     }
02382 
02383     return 1;
02384 
02385 error:
02386     STRING_DEL(parser, string);
02387     return 0;
02388 }
02389 
02390 /*
02391  * Scan a TAG token.
02392  */
02393 
02394 static int
02395 yaml_parser_scan_tag(yaml_parser_t *parser, yaml_token_t *token)
02396 {
02397     yaml_char_t *handle = NULL;
02398     yaml_char_t *suffix = NULL;
02399     yaml_mark_t start_mark, end_mark;
02400 
02401     start_mark = parser->mark;
02402 
02403     /* Check if the tag is in the canonical form. */
02404 
02405     if (!CACHE(parser, 2)) goto error;
02406 
02407     if (CHECK_AT(parser->buffer, '<', 1))
02408     {
02409         /* Set the handle to '' */
02410 
02411         handle = yaml_malloc(1);
02412         if (!handle) goto error;
02413         handle[0] = '\0';
02414 
02415         /* Eat '!<' */
02416 
02417         SKIP(parser);
02418         SKIP(parser);
02419 
02420         /* Consume the tag value. */
02421 
02422         if (!yaml_parser_scan_tag_uri(parser, 0, NULL, start_mark, &suffix))
02423             goto error;
02424 
02425         /* Check for '>' and eat it. */
02426 
02427         if (!CHECK(parser->buffer, '>')) {
02428             yaml_parser_set_scanner_error(parser, "while scanning a tag",
02429                     start_mark, "did not find the expected '>'");
02430             goto error;
02431         }
02432 
02433         SKIP(parser);
02434     }
02435     else
02436     {
02437         /* The tag has either the '!suffix' or the '!handle!suffix' form. */
02438 
02439         /* First, try to scan a handle. */
02440 
02441         if (!yaml_parser_scan_tag_handle(parser, 0, start_mark, &handle))
02442             goto error;
02443 
02444         /* Check if it is, indeed, handle. */
02445 
02446         if (handle[0] == '!' && handle[1] != '\0' && handle[strlen((char *)handle)-1] == '!')
02447         {
02448             /* Scan the suffix now. */
02449 
02450             if (!yaml_parser_scan_tag_uri(parser, 0, NULL, start_mark, &suffix))
02451                 goto error;
02452         }
02453         else
02454         {
02455             /* It wasn't a handle after all.  Scan the rest of the tag. */
02456 
02457             if (!yaml_parser_scan_tag_uri(parser, 0, handle, start_mark, &suffix))
02458                 goto error;
02459 
02460             /* Set the handle to '!'. */
02461 
02462             yaml_free(handle);
02463             handle = yaml_malloc(2);
02464             if (!handle) goto error;
02465             handle[0] = '!';
02466             handle[1] = '\0';
02467 
02468             /*
02469              * A special case: the '!' tag.  Set the handle to '' and the
02470              * suffix to '!'.
02471              */
02472 
02473             if (suffix[0] == '\0') {
02474                 yaml_char_t *tmp = handle;
02475                 handle = suffix;
02476                 suffix = tmp;
02477             }
02478         }
02479     }
02480 
02481     /* Check the character which ends the tag. */
02482 
02483     if (!CACHE(parser, 1)) goto error;
02484 
02485     if (!IS_BLANKZ(parser->buffer)) {
02486         yaml_parser_set_scanner_error(parser, "while scanning a tag",
02487                 start_mark, "did not find expected whitespace or line break");
02488         goto error;
02489     }
02490 
02491     end_mark = parser->mark;
02492 
02493     /* Create a token. */
02494 
02495     TAG_TOKEN_INIT(*token, handle, suffix, start_mark, end_mark);
02496 
02497     return 1;
02498 
02499 error:
02500     yaml_free(handle);
02501     yaml_free(suffix);
02502     return 0;
02503 }
02504 
02505 /*
02506  * Scan a tag handle.
02507  */
02508 
02509 static int
02510 yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive,
02511         yaml_mark_t start_mark, yaml_char_t **handle)
02512 {
02513     yaml_string_t string = NULL_STRING;
02514 
02515     if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
02516 
02517     /* Check the initial '!' character. */
02518 
02519     if (!CACHE(parser, 1)) goto error;
02520 
02521     if (!CHECK(parser->buffer, '!')) {
02522         yaml_parser_set_scanner_error(parser, directive ?
02523                 "while scanning a tag directive" : "while scanning a tag",
02524                 start_mark, "did not find expected '!'");
02525         goto error;
02526     }
02527 
02528     /* Copy the '!' character. */
02529 
02530     if (!READ(parser, string)) goto error;
02531 
02532     /* Copy all subsequent alphabetical and numerical characters. */
02533 
02534     if (!CACHE(parser, 1)) goto error;
02535 
02536     while (IS_ALPHA(parser->buffer))
02537     {
02538         if (!READ(parser, string)) goto error;
02539         if (!CACHE(parser, 1)) goto error;
02540     }
02541 
02542     /* Check if the trailing character is '!' and copy it. */
02543 
02544     if (CHECK(parser->buffer, '!'))
02545     {
02546         if (!READ(parser, string)) goto error;
02547     }
02548     else
02549     {
02550         /*
02551          * It's either the '!' tag or not really a tag handle.  If it's a %TAG
02552          * directive, it's an error.  If it's a tag token, it must be a part of
02553          * URI.
02554          */
02555 
02556         if (directive && !(string.start[0] == '!' && string.start[1] == '\0')) {
02557             yaml_parser_set_scanner_error(parser, "while parsing a tag directive",
02558                     start_mark, "did not find expected '!'");
02559             goto error;
02560         }
02561     }
02562 
02563     *handle = string.start;
02564 
02565     return 1;
02566 
02567 error:
02568     STRING_DEL(parser, string);
02569     return 0;
02570 }
02571 
02572 /*
02573  * Scan a tag.
02574  */
02575 
02576 static int
02577 yaml_parser_scan_tag_uri(yaml_parser_t *parser, int directive,
02578         yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri)
02579 {
02580     size_t length = head ? strlen((char *)head) : 0;
02581     yaml_string_t string = NULL_STRING;
02582 
02583     if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
02584 
02585     /* Resize the string to include the head. */
02586 
02587     while ((size_t)(string.end - string.start) <= length) {
02588         if (!yaml_string_extend(&string.start, &string.pointer, &string.end)) {
02589             parser->error = YAML_MEMORY_ERROR;
02590             goto error;
02591         }
02592     }
02593 
02594     /*
02595      * Copy the head if needed.
02596      *
02597      * Note that we don't copy the leading '!' character.
02598      */
02599 
02600     if (length > 1) {
02601         memcpy(string.start, head+1, length-1);
02602         string.pointer += length-1;
02603     }
02604 
02605     /* Scan the tag. */
02606 
02607     if (!CACHE(parser, 1)) goto error;
02608 
02609     /*
02610      * The set of characters that may appear in URI is as follows:
02611      *
02612      *      '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&',
02613      *      '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']',
02614      *      '%'.
02615      */
02616 
02617     while (IS_ALPHA(parser->buffer) || CHECK(parser->buffer, ';')
02618             || CHECK(parser->buffer, '/') || CHECK(parser->buffer, '?')
02619             || CHECK(parser->buffer, ':') || CHECK(parser->buffer, '@')
02620             || CHECK(parser->buffer, '&') || CHECK(parser->buffer, '=')
02621             || CHECK(parser->buffer, '+') || CHECK(parser->buffer, '$')
02622             || CHECK(parser->buffer, ',') || CHECK(parser->buffer, '.')
02623             || CHECK(parser->buffer, '!') || CHECK(parser->buffer, '~')
02624             || CHECK(parser->buffer, '*') || CHECK(parser->buffer, '\'')
02625             || CHECK(parser->buffer, '(') || CHECK(parser->buffer, ')')
02626             || CHECK(parser->buffer, '[') || CHECK(parser->buffer, ']')
02627             || CHECK(parser->buffer, '%'))
02628     {
02629         /* Check if it is a URI-escape sequence. */
02630 
02631         if (CHECK(parser->buffer, '%')) {
02632             if (!STRING_EXTEND(parser, string))
02633                 goto error;
02634 
02635             if (!yaml_parser_scan_uri_escapes(parser,
02636                         directive, start_mark, &string)) goto error;
02637         }
02638         else {
02639             if (!READ(parser, string)) goto error;
02640         }
02641 
02642         length ++;
02643         if (!CACHE(parser, 1)) goto error;
02644     }
02645 
02646     /* Check if the tag is non-empty. */
02647 
02648     if (!length) {
02649         if (!STRING_EXTEND(parser, string))
02650             goto error;
02651 
02652         yaml_parser_set_scanner_error(parser, directive ?
02653                 "while parsing a %TAG directive" : "while parsing a tag",
02654                 start_mark, "did not find expected tag URI");
02655         goto error;
02656     }
02657 
02658     *uri = string.start;
02659 
02660     return 1;
02661 
02662 error:
02663     STRING_DEL(parser, string);
02664     return 0;
02665 }
02666 
02667 /*
02668  * Decode an URI-escape sequence corresponding to a single UTF-8 character.
02669  */
02670 
02671 static int
02672 yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive,
02673         yaml_mark_t start_mark, yaml_string_t *string)
02674 {
02675     int width = 0;
02676 
02677     /* Decode the required number of characters. */
02678 
02679     do {
02680 
02681         unsigned char octet = 0;
02682 
02683         /* Check for a URI-escaped octet. */
02684 
02685         if (!CACHE(parser, 3)) return 0;
02686 
02687         if (!(CHECK(parser->buffer, '%')
02688                     && IS_HEX_AT(parser->buffer, 1)
02689                     && IS_HEX_AT(parser->buffer, 2))) {
02690             return yaml_parser_set_scanner_error(parser, directive ?
02691                     "while parsing a %TAG directive" : "while parsing a tag",
02692                     start_mark, "did not find URI escaped octet");
02693         }
02694 
02695         /* Get the octet. */
02696 
02697         octet = (AS_HEX_AT(parser->buffer, 1) << 4) + AS_HEX_AT(parser->buffer, 2);
02698 
02699         /* If it is the leading octet, determine the length of the UTF-8 sequence. */
02700 
02701         if (!width)
02702         {
02703             width = (octet & 0x80) == 0x00 ? 1 :
02704                     (octet & 0xE0) == 0xC0 ? 2 :
02705                     (octet & 0xF0) == 0xE0 ? 3 :
02706                     (octet & 0xF8) == 0xF0 ? 4 : 0;
02707             if (!width) {
02708                 return yaml_parser_set_scanner_error(parser, directive ?
02709                         "while parsing a %TAG directive" : "while parsing a tag",
02710                         start_mark, "found an incorrect leading UTF-8 octet");
02711             }
02712         }
02713         else
02714         {
02715             /* Check if the trailing octet is correct. */
02716 
02717             if ((octet & 0xC0) != 0x80) {
02718                 return yaml_parser_set_scanner_error(parser, directive ?
02719                         "while parsing a %TAG directive" : "while parsing a tag",
02720                         start_mark, "found an incorrect trailing UTF-8 octet");
02721             }
02722         }
02723 
02724         /* Copy the octet and move the pointers. */
02725 
02726         *(string->pointer++) = octet;
02727         SKIP(parser);
02728         SKIP(parser);
02729         SKIP(parser);
02730 
02731     } while (--width);
02732 
02733     return 1;
02734 }
02735 
02736 /*
02737  * Scan a block scalar.
02738  */
02739 
02740 static int
02741 yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token,
02742         int literal)
02743 {
02744     yaml_mark_t start_mark;
02745     yaml_mark_t end_mark;
02746     yaml_string_t string = NULL_STRING;
02747     yaml_string_t leading_break = NULL_STRING;
02748     yaml_string_t trailing_breaks = NULL_STRING;
02749     int chomping = 0;
02750     int increment = 0;
02751     int indent = 0;
02752     int leading_blank = 0;
02753     int trailing_blank = 0;
02754 
02755     if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
02756     if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error;
02757     if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error;
02758 
02759     /* Eat the indicator '|' or '>'. */
02760 
02761     start_mark = parser->mark;
02762 
02763     SKIP(parser);
02764 
02765     /* Scan the additional block scalar indicators. */
02766 
02767     if (!CACHE(parser, 1)) goto error;
02768 
02769     /* Check for a chomping indicator. */
02770 
02771     if (CHECK(parser->buffer, '+') || CHECK(parser->buffer, '-'))
02772     {
02773         /* Set the chomping method and eat the indicator. */
02774 
02775         chomping = CHECK(parser->buffer, '+') ? +1 : -1;
02776 
02777         SKIP(parser);
02778 
02779         /* Check for an indentation indicator. */
02780 
02781         if (!CACHE(parser, 1)) goto error;
02782 
02783         if (IS_DIGIT(parser->buffer))
02784         {
02785             /* Check that the intendation is greater than 0. */
02786 
02787             if (CHECK(parser->buffer, '0')) {
02788                 yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
02789                         start_mark, "found an intendation indicator equal to 0");
02790                 goto error;
02791             }
02792 
02793             /* Get the intendation level and eat the indicator. */
02794 
02795             increment = AS_DIGIT(parser->buffer);
02796 
02797             SKIP(parser);
02798         }
02799     }
02800 
02801     /* Do the same as above, but in the opposite order. */
02802 
02803     else if (IS_DIGIT(parser->buffer))
02804     {
02805         if (CHECK(parser->buffer, '0')) {
02806             yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
02807                     start_mark, "found an intendation indicator equal to 0");
02808             goto error;
02809         }
02810 
02811         increment = AS_DIGIT(parser->buffer);
02812 
02813         SKIP(parser);
02814 
02815         if (!CACHE(parser, 1)) goto error;
02816 
02817         if (CHECK(parser->buffer, '+') || CHECK(parser->buffer, '-')) {
02818             chomping = CHECK(parser->buffer, '+') ? +1 : -1;
02819 
02820             SKIP(parser);
02821         }
02822     }
02823 
02824     /* Eat whitespaces and comments to the end of the line. */
02825 
02826     if (!CACHE(parser, 1)) goto error;
02827 
02828     while (IS_BLANK(parser->buffer)) {
02829         SKIP(parser);
02830         if (!CACHE(parser, 1)) goto error;
02831     }
02832 
02833     if (CHECK(parser->buffer, '#')) {
02834         while (!IS_BREAKZ(parser->buffer)) {
02835             SKIP(parser);
02836             if (!CACHE(parser, 1)) goto error;
02837         }
02838     }
02839 
02840     /* Check if we are at the end of the line. */
02841 
02842     if (!IS_BREAKZ(parser->buffer)) {
02843         yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
02844                 start_mark, "did not find expected comment or line break");
02845         goto error;
02846     }
02847 
02848     /* Eat a line break. */
02849 
02850     if (IS_BREAK(parser->buffer)) {
02851         if (!CACHE(parser, 2)) goto error;
02852         SKIP_LINE(parser);
02853     }
02854 
02855     end_mark = parser->mark;
02856 
02857     /* Set the intendation level if it was specified. */
02858 
02859     if (increment) {
02860         indent = parser->indent >= 0 ? parser->indent+increment : increment;
02861     }
02862 
02863     /* Scan the leading line breaks and determine the indentation level if needed. */
02864 
02865     if (!yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks,
02866                 start_mark, &end_mark)) goto error;
02867 
02868     /* Scan the block scalar content. */
02869 
02870     if (!CACHE(parser, 1)) goto error;
02871 
02872     while ((int)parser->mark.column == indent && !IS_Z(parser->buffer))
02873     {
02874         /*
02875          * We are at the beginning of a non-empty line.
02876          */
02877 
02878         /* Is it a trailing whitespace? */
02879 
02880         trailing_blank = IS_BLANK(parser->buffer);
02881 
02882         /* Check if we need to fold the leading line break. */
02883 
02884         if (!literal && (*leading_break.start == '\n')
02885                 && !leading_blank && !trailing_blank)
02886         {
02887             /* Do we need to join the lines by space? */
02888 
02889             if (*trailing_breaks.start == '\0') {
02890                 if (!STRING_EXTEND(parser, string)) goto error;
02891                 *(string.pointer ++) = ' ';
02892             }
02893 
02894             CLEAR(parser, leading_break);
02895         }
02896         else {
02897             if (!JOIN(parser, string, leading_break)) goto error;
02898             CLEAR(parser, leading_break);
02899         }
02900 
02901         /* Append the remaining line breaks. */
02902 
02903         if (!JOIN(parser, string, trailing_breaks)) goto error;
02904         CLEAR(parser, trailing_breaks);
02905 
02906         /* Is it a leading whitespace? */
02907 
02908         leading_blank = IS_BLANK(parser->buffer);
02909 
02910         /* Consume the current line. */
02911 
02912         while (!IS_BREAKZ(parser->buffer)) {
02913             if (!READ(parser, string)) goto error;
02914             if (!CACHE(parser, 1)) goto error;
02915         }
02916 
02917         /* Consume the line break. */
02918 
02919         if (!CACHE(parser, 2)) goto error;
02920 
02921         if (!READ_LINE(parser, leading_break)) goto error;
02922 
02923         /* Eat the following intendation spaces and line breaks. */
02924 
02925         if (!yaml_parser_scan_block_scalar_breaks(parser,
02926                     &indent, &trailing_breaks, start_mark, &end_mark)) goto error;
02927     }
02928 
02929     /* Chomp the tail. */
02930 
02931     if (chomping != -1) {
02932         if (!JOIN(parser, string, leading_break)) goto error;
02933     }
02934     if (chomping == 1) {
02935         if (!JOIN(parser, string, trailing_breaks)) goto error;
02936     }
02937 
02938     /* Create a token. */
02939 
02940     SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start,
02941             literal ? YAML_LITERAL_SCALAR_STYLE : YAML_FOLDED_SCALAR_STYLE,
02942             start_mark, end_mark);
02943 
02944     STRING_DEL(parser, leading_break);
02945     STRING_DEL(parser, trailing_breaks);
02946 
02947     return 1;
02948 
02949 error:
02950     STRING_DEL(parser, string);
02951     STRING_DEL(parser, leading_break);
02952     STRING_DEL(parser, trailing_breaks);
02953 
02954     return 0;
02955 }
02956 
02957 /*
02958  * Scan indentation spaces and line breaks for a block scalar.  Determine the
02959  * indentation level if needed.
       *
       * parser      the parser whose input buffer and mark are advanced.
       * indent      in/out: a non-zero value on entry is the indentation level to
       *             honour; zero means the level is not known yet, in which case
       *             it is computed and stored before a successful return.
       * breaks      string that every consumed line break is read into.
       * start_mark  mark handed to the error reporter when a stray tab is found.
       * end_mark    out: set to parser->mark on entry and again after each
       *             consumed line break.
       *
       * Returns 1 on success.  Returns 0 when CACHE or READ_LINE fails.  On a tab
       * error the result of yaml_parser_set_scanner_error() is returned as is
       * (presumably 0 -- confirm against its definition).
02960  */
02961 
02962 static int
02963 yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser,
02964         int *indent, yaml_string_t *breaks,
02965         yaml_mark_t start_mark, yaml_mark_t *end_mark)
02966 {
02967     int max_indent = 0;     /* Largest column reached after skipping spaces. */
02968 
02969     *end_mark = parser->mark;
02970 
02971     /* Eat the indentation spaces and line breaks. */
02972 
02973     while (1)
02974     {
02975         /* Eat the indentation spaces.  While the level is unknown
                 (*indent == 0) every space is skipped; otherwise skipping stops
                 once the column reaches *indent. */
02976 
02977         if (!CACHE(parser, 1)) return 0;
02978 
02979         while ((!*indent || (int)parser->mark.column < *indent)
02980                 && IS_SPACE(parser->buffer)) {
02981             SKIP(parser);
02982             if (!CACHE(parser, 1)) return 0;
02983         }
02984 
              /* Remember the deepest column seen on any line visited so far,
                 lines that turn out to hold only a break included. */
02985         if ((int)parser->mark.column > max_indent)
02986             max_indent = (int)parser->mark.column;
02987 
02988         /* Check for a tab character messing the indentation. */
02989 
02990         if ((!*indent || (int)parser->mark.column < *indent)
02991                 && IS_TAB(parser->buffer)) {
02992             return yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
02993                     start_mark, "found a tab character where an intendation space is expected");
02994         }
02995 
02996         /* Have we found a non-empty line?  Anything that is not a line
                 break ends the loop. */
02997 
02998         if (!IS_BREAK(parser->buffer)) break;
02999 
03000         /* Consume the line break and move the end mark past it. */
03001 
03002         if (!CACHE(parser, 2)) return 0;
03003         if (!READ_LINE(parser, *breaks)) return 0;
03004         *end_mark = parser->mark;
03005     }
03006 
03007     /* Determine the indentation level if needed: the deepest column seen,
             but never less than parser->indent + 1 and never less than 1. */
03008 
03009     if (!*indent) {
03010         *indent = max_indent;
03011         if (*indent < parser->indent + 1)
03012             *indent = parser->indent + 1;
03013         if (*indent < 1)
03014             *indent = 1;
03015     }
03016 
03017    return 1;
03018 }
03019 
03020 /*
03021  * Scan a quoted scalar.
       *
       * parser  the parser; the character at the current position is skipped as
       *         the left quote without being checked.
       * token   out: initialised as a scalar token on success.
       * single  non-zero for a single-quoted scalar, zero for a double-quoted one.
       *
       * Returns 1 on success and 0 on failure.  On failure all four temporary
       * strings are released.  On success the three helper strings are released
       * and the buffer of the result string is handed to SCALAR_TOKEN_INIT.
03022  */
03023 
03024 static int
03025 yaml_parser_scan_flow_scalar(yaml_parser_t *parser, yaml_token_t *token,
03026         int single)
03027 {
03028     yaml_mark_t start_mark;
03029     yaml_mark_t end_mark;
03030     yaml_string_t string = NULL_STRING;             /* The scalar value being built. */
03031     yaml_string_t leading_break = NULL_STRING;      /* First line break of a blank run. */
03032     yaml_string_t trailing_breaks = NULL_STRING;    /* Any further line breaks of that run. */
03033     yaml_string_t whitespaces = NULL_STRING;        /* Blanks seen before the first break. */
03034     int leading_blanks;     /* Set once a line break has been consumed in the current run. */
03035 
03036     if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
03037     if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error;
03038     if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error;
03039     if (!STRING_INIT(parser, whitespaces, INITIAL_STRING_SIZE)) goto error;
03040 
03041     /* Eat the left quote. */
03042 
03043     start_mark = parser->mark;
03044 
03045     SKIP(parser);
03046 
03047     /* Consume the content of the quoted scalar.  Each iteration handles one
             run of non-blank characters followed by one run of blanks and breaks. */
03048 
03049     while (1)
03050     {
03051         /* Check that there are no document indicators at the beginning of the line. */
03052 
03053         if (!CACHE(parser, 4)) goto error;
03054 
03055         if (parser->mark.column == 0 &&
03056             ((CHECK_AT(parser->buffer, '-', 0) &&
03057               CHECK_AT(parser->buffer, '-', 1) &&
03058               CHECK_AT(parser->buffer, '-', 2)) ||
03059              (CHECK_AT(parser->buffer, '.', 0) &&
03060               CHECK_AT(parser->buffer, '.', 1) &&
03061               CHECK_AT(parser->buffer, '.', 2))) &&
03062             IS_BLANKZ_AT(parser->buffer, 3))
03063         {
03064             yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
03065                     start_mark, "found unexpected document indicator");
03066             goto error;
03067         }
03068 
03069         /* Check for EOF: the input ended before the closing quote. */
03070 
03071         if (IS_Z(parser->buffer)) {
03072             yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
03073                     start_mark, "found unexpected end of stream");
03074             goto error;
03075         }
03076 
03077         /* Consume non-blank characters. */
03078 
03079         if (!CACHE(parser, 2)) goto error;
03080 
03081         leading_blanks = 0;
03082 
03083         while (!IS_BLANKZ(parser->buffer))
03084         {
03085             /* Check for an escaped single quote: two quotes in a row inside
                     a single-quoted scalar produce one quote character. */
03086 
03087             if (single && CHECK_AT(parser->buffer, '\'', 0)
03088                     && CHECK_AT(parser->buffer, '\'', 1))
03089             {
03090                 if (!STRING_EXTEND(parser, string)) goto error;
03091                 *(string.pointer++) = '\'';
03092                 SKIP(parser);
03093                 SKIP(parser);
03094             }
03095 
03096             /* Check for the right quote.  It is left unconsumed here and
                     eaten after the outer loop. */
03097 
03098             else if (CHECK(parser->buffer, single ? '\'' : '"'))
03099             {
03100                 break;
03101             }
03102 
03103             /* Check for an escaped line break (double-quoted only).  The
                     backslash and the break are both dropped, and leading_blanks
                     is set so that blanks on the next line are skipped. */
03104 
03105             else if (!single && CHECK(parser->buffer, '\\')
03106                     && IS_BREAK_AT(parser->buffer, 1))
03107             {
03108                 if (!CACHE(parser, 3)) goto error;
03109                 SKIP(parser);
03110                 SKIP_LINE(parser);
03111                 leading_blanks = 1;
03112                 break;
03113             }
03114 
03115             /* Check for an escape sequence (double-quoted only). */
03116 
03117             else if (!single && CHECK(parser->buffer, '\\'))
03118             {
03119                 size_t code_length = 0;     /* Hex digits to read; 0 for fixed escapes. */
03120 
03121                 if (!STRING_EXTEND(parser, string)) goto error;
03122 
03123                 /* Check the escape character.  Fixed escapes write their
                         replacement bytes right away; x, u and U only record how
                         many hex digits follow. */
03124 
03125                 switch (parser->buffer.pointer[1])
03126                 {
03127                     case '0':
03128                         *(string.pointer++) = '\0';
03129                         break;
03130 
03131                     case 'a':
03132                         *(string.pointer++) = '\x07';
03133                         break;
03134 
03135                     case 'b':
03136                         *(string.pointer++) = '\x08';
03137                         break;
03138 
03139                     case 't':
03140                     case '\t':  /* A backslash before a literal tab also gives a tab. */
03141                         *(string.pointer++) = '\x09';
03142                         break;
03143 
03144                     case 'n':
03145                         *(string.pointer++) = '\x0A';
03146                         break;
03147 
03148                     case 'v':
03149                         *(string.pointer++) = '\x0B';
03150                         break;
03151 
03152                     case 'f':
03153                         *(string.pointer++) = '\x0C';
03154                         break;
03155 
03156                     case 'r':
03157                         *(string.pointer++) = '\x0D';
03158                         break;
03159 
03160                     case 'e':
03161                         *(string.pointer++) = '\x1B';
03162                         break;
03163 
03164                     case ' ':
03165                         *(string.pointer++) = '\x20';
03166                         break;
03167 
03168                     case '"':
03169                         *(string.pointer++) = '"';
03170                         break;
03171 
03172                     case '\'':
03173                         *(string.pointer++) = '\'';
03174                         break;
03175 
03176                     case '\\':
03177                         *(string.pointer++) = '\\';
03178                         break;
03179 
03180                     case 'N':   /* NEL (#x85), written as two UTF-8 bytes */
03181                         *(string.pointer++) = '\xC2';
03182                         *(string.pointer++) = '\x85';
03183                         break;
03184 
03185                     case '_':   /* #xA0, written as two UTF-8 bytes */
03186                         *(string.pointer++) = '\xC2';
03187                         *(string.pointer++) = '\xA0';
03188                         break;
03189 
03190                     case 'L':   /* LS (#x2028), written as three UTF-8 bytes */
03191                         *(string.pointer++) = '\xE2';
03192                         *(string.pointer++) = '\x80';
03193                         *(string.pointer++) = '\xA8';
03194                         break;
03195 
03196                     case 'P':   /* PS (#x2029), written as three UTF-8 bytes */
03197                         *(string.pointer++) = '\xE2';
03198                         *(string.pointer++) = '\x80';
03199                         *(string.pointer++) = '\xA9';
03200                         break;
03201 
03202                     case 'x':
03203                         code_length = 2;
03204                         break;
03205 
03206                     case 'u':
03207                         code_length = 4;
03208                         break;
03209 
03210                     case 'U':
03211                         code_length = 8;
03212                         break;
03213 
03214                     default:
03215                         yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
03216                                 start_mark, "found unknown escape character");
03217                         goto error;
03218                 }
03219 
                      /* Skip the backslash and the escape character. */
03220                 SKIP(parser);
03221                 SKIP(parser);
03222 
03223                 /* Consume an arbitrary escape code. */
03224 
03225                 if (code_length)
03226                 {
03227                     unsigned int value = 0;
03228                     size_t k;
03229 
03230                     /* Scan the character value: exactly code_length hex
                             digits, most significant digit first. */
03231 
03232                     if (!CACHE(parser, code_length)) goto error;
03233 
03234                     for (k = 0; k < code_length; k ++) {
03235                         if (!IS_HEX_AT(parser->buffer, k)) {
03236                             yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
03237                                     start_mark, "did not find expected hexdecimal number");
03238                             goto error;
03239                         }
03240                         value = (value << 4) + AS_HEX_AT(parser->buffer, k);
03241                     }
03242 
03243                     /* Check the value and write the character.  Values in
                             the range 0xD800-0xDFFF and values above 0x10FFFF are
                             rejected; everything else is written as one to four
                             UTF-8 bytes. */
03244 
03245                     if ((value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF) {
03246                         yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
03247                                 start_mark, "found invalid Unicode character escape code");
03248                         goto error;
03249                     }
03250 
03251                     if (value <= 0x7F) {
03252                         *(string.pointer++) = value;
03253                     }
03254                     else if (value <= 0x7FF) {
03255                         *(string.pointer++) = 0xC0 + (value >> 6);
03256                         *(string.pointer++) = 0x80 + (value & 0x3F);
03257                     }
03258                     else if (value <= 0xFFFF) {
03259                         *(string.pointer++) = 0xE0 + (value >> 12);
03260                         *(string.pointer++) = 0x80 + ((value >> 6) & 0x3F);
03261                         *(string.pointer++) = 0x80 + (value & 0x3F);
03262                     }
03263                     else {
03264                         *(string.pointer++) = 0xF0 + (value >> 18);
03265                         *(string.pointer++) = 0x80 + ((value >> 12) & 0x3F);
03266                         *(string.pointer++) = 0x80 + ((value >> 6) & 0x3F);
03267                         *(string.pointer++) = 0x80 + (value & 0x3F);
03268                     }
03269 
03270                     /* Advance the pointer past the hex digits just decoded. */
03271 
03272                     for (k = 0; k < code_length; k ++) {
03273                         SKIP(parser);
03274                     }
03275                 }
03276             }
03277 
03278             else
03279             {
03280                 /* It is a non-escaped non-blank character. */
03281 
03282                 if (!READ(parser, string)) goto error;
03283             }
03284 
03285             if (!CACHE(parser, 2)) goto error;
03286         }
03287 
03288         /* Check if we are at the end of the scalar. */
03289 
03290         if (CHECK(parser->buffer, single ? '\'' : '"'))
03291             break;
03292 
03293         /* Consume blank characters. */
03294 
03295         if (!CACHE(parser, 1)) goto error;
03296 
03297         while (IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer))
03298         {
03299             if (IS_BLANK(parser->buffer))
03300             {
03301                 /* Consume a space or a tab character.  Blanks before the
                         first line break are kept in whitespaces; blanks after a
                         break are dropped. */
03302 
03303                 if (!leading_blanks) {
03304                     if (!READ(parser, whitespaces)) goto error;
03305                 }
03306                 else {
03307                     SKIP(parser);
03308                 }
03309             }
03310             else
03311             {
03312                 if (!CACHE(parser, 2)) goto error;
03313 
03314                 /* Check if it is a first line break.  If so, the blanks
                         collected in front of it are discarded. */
03315 
03316                 if (!leading_blanks)
03317                 {
03318                     CLEAR(parser, whitespaces);
03319                     if (!READ_LINE(parser, leading_break)) goto error;
03320                     leading_blanks = 1;
03321                 }
03322                 else
03323                 {
03324                     if (!READ_LINE(parser, trailing_breaks)) goto error;
03325                 }
03326             }
03327             if (!CACHE(parser, 1)) goto error;
03328         }
03329 
03330         /* Join the whitespaces or fold line breaks. */
03331 
03332         if (leading_blanks)
03333         {
03334             /* Do we need to fold line breaks?  When the first break starts
                     with a newline byte it is dropped: a single space is written
                     if trailing_breaks begins with a NUL byte (presumably meaning
                     no further breaks were read -- confirm against CLEAR),
                     otherwise only the further breaks are appended.  Any other
                     first break is appended together with the further breaks. */
03335 
03336             if (leading_break.start[0] == '\n') {
03337                 if (trailing_breaks.start[0] == '\0') {
03338                     if (!STRING_EXTEND(parser, string)) goto error;
03339                     *(string.pointer++) = ' ';
03340                 }
03341                 else {
03342                     if (!JOIN(parser, string, trailing_breaks)) goto error;
03343                     CLEAR(parser, trailing_breaks);
03344                 }
03345                 CLEAR(parser, leading_break);
03346             }
03347             else {
03348                 if (!JOIN(parser, string, leading_break)) goto error;
03349                 if (!JOIN(parser, string, trailing_breaks)) goto error;
03350                 CLEAR(parser, leading_break);
03351                 CLEAR(parser, trailing_breaks);
03352             }
03353         }
03354         else
03355         {
                  /* No line break in this run: keep the blanks as they are. */
03356             if (!JOIN(parser, string, whitespaces)) goto error;
03357             CLEAR(parser, whitespaces);
03358         }
03359     }
03360 
03361     /* Eat the right quote. */
03362 
03363     SKIP(parser);
03364 
03365     end_mark = parser->mark;
03366 
03367     /* Create a token.  The style recorded depends on the quote kind. */
03368 
03369     SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start,
03370             single ? YAML_SINGLE_QUOTED_SCALAR_STYLE : YAML_DOUBLE_QUOTED_SCALAR_STYLE,
03371             start_mark, end_mark);
03372 
          /* Only the helper strings are released; string.start went into the token. */
03373     STRING_DEL(parser, leading_break);
03374     STRING_DEL(parser, trailing_breaks);
03375     STRING_DEL(parser, whitespaces);
03376 
03377     return 1;
03378 
03379 error:
          /* Failure path: release every temporary string, the result included. */
03380     STRING_DEL(parser, string);
03381     STRING_DEL(parser, leading_break);
03382     STRING_DEL(parser, trailing_breaks);
03383     STRING_DEL(parser, whitespaces);
03384 
03385     return 0;
03386 }
03387 
03388 /*
03389  * Scan a plain scalar.
       *
       * parser  the parser; scanning starts at parser->mark.
       * token   out: initialised as a plain-style scalar token on success.
       *
       * Returns 1 on success and 0 on failure.  On failure all four temporary
       * strings are released.
       *
       * Side effect: parser->simple_key_allowed is set to 1 when the scan ends
       * with leading_blanks still set, that is, a line break was consumed and no
       * further scalar character was copied after it.
03390  */
03391 
03392 static int
03393 yaml_parser_scan_plain_scalar(yaml_parser_t *parser, yaml_token_t *token)
03394 {
03395     yaml_mark_t start_mark;
03396     yaml_mark_t end_mark;
03397     yaml_string_t string = NULL_STRING;             /* The scalar value being built. */
03398     yaml_string_t leading_break = NULL_STRING;      /* First line break of a blank run. */
03399     yaml_string_t trailing_breaks = NULL_STRING;    /* Any further line breaks of that run. */
03400     yaml_string_t whitespaces = NULL_STRING;        /* Blanks seen before the first break. */
03401     int leading_blanks = 0;     /* Set while a consumed line break is still pending. */
03402     int indent = parser->indent+1;  /* In block context a lower column ends the scalar. */
03403 
03404     if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
03405     if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error;
03406     if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error;
03407     if (!STRING_INIT(parser, whitespaces, INITIAL_STRING_SIZE)) goto error;
03408 
03409     start_mark = end_mark = parser->mark;
03410 
03411     /* Consume the content of the plain scalar.  Each iteration handles one
             run of non-blank characters followed by one run of blanks and breaks. */
03412 
03413     while (1)
03414     {
03415         /* Check for a document indicator.  Unlike in a quoted scalar this
                 is not an error; it simply ends the scalar. */
03416 
03417         if (!CACHE(parser, 4)) goto error;
03418 
03419         if (parser->mark.column == 0 &&
03420             ((CHECK_AT(parser->buffer, '-', 0) &&
03421               CHECK_AT(parser->buffer, '-', 1) &&
03422               CHECK_AT(parser->buffer, '-', 2)) ||
03423              (CHECK_AT(parser->buffer, '.', 0) &&
03424               CHECK_AT(parser->buffer, '.', 1) &&
03425               CHECK_AT(parser->buffer, '.', 2))) &&
03426             IS_BLANKZ_AT(parser->buffer, 3)) break;
03427 
03428         /* Check for a comment.  This test runs only at the start of a
                 non-blank run, so a hash sign inside a run is copied as is. */
03429 
03430         if (CHECK(parser->buffer, '#'))
03431             break;
03432 
03433         /* Consume non-blank characters. */
03434 
03435         while (!IS_BLANKZ(parser->buffer))
03436         {
03437             /* Check for 'x:x' in the flow context. TODO: Fix the test "spec-08-13". */
03438 
03439             if (parser->flow_level
03440                     && CHECK(parser->buffer, ':')
03441                     && !IS_BLANKZ_AT(parser->buffer, 1)) {
03442                 yaml_parser_set_scanner_error(parser, "while scanning a plain scalar",
03443                         start_mark, "found unexpected ':'");
03444                 goto error;
03445             }
03446 
03447             /* Check for indicators that may end a plain scalar: a colon
                     followed by IS_BLANKZ in any context, or one of the flow
                     indicators listed below when inside a flow collection. */
03448 
03449             if ((CHECK(parser->buffer, ':') && IS_BLANKZ_AT(parser->buffer, 1))
03450                     || (parser->flow_level &&
03451                         (CHECK(parser->buffer, ',') || CHECK(parser->buffer, ':')
03452                          || CHECK(parser->buffer, '?') || CHECK(parser->buffer, '[')
03453                          || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '{')
03454                          || CHECK(parser->buffer, '}'))))
03455                 break;
03456 
03457             /* Check if we need to join whitespaces and breaks.  Pending
                     blanks and breaks are flushed only here, right before another
                     scalar character is copied, so blanks and breaks at the very
                     end of the scalar never reach the token. */
03458 
03459             if (leading_blanks || whitespaces.start != whitespaces.pointer)
03460             {
03461                 if (leading_blanks)
03462                 {
03463                     /* Do we need to fold line breaks?  When the first break
                             starts with a newline byte it is dropped: a single
                             space is written if trailing_breaks begins with a NUL
                             byte (presumably meaning no further breaks were read
                             -- confirm against CLEAR), otherwise only the further
                             breaks are appended.  Any other first break is
                             appended together with the further breaks. */
03464 
03465                     if (leading_break.start[0] == '\n') {
03466                         if (trailing_breaks.start[0] == '\0') {
03467                             if (!STRING_EXTEND(parser, string)) goto error;
03468                             *(string.pointer++) = ' ';
03469                         }
03470                         else {
03471                             if (!JOIN(parser, string, trailing_breaks)) goto error;
03472                             CLEAR(parser, trailing_breaks);
03473                         }
03474                         CLEAR(parser, leading_break);
03475                     }
03476                     else {
03477                         if (!JOIN(parser, string, leading_break)) goto error;
03478                         if (!JOIN(parser, string, trailing_breaks)) goto error;
03479                         CLEAR(parser, leading_break);
03480                         CLEAR(parser, trailing_breaks);
03481                     }
03482 
03483                     leading_blanks = 0;
03484                 }
03485                 else
03486                 {
                          /* No line break pending: keep the blanks as they are. */
03487                     if (!JOIN(parser, string, whitespaces)) goto error;
03488                     CLEAR(parser, whitespaces);
03489                 }
03490             }
03491 
03492             /* Copy the character. */
03493 
03494             if (!READ(parser, string)) goto error;
03495 
                  /* The token ends right after the last character copied. */
03496             end_mark = parser->mark;
03497 
03498             if (!CACHE(parser, 2)) goto error;
03499         }
03500 
03501         /* Is it the end?  Anything other than a blank or a line break
                 here ends the scalar. */
03502 
03503         if (!(IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer)))
03504             break;
03505 
03506         /* Consume blank characters. */
03507 
03508         if (!CACHE(parser, 1)) goto error;
03509 
03510         while (IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer))
03511         {
03512             if (IS_BLANK(parser->buffer))
03513             {
03514                 /* Check for a tab character that abuses indentation: a tab
                         found after a line break at a column below indent. */
03515 
03516                 if (leading_blanks && (int)parser->mark.column < indent
03517                         && IS_TAB(parser->buffer)) {
03518                     yaml_parser_set_scanner_error(parser, "while scanning a plain scalar",
03519                             start_mark, "found a tab character that violate intendation");
03520                     goto error;
03521                 }
03522 
03523                 /* Consume a space or a tab character.  Blanks before the
                         first line break are kept in whitespaces; blanks after a
                         break are dropped. */
03524 
03525                 if (!leading_blanks) {
03526                     if (!READ(parser, whitespaces)) goto error;
03527                 }
03528                 else {
03529                     SKIP(parser);
03530                 }
03531             }
03532             else
03533             {
03534                 if (!CACHE(parser, 2)) goto error;
03535 
03536                 /* Check if it is a first line break.  If so, the blanks
                         collected in front of it are discarded. */
03537 
03538                 if (!leading_blanks)
03539                 {
03540                     CLEAR(parser, whitespaces);
03541                     if (!READ_LINE(parser, leading_break)) goto error;
03542                     leading_blanks = 1;
03543                 }
03544                 else
03545                 {
03546                     if (!READ_LINE(parser, trailing_breaks)) goto error;
03547                 }
03548             }
03549             if (!CACHE(parser, 1)) goto error;
03550         }
03551 
03552         /* Check indentation level.  Only enforced outside flow collections. */
03553 
03554         if (!parser->flow_level && (int)parser->mark.column < indent)
03555             break;
03556     }
03557 
03558     /* Create a token. */
03559 
03560     SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start,
03561             YAML_PLAIN_SCALAR_STYLE, start_mark, end_mark);
03562 
03563     /* Note that we change the 'simple_key_allowed' flag. */
03564 
03565     if (leading_blanks) {
03566         parser->simple_key_allowed = 1;
03567     }
03568 
          /* Only the helper strings are released; string.start went into the token. */
03569     STRING_DEL(parser, leading_break);
03570     STRING_DEL(parser, trailing_breaks);
03571     STRING_DEL(parser, whitespaces);
03572 
03573     return 1;
03574 
03575 error:
          /* Failure path: release every temporary string, the result included. */
03576     STRING_DEL(parser, string);
03577     STRING_DEL(parser, leading_break);
03578     STRING_DEL(parser, trailing_breaks);
03579     STRING_DEL(parser, whitespaces);
03580 
03581     return 0;
03582 }
03583 
03584