/*****************************************************************************/
/*                                                                           */
/*  UNIT:      NTL2_Assemble_String (Level 1 library routine)                */
/*                                                                           */
/*  Author:    Nikola Stojanovic                                             */
/*                                                                           */
/*  Revision:  07 JUL 94   Version 1.0                                       */
/*             15 OCT 94   Version 2.0                                       */
/*                                                                           */
/*  Function:                                                                */
/*                                                                           */
/*  Procedure extracts a "string" (as currently defined) from the given      */
/*  file, allocates a buffer to hold it and places in the result parameter;  */
/*  returns the error structure, or NULL if there were no errors             */
/*                                                                           */
/*                                                                           */
/*  Current definition of a "string" in the file:                            */
/*                                                                           */
/*  - String can be enclosed or not;                                         */
/*  - "Enclosed" string starts with single-quote symbol (') and terminates   */
/*    with the same one; single-quotes are permitted in the string only if   */
/*    they are within open double-quote (") range;                           */
/*  - String which is not "enclosed" may start with any character except     */
/*    single-quote (starting "white spaces" will be ignored, though) and     */
/*    terminates with a new_line (thus, it can contain one line of text      */
/*    only);                                                                 */
/*  - Any character may be "escaped" with preceding backslash (\) symbol     */
/*    (backslash itself can be included in the string by escaping it itself, */
/*    i.e. by doubling it (\\)); escaped characters do not have any special  */
/*    control meaning and are placed in the string "as is" (but without      */
/*    the "escape" character);                                               */
/*  - Characters with special meanings can, but need not, be placed in the   */
/*    assembled string. For enclosed string both opening and closing symbol  */
/*    are omitted from the buffer. Although they have special meaning, double*/
/*    quotes are always included in the string buffer.
*/ /* */ /*****************************************************************************/ #include #include #include #include "ntl2.h" /* Information is read from alignment files by supplementary buffer of size: */ #define IN_LIMIT 512 static char In_Buff [IN_LIMIT]; /* Global input line buffer, for convenience */ /*****************************************************************************/ errind NTL2_Assemble_String (FILE **in_file, char first, char **result) { int enclosed, quoted, escaped, not_done; char ch; int accept_pos; In_Buff [0] = first; accept_pos = 1; /* Initialize the accept parameters */ /* Check whether the string is of "enclosed" kind - if it starts with ' */ if (first == '\'') { enclosed = TRUE; first = fgetc (*in_file); In_Buff [accept_pos++] = first; } else enclosed = FALSE; if ((first != '\"') && (first != '\\')) { quoted = escaped = FALSE; } else if (first == '\\') { quoted = FALSE; escaped = TRUE; } else { quoted = TRUE; escaped = FALSE; } not_done = TRUE; while (not_done) { /* Collect the characters of the string */ ch = fgetc (*in_file); if (ch == EOF) { if ((enclosed) || (quoted) || (escaped)) /* Error condition */ return NTL1_Error_Record (USER_ERROR, ERR_FILE_FORMAT, "_String: Unexpected end-of-file", 0); else { In_Buff [accept_pos] = '\0'; not_done = FALSE; } } else if (ch == '\\') { /* Will the next character be escaped? 
*/ if (escaped) { In_Buff [accept_pos++] = ch; escaped = FALSE; } else escaped = TRUE; } else if (ch == '\"') { /* Double quote seen - may be special symbol */ if (escaped) { In_Buff [accept_pos++] = ch; escaped = FALSE; } else { if (quoted) { In_Buff [accept_pos++] = ch; quoted = FALSE; } else { In_Buff [accept_pos++] = ch; quoted = TRUE; } } } else if (ch == '\'') { /* Possible string termination */ if (escaped) { escaped = FALSE; In_Buff [accept_pos++] = ch; } else if (quoted) In_Buff [accept_pos++] = ch; else if (enclosed) { In_Buff [accept_pos++] = ch; not_done = FALSE; } } else if (ch == '\n') { /* Possible string termination */ if (escaped) return NTL1_Error_Record (USER_ERROR, ERR_FILE_FORMAT, "_String: Ambiguous escape symbol", 0); else if ((enclosed) || (quoted)) In_Buff [accept_pos++] = ch; else { In_Buff [accept_pos] = '\0'; not_done = FALSE; } } else { /* Unless single-quote, this is a character with no special func. */ if (escaped) escaped = FALSE; In_Buff [accept_pos++] = ch; } if (accept_pos == IN_LIMIT - 1) { /* Shortcut - just truncate string */ while (ch != EOF) ch = fgetc (*in_file); In_Buff [accept_pos] = '\0'; *result = NTL0_strsave (In_Buff); return NULL; } } if (enclosed) { /* If string was "enclosed", check legal layout */ if ((accept_pos < 2) || (In_Buff [accept_pos - 1] != '\'')) { } else { In_Buff [accept_pos - 1] = '\0'; *result = NTL0_strsave (&(In_Buff [1])); return NULL; } } else { *result = NTL0_strsave (In_Buff); return NULL; } }