Markup spec/flex
Appearance
/*
 * Scanner for Wikipedia language. Built with flex.
 *
 * Reads wiki markup from the file named on the command line (or from
 * standard input when no file is given) and writes an HTML rendering to
 * standard output.  Anything not matched by a rule below is echoed
 * unchanged by flex's default rule.
 */

CARRIAGERETURN          \r
CARRIAGERETURN_DOUBLE   \r\n\r
VALIDURLCHARS           [a-z0-9\%\/\?\:\@\=\&\$\_\-\+\!\*\'\(\)\,\.]
NEWPARAGRAPH            \n\n
MATH                    <math>
MATH_END                <\/math>
NOWIKI                  <nowiki>
NOWIKI_END              <\/nowiki>
GENERICLINK             [a-z]+:\/\/{VALIDURLCHARS}+
/*
 * "[url title]".  The title may contain neither '[' nor ']' so that the
 * match always ends at the first ']' -- the same place where the
 * {TITLEDLINK} action stops printing.
 */
TITLEDLINK              \133{GENERICLINK}\ [^\133\135]*\135
WIKILINK                \133{2}[^\135]+\135{2}
CURRENTDAY              \{\{CURRENTDAY\}\}
CURRENTMONTH            \{\{CURRENTMONTH\}\}
CURRENTTIME             \{\{CURRENTTIME\}\}
CURRENTYEAR             \{\{CURRENTYEAR\}\}
LIST                    \n[\:\#\;\*]
PRE                     ^\040
PRE_END                 \n[^\040]
HR                      ^----
H1                      ={1}
H2                      ={2}
H3                      ={3}
H4                      ={4}
H5                      ={5}
H6                      ={6}
EMPHASIZE               '{2}
SEMPHASIZE              '{3}
VSEMPHASIZE             '{5}
LESSERTHAN              <
GREATERTHAN             >

%option caseless stack

%s list pre
%x math nowiki

%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <sys/types.h>

#define MAXLIST 32

/* Positions of the open/closed flags kept in yylex()'s state[] array. */
enum {
    ST_PRE = 0,     /* inside a <pre> block          */
    ST_H1,          /* inside <h1> ... <h6>          */
    ST_H2,
    ST_H3,
    ST_H4,
    ST_H5,
    ST_H6,
    ST_EM,          /* emphasis (<em>)               */
    ST_STRONG,      /* strong emphasis (<strong>)    */
    ST_STRONG_EM,   /* very strong (<strong><em>)    */
    ST_COUNT
};

/*
 * Emit open_tag and mark the element open if *flag is 0; otherwise emit
 * close_tag and mark it closed again.
 */
static void toggle_tag(int *flag, const char *open_tag, const char *close_tag)
{
    if (*flag == 0) {
        fputs(open_tag, stdout);
        *flag = 1;
    } else {
        fputs(close_tag, stdout);
        *flag = 0;
    }
}
%}

%%

%{
    /* Open/closed flag per element, indexed by the ST_* constants. */
    int state[ST_COUNT] = {0};

    /* The most recently seen list marker text (newline + marker char). */
    char listtext[MAXLIST] = "";

    /* The variables needed for CURRENTTIME-like substitutions. */
    time_t time_since_epoch;
    struct tm cur_time;

    /* Get the time once on entry to the scanner, instead of per match. */
    time(&time_since_epoch);
    gmtime_r(&time_since_epoch, &cur_time);
%}

{CARRIAGERETURN_DOUBLE} {
    /* Replace CR LF CR by one LF pushed back onto the input, so that a
       following LF can complete a {NEWPARAGRAPH}. */
    unput('\n');
}

{CARRIAGERETURN}        { /* Discard any other carriage return. */ }

{WIKILINK}              { ECHO; }

{NOWIKI}                { yy_push_state(nowiki); }
<nowiki>{NOWIKI_END}    { yy_pop_state(); }
<nowiki>{LESSERTHAN}    { /* Escape so enclosed markup is shown literally. */ printf("&lt;"); }
<nowiki>{GREATERTHAN}   { printf("&gt;"); }

{MATH}                  { yy_push_state(math); }
<math>{MATH_END}        { yy_pop_state(); }

{PRE} {
    /* A line starting with a space opens a <pre> block, unless one is
       already open. */
    if (state[ST_PRE] == 0) {
        printf("\n<pre>");
        state[ST_PRE]++;
        yy_push_state(pre);
    }
}

<pre>{PRE_END} {
    /* A line that does not start with a space ends the block.  The
       matched text is handed back with yyless(0) so the other rules
       can still process it. */
    printf("</pre>");
    state[ST_PRE]--;
    yyless(0);
    yy_pop_state();
}

{HR}                    { printf("\n<hr>"); }

{NEWPARAGRAPH} {
    printf("\n<p>");
    /* Push one newline back so it is scanned again. */
    unput('\n');
}

{VSEMPHASIZE}           { /* Close in reverse order of opening. */ toggle_tag(&state[ST_STRONG_EM], "<strong><em>", "</em></strong>"); }
{SEMPHASIZE}            { toggle_tag(&state[ST_STRONG], "<strong>", "</strong>"); }
{EMPHASIZE}             { toggle_tag(&state[ST_EM], "<em>", "</em>"); }

{H6}                    { toggle_tag(&state[ST_H6], "<h6>", "</h6>"); }
{H5}                    { toggle_tag(&state[ST_H5], "<h5>", "</h5>"); }
{H4}                    { toggle_tag(&state[ST_H4], "<h4>", "</h4>"); }
{H3}                    { toggle_tag(&state[ST_H3], "<h3>", "</h3>"); }
{H2}                    { toggle_tag(&state[ST_H2], "<h2>", "</h2>"); }
{H1}                    { toggle_tag(&state[ST_H1], "<h1>", "</h1>"); }

{TITLEDLINK} {
    /* Walk a local cursor instead of moving yytext itself. */
    const char *cursor = yytext + 1;            /* skip the leading '[' */

    /* Everything up to the first space is the URL. */
    printf("<a href=\"");
    while (*cursor != '\0' && *cursor != ' ') {
        putchar(*cursor++);
    }
    printf("\">");

    /* Everything after that space, up to the closing ']', is the title. */
    if (*cursor == ' ') {
        cursor++;
    }
    while (*cursor != '\0' && *cursor != ']') {
        putchar(*cursor++);
    }
    printf("</a>");
}

{GENERICLINK} {
    int url_len = (int) yyleng;
    char last = yytext[yyleng - 1];

    /* If the last character of a URL is a '.' or a ',', assume it is
       punctuation: keep it out of the link and print it afterwards.
       A precision is used so the scanner buffer is not modified. */
    if (last == '.' || last == ',') {
        url_len--;
        printf("<a href=\"%.*s\">%.*s</a>%c",
               url_len, yytext, url_len, yytext, last);
    } else {
        printf("<a href=\"%s\">%s</a>", yytext, yytext);
    }
}

{CURRENTTIME}           { /* Zero-padded HH:MM. */ printf("%.2d:%.2d", cur_time.tm_hour, cur_time.tm_min); }
{CURRENTDAY}            { printf("%d", cur_time.tm_mday); }
{CURRENTMONTH}          { /* tm_mon counts from 0. */ printf("%.2d", (cur_time.tm_mon + 1)); }
{CURRENTYEAR}           { /* tm_year counts from 1900. */ printf("%d", (cur_time.tm_year + 1900)); }

{LIST} {
    /* TODO: list rendering is not implemented.  The marker is only
       recorded in listtext; nothing is printed for it. */
    if (strlen(yytext) < MAXLIST) {
        strcpy(listtext, yytext);
    }
}

%%

/*
 * Entry point.
 *
 * Usage: scanner [file]
 *
 * Scans the named file, or standard input when no file is given (flex
 * reads stdin while yyin is unset).  Returns 0 on success and
 * EXIT_FAILURE when the file cannot be opened.
 */
int main (int argc, char **argv)
{
    FILE *input = NULL;

    if (argc > 1) {
        input = fopen(argv[1], "r");
        if (input == NULL) {
            perror(argv[1]);
            return EXIT_FAILURE;
        }
        yyin = input;
    }

    yylex();

    if (input != NULL) {
        fclose(input);
    }
    return 0;
}