// Current progress: string-literal recognition is still stuck (unfinished).
// token.c
#include "token.h"
#include <string.h>
#include <ctype.h>
/* Lower-case a character; shorthand for tolower(). */
#define L(ch) tolower(ch)
/*
 * Evaluates to 1 when ch is a hexadecimal digit (0-9, a-f, A-F), else 0.
 * The argument is expanded exactly once and converted to unsigned char
 * before it reaches <ctype.h>, so a negative plain-char value (a byte
 * >= 0x80 where char is signed) no longer causes undefined behaviour.
 */
#define IS_HEX_DIGIT(ch) (isxdigit((unsigned char)(ch)) != 0)
/* Line counter defined in another translation unit; getToken() increments it. */
extern int lineCount;
/* Set by getToken(): 1 while/after scanning a 0x/0X-prefixed constant, else 0. */
int is_HEX;
/*
 * Reserved words recognised by the lexer.  getToken() compares each
 * scanned identifier against this table and, on a match, returns the
 * entry's index, so the order of the entries is significant.
 */
const char *keyWords[] = {
    /*  0 */ "int",
    /*  1 */ "char",
    /*  2 */ "float",
    /*  3 */ "double",
    /*  4 */ "short",
    /*  5 */ "long",
    /*  6 */ "if",
    /*  7 */ "else",
    /*  8 */ "do",
    /*  9 */ "while",
    /* 10 */ "for",
    /* 11 */ "break",
    /* 12 */ "switch",
    /* 13 */ "case",
    /* 14 */ "void",
    /* 15 */ "return",
    /* 16 */ "typedef",
    /* 17 */ "struct",
    /* 18 */ "continue"
};
/*
 * Append one character to the NUL-terminated token text in dest.
 *
 * ch   - character to append.
 * dest - NUL-terminated buffer holding the token built so far.
 *
 * When the token has no room left, printError() is called and dest is
 * left unchanged; previously the character and a terminator were still
 * written past the limit after the error had been reported.
 *
 * NOTE(review): MAX_TOKEN_LEN is treated here as the capacity of dest
 * including the terminating NUL, so the longest token is
 * MAX_TOKEN_LEN - 1 characters.  The real size of the caller's buffer is
 * not visible in this file - confirm against token.h and the callers.
 */
void addToken(char ch, char dest[]) {
    int len = 0;

    /* Test the bound before dereferencing so no byte past the limit is read. */
    while (len < MAX_TOKEN_LEN - 1 && dest[len]) {
        ++len;
    }
    if (len >= MAX_TOKEN_LEN - 1) {
        printError("reading too long token.");
        return;
    }
    dest[len] = ch;
    dest[len + 1] = 0;
}
/*
 * Lexer helpers.  Each one is entered after getToken() has read the first
 * character of the lexeme, appends the lexeme text to token via addToken()
 * and returns the token type.  Characters are kept in an int so that the
 * EOF returned by fgetc() stays distinguishable from every real byte.
 */

/* Read the closing quote of a character constant. */
static int finishCharConst(FILE* fp, char token[]) {
    int ch = fgetc(fp);

    if (ch != '\'') {
        return INVALID_TYPE;
    }
    addToken((char)ch, token);
    return CHAR_CONST;
}

/* Append first to token, then second as well if it is the next input character. */
static int scanOperator(FILE* fp, char token[], char first, char second,
                        int pairType, int singleType) {
    int ch;

    addToken(first, token);
    ch = fgetc(fp);
    if (ch == second) {
        addToken((char)ch, token);
        return pairType;
    }
    ungetc(ch, fp);
    return singleType;
}

/* Scan an identifier, keyword, or "name[digits]" array form; ch is its first character. */
static int scanIdentifier(FILE* fp, char token[], int ch) {
    do {
        addToken((char)ch, token);
        ch = fgetc(fp);
    } while (isalnum(ch) || ch == '_');
    ungetc(ch, fp);

    /* A keyword is reported as its index in keyWords. */
    for (size_t i = 0; i < sizeof keyWords / sizeof keyWords[0]; ++i) {
        if (strcmp(keyWords[i], token) == 0) {
            return (int)i;
        }
    }

    ch = fgetc(fp);
    if (ch != '[') {
        ungetc(ch, fp);
        return IDENT;
    }
    /* Only a (possibly empty) run of digits is accepted between the brackets. */
    addToken((char)ch, token);
    for (ch = fgetc(fp); isdigit(ch); ch = fgetc(fp)) {
        addToken((char)ch, token);
    }
    if (ch != ']') {
        return INVALID_TYPE;
    }
    addToken((char)ch, token);
    return ARRAY;
}

/* Scan an integer or floating constant; ch is its first digit. */
static int scanNumber(FILE* fp, char token[], int ch) {
    int first = ch;

    /* The leading digit is always kept, including a leading '0'. */
    addToken((char)first, token);
    ch = fgetc(fp);
    if (first == '0' && (ch == 'x' || ch == 'X')) {
        is_HEX = 1;
        addToken((char)ch, token);
        ch = fgetc(fp);
    }
    while (isdigit(ch) || (is_HEX && IS_HEX_DIGIT(ch))) {
        addToken((char)ch, token);
        ch = fgetc(fp);
    }

    switch (ch) {
    case 'u': case 'U':
        /* u, ul, ull in either case. */
        addToken((char)ch, token);
        ch = fgetc(fp);
        if (ch == 'l' || ch == 'L') {
            addToken((char)ch, token);
            ch = fgetc(fp);
            if (ch == 'l' || ch == 'L') {
                addToken((char)ch, token);
            } else {
                ungetc(ch, fp);
            }
        } else {
            ungetc(ch, fp);
        }
        return INT_CONST;
    case 'l': case 'L':
        /* l or ll; the second letter is accepted in either case, as for "ul". */
        addToken((char)ch, token);
        ch = fgetc(fp);
        if (ch == 'l' || ch == 'L') {
            addToken((char)ch, token);
        } else {
            ungetc(ch, fp);
        }
        return INT_CONST;
    case '.':
        /* At least one digit must follow the decimal point. */
        ch = fgetc(fp);
        if (!isdigit(ch)) {
            return INVALID_TYPE;
        }
        addToken('.', token);
        do {
            addToken((char)ch, token);
            ch = fgetc(fp);
        } while (isdigit(ch));
        if (ch == 'f' || ch == 'F') {
            addToken((char)ch, token);
        } else {
            ungetc(ch, fp);
        }
        return FLOAT_CONST;
    default:
        /* A letter, digit or '_' glued to the number (e.g. "12ab") is invalid. */
        if (isalnum(ch) || ch == '_') {
            return INVALID_TYPE;
        }
        /* Anything else (',', ']', '}', '=', tab, EOF, ...) ends the constant. */
        ungetc(ch, fp);
        return INT_CONST;
    }
}

/* Scan a character constant; the opening quote has already been read. */
static int scanCharConst(FILE* fp, char token[]) {
    int ch;

    addToken('\'', token);
    ch = fgetc(fp);
    if (ch != '\\') {
        addToken((char)ch, token);
        return finishCharConst(fp, token);
    }

    /* Keep the backslash so the token text matches the source spelling. */
    addToken('\\', token);
    ch = fgetc(fp);
    switch (ch) {
    case 'n': case 't': case '\\': case '\'': case '\"': case 'r':
        addToken((char)ch, token);
        return finishCharConst(fp, token);
    case 'x': case 'X':
        /* \x followed by one or two hexadecimal digits. */
        addToken((char)ch, token);
        ch = fgetc(fp);
        if (!IS_HEX_DIGIT(ch)) {
            return INVALID_TYPE;
        }
        addToken((char)ch, token);
        ch = fgetc(fp);
        if (IS_HEX_DIGIT(ch)) {
            addToken((char)ch, token);
        } else {
            ungetc(ch, fp);
        }
        return finishCharConst(fp, token);
    default:
        /* One to three octal digits, so '\0' and '\12' are accepted too. */
        if (ch < '0' || ch > '7') {
            return INVALID_TYPE;
        }
        addToken((char)ch, token);
        for (int extra = 0; extra < 2; ++extra) {
            ch = fgetc(fp);
            if (ch < '0' || ch > '7') {
                ungetc(ch, fp);
                break;
            }
            addToken((char)ch, token);
        }
        return finishCharConst(fp, token);
    }
}

/*
 * Consume a string literal; the opening quote has already been read.
 * The literal ends at an unescaped '"'; an unescaped newline or EOF ends
 * it early.  A backslash takes the following character with it.
 *
 * NOTE(review): no string token type is visible in this file, so
 * INVALID_TYPE is returned even for a well-formed literal.  Replace the
 * final return with the proper type once token.h provides one.  Before
 * this change the case fell through into the '/' handler and reported
 * DIVIDE.
 */
static int scanString(FILE* fp, char token[]) {
    int ch;

    addToken('"', token);
    for (;;) {
        ch = fgetc(fp);
        if (ch == EOF || ch == '\n') {
            /* Unterminated: leave the newline for the next call to count. */
            ungetc(ch, fp);
            return INVALID_TYPE;
        }
        addToken((char)ch, token);
        if (ch == '"') {
            break;
        }
        if (ch == '\\') {
            ch = fgetc(fp);
            if (ch == EOF) {
                return INVALID_TYPE;
            }
            if (ch == '\n') {
                lineCount++;
            }
            addToken((char)ch, token);
        }
    }
    return INVALID_TYPE;
}

/* Scan '/', a line comment, or a block comment; the '/' has already been read. */
static int scanSlash(FILE* fp, char token[]) {
    int ch;

    addToken('/', token);
    ch = fgetc(fp);
    if (ch == '/') {
        /* Line comment: the newline is pushed back so the next call counts it. */
        do {
            addToken((char)ch, token);
            ch = fgetc(fp);
        } while (ch != '\n' && ch != EOF);
        ungetc(ch, fp);
        return COMMENT;
    }
    if (ch != '*') {
        ungetc(ch, fp);
        return DIVIDE;
    }

    addToken((char)ch, token);
    for (;;) {
        ch = fgetc(fp);
        if (ch == EOF) {
            /* Unterminated block comment: stop instead of looping forever. */
            return INVALID_TYPE;
        }
        addToken((char)ch, token);
        if (ch == '\n') {
            lineCount++;
            /* Kept from the original: a second newline and two tabs are appended. */
            addToken('\n', token);
            addToken('\t', token);
            addToken('\t', token);
        }
        if (ch == '*') {
            int next = fgetc(fp);

            if (next == '/') {
                addToken((char)next, token);
                return COMMENT;
            }
            /* Not the terminator: re-read it normally so a run like "**" followed by '/' closes. */
            ungetc(next, fp);
        }
    }
}

/* Scan "#include ..." or "#define ..." up to end of line; the '#' has already been read. */
static int scanDirective(FILE* fp, char token[]) {
    int ch;
    int type;

    addToken('#', token);
    ch = fgetc(fp);
    if (!isalpha(ch)) {
        return INVALID_TYPE;
    }
    do {
        addToken((char)ch, token);
        ch = fgetc(fp);
    } while (isalpha(ch));

    if (strcmp("#include", token) == 0) {
        type = INCLUDE;
    } else if (strcmp("#define", token) == 0) {
        type = MACRO;
    } else {
        return INVALID_TYPE;
    }

    /* Rest of the line belongs to the directive; stop at newline or EOF. */
    while (ch != '\n' && ch != EOF) {
        addToken((char)ch, token);
        ch = fgetc(fp);
    }
    /* Push the newline back so the next call increments lineCount for it. */
    ungetc(ch, fp);
    return type;
}

/*
 * Read the next token from fp.
 *
 * fp    - input stream positioned at the start of, or whitespace before,
 *         the next token.
 * token - caller-supplied buffer that receives the token text as a
 *         NUL-terminated string; it is reset to "" on entry.
 *
 * Returns the index into keyWords for a keyword, one of the token type
 * constants (IDENT, ARRAY, INT_CONST, FLOAT_CONST, CHAR_CONST, COMMENT,
 * INCLUDE, MACRO, the operator/punctuation types, INVALID_TYPE), or EOF
 * at end of input.
 *
 * Side effects: skips spaces, tabs, '\r' and '\n'; increments lineCount
 * for every newline it consumes; resets is_HEX to 0 and sets it to 1 when
 * a 0x/0X constant is scanned.
 */
int getToken(FILE* fp, char token[]) {
    int ch;

    token[0] = 0;
    is_HEX = 0;

    do {
        ch = fgetc(fp);
        if (ch == '\n') {
            lineCount++;
        }
    } while (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r');

    if (ch == EOF) {
        return EOF;
    }
    /* In the default "C" locale isalpha() matches exactly a-z and A-Z. */
    if (isalpha(ch) || ch == '_') {
        return scanIdentifier(fp, token, ch);
    }
    if (isdigit(ch)) {
        return scanNumber(fp, token, ch);
    }

    switch (ch) {
    case '.':
        /* NOTE(review): a lone '.' is still reported as FLOAT_CONST, as before. */
        do {
            addToken((char)ch, token);
            ch = fgetc(fp);
        } while (isdigit(ch));
        ungetc(ch, fp);
        return FLOAT_CONST;
    case '\'':
        return scanCharConst(fp, token);
    case '"':
        return scanString(fp, token);
    case '/':
        return scanSlash(fp, token);
    case ',':
        addToken((char)ch, token);
        return COMMA;
    case ';':
        addToken((char)ch, token);
        return SEMI;
    case '=':
        return scanOperator(fp, token, '=', '=', EQUAL, ASSIGN);
    case '!':
        /* Only "!=" is recognised; the character after a lone '!' is consumed. */
        ch = fgetc(fp);
        if (ch != '=') {
            return INVALID_TYPE;
        }
        addToken('!', token);
        addToken((char)ch, token);
        return NOT_EQUAL;
    case '+':
        return scanOperator(fp, token, '+', '+', PLUSPLUS, PLUS);
    case '-':
        return scanOperator(fp, token, '-', '-', MINUSMINUS, MINUS);
    case '*':
        addToken((char)ch, token);
        return TIMES;
    case '>':
        return scanOperator(fp, token, '>', '=', MOREEQUAL, MORE);
    case '<':
        return scanOperator(fp, token, '<', '=', LESSEQUAL, LESS);
    case '(':
        addToken((char)ch, token);
        return LP;
    case ')':
        addToken((char)ch, token);
        return RP;
    case '[':
        addToken((char)ch, token);
        return LM;
    case ']':
        addToken((char)ch, token);
        return RM;
    case '{':
        addToken((char)ch, token);
        return LB;
    case '}':
        addToken((char)ch, token);
        return RB;
    case '#':
        return scanDirective(fp, token);
    default:
        return INVALID_TYPE;
    }
}