%{
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "parser.h"
#include "minibas.h"

/* Size in bytes of str_buf, the buffer string constants are collected in. */
#define MAX_STR_CONST 128
/* Upper bound on the number of characters copy_str() duplicates. */
#define MAX_STRING_SIZE 128

/*
 * Replace flex's default input macro: the scanner obtains its input by
 * calling readInputForLexer() with the buffer, the address of the
 * result variable, and the maximum size. readInputForLexer() is not
 * defined in this file (presumably declared by one of the project
 * headers included above -- confirm).
 */
#undef YY_INPUT
#define YY_INPUT(b,r,s) readInputForLexer(b,&r,s)
/* Text of the string constant currently being scanned by the <str> rules. */
char str_buf[MAX_STR_CONST];
/* Next write position inside str_buf. */
char *str_buf_ptr;
/* Running column counter: advanced by YY_USER_ACTION, reset to 1 by the end-of-line rule. */
int yycolumn;

/*
 * flex hook executed before the action of every matched rule; it keeps
 * the location record yylloc up to date:
 *   - first_line / first_column take over the previous last_line /
 *     last_column,
 *   - last_line / last_column are set from yylineno and the current
 *     value of yycolumn,
 *   - yycolumn is then advanced by the length of the matched text.
 *
 * NOTE(review): last_column is stored before yycolumn is advanced, so it
 * holds the column at which the current token starts, and first_column
 * holds the value stored for the previous token -- confirm this is the
 * intended meaning of the location.
 * NOTE(review): flex only maintains yylineno when "%option yylineno" is
 * given; no such option is visible in this file, so presumably yylineno
 * is updated elsewhere -- TODO confirm.
 */
#define YY_USER_ACTION \
    { \
        yylloc.first_line = yylloc.last_line; \
        yylloc.first_column = yylloc.last_column; \
        yylloc.last_line = yylineno; \
        yylloc.last_column = yycolumn; \
        yycolumn += yyleng; \
    }

/* instr table */
#define SYMLEN 7

/*
 * Instruction mnemonic table.
 *
 * Each entry pairs an upper-case mnemonic (NUL-terminated, at most
 * SYMLEN - 1 characters) with the value instlookup() returns for it.
 * The table ends with an entry whose mnemonic is the empty string.
 */
struct symbol {
	char sym[SYMLEN];
	short val;
} symtab[] = {
	{ "CA", 0x00 },
	{ "IA", 0x10 },
	{ "AA", 0x20 },
	{ "RA", 0x30 },
	{ "SA", 0x40 },
	{ "SM", 0x50 },
	{ "TA", 0x60 },
	{ "SS", 0x70 },
	{ "SI", 0x80 },

	{ "IF", 0xC0 },
	{ "KA", 0xC1 },
	{ "AU", 0xC2 },
	{ "RU", 0xC3 },
	{ "DI", 0xC4 },
	{ "DD", 0xC5 },
	{ "RI", 0xC6 },
	{ "RD", 0xC7 },
	{ "LA", 0xC8 },
	{ "SB", 0xC9 },

	{ "PM", 0xCF },

	{ "AP", 0xD0 },
	{ "TE", 0xE0 },
	{ "TS", 0xF0 },
	{ "", 0 }	/* end-of-table marker */
};

/*
 * Look up an instruction mnemonic, ignoring case.
 *
 * sym: NUL-terminated name to look up. Only the bytes up to and
 *      including its terminator are read, so the string may be shorter
 *      than SYMLEN and may be followed by arbitrary data.
 *
 * Returns the table value of the matching entry, or -1 when the name is
 * not an instruction.
 */
short instlookup(char sym[SYMLEN])
{
	int i;

	for (i = 0; symtab[i].sym[0] != '\0'; i++) {
		const char *entry = symtab[i].sym;
		int j = 0;

		/*
		 * Advance while both strings agree. The loop stops at the
		 * terminator of either string, so nothing past the end of
		 * sym is ever read. The cast keeps toupper() defined for
		 * bytes that are negative as plain char.
		 */
		while (j < SYMLEN && entry[j] != '\0' &&
		       entry[j] == toupper((unsigned char)sym[j])) {
			j++;
		}

		/* A match requires both strings to end at the same place. */
		if ((j == SYMLEN || entry[j] == '\0') && sym[j] == '\0') {
			return symtab[i].val;
		}
	}

	/* if it ever gets here, the symbol is undefined */
	return -1;
}


/*
 * Convert a string of binary digits to its integer value.
 *
 * num: NUL-terminated string that should contain only '0' and '1'
 *      (without any "0b" prefix).
 *
 * Returns the value, 0 for an empty string, or -1 when the string
 * contains any other character or the value does not fit in an int.
 */
int bin2dec(char * num)
{
    int val = 0;

    for (const char *p = num; *p != '\0'; p++) {
        int digit;

        /* Reject everything but the two binary digits; this includes
         * characters that sort below '0'. */
        if (*p != '0' && *p != '1') {
            return -1;
        }
        digit = *p - '0';

        /* Refuse to shift in another digit if that would overflow. */
        if (val > (INT_MAX - digit) / 2) {
            return -1;
        }
        val = (val * 2) + digit;
    }

    return val;
}

/*
 * Duplicate a string on the heap.
 *
 * in: NUL-terminated string to copy. At most MAX_STRING_SIZE characters
 *     are copied; longer input is truncated.
 *
 * Returns a newly malloc()ed, NUL-terminated copy that the caller owns.
 * Never returns NULL: callers use the result without checking it, so an
 * allocation failure is reported on stderr and terminates the program.
 */
char* copy_str(const char *in)
{
	/* memchr() stops at the first NUL, giving a bounded length using
	 * only ISO C functions. */
	const char *nul = memchr(in, '\0', MAX_STRING_SIZE);
	size_t len = (nul != NULL) ? (size_t)(nul - in) : MAX_STRING_SIZE;
	char *buf = malloc(len + 1);

	if (buf == NULL) {
		fprintf(stderr, "copy_str: out of memory\n");
		exit(EXIT_FAILURE);
	}

	memcpy(buf, in, len);
	buf[len] = '\0';
	return buf;
}

%}

/* Generate the scanner as lexer.c together with a lexer.h header. */
%option outfile="lexer.c" header-file="lexer.h"
/* The scanner does not call yywrap() at end of input. */
%option noyywrap

/* Exclusive start conditions: c_comment is active inside block comments,
   str while a double-quoted string constant is being collected. */
%x c_comment
%x str

/* Named patterns for the newline and the single-character tokens. The
   rules section refers to them as {NAME}. */
EOL  "\n"
COMMA  ","
DOT  "."
AT    "@"
COLON ":"
LPAR  "("
RPAR  ")"
PLUS  "+"
MINUS "-"
TIMES "*"
DIVIDE "/"

/* Named patterns for blanks, numeric literals, character constants and
   identifiers:
     WS         run of spaces and tabs
     HEX        0x or 0X followed by hexadecimal digits
     DEC        decimal number that does not start with 0
     OCT        0 followed by octal digits (a lone 0 included)
     BIN        0b or 0B followed by binary digits
     CONSTCHAR  a single character between single quotes (anything but
                carriage return, newline and the single quote), or the
                two-character escape backslash-zero between quotes
     IDENT      letter or underscore, then letters, digits, underscores
   Inside a character class only a leading caret negates; the class of
   CONSTCHAR therefore lists the excluded characters after one caret. */
WS          [ \t]+
HEX         0[Xx][0-9A-Fa-f]+
DEC         [1-9][0-9]*
OCT         0[0-7]*
BIN         0[Bb][0-1]+
CONSTCHAR   '([^\r\n']|\\0)'
IDENT       [A-Za-z_][A-Za-z0-9_]*

/* Named patterns for the directive keywords. Each is a quoted literal,
   so it is matched exactly as spelled here (lower case, leading dot). */
BYTE    ".byte"
WORD    ".word"
ORG     ".org"
ASCIZ   ".asciz"
ASCII   ".ascii"
SPACE   ".space"
EQU     ".equ"
ALIGN   ".align"

%%
\"                      { /* opening quote: start collecting a string
                           * constant into str_buf; the <str> rules
                           * below handle its contents */
                            str_buf_ptr = str_buf; BEGIN(str);
                        }

<str>\"                 { /* saw closing quote - all done */
                            BEGIN(INITIAL);
                            /* always in bounds: the rules below never
                             * advance past str_buf + MAX_STR_CONST - 1 */
                            *str_buf_ptr = '\0';
                            /* return string constant token type and
                             * value to parser
                             *
                             * NOTE(review): the value points at the
                             * shared str_buf, which the next string
                             * constant overwrites, while the other
                             * rules pass a copy_str() copy. Confirm
                             * the parser uses the text before another
                             * string is scanned.
                             */
                            yylval.ident = str_buf; return T_STRING;
                        }
<str>\n                 {
                            /* error - unterminated string constant */
                            /* generate error message */
                            /* a new line starts here, so restart the
                             * column count as the end-of-line rule does */
                            yycolumn = 1;
                        }

<str>\\[0-7]{1,3}       {
                            /* octal escape sequence */
                            unsigned long result = strtoul(yytext + 1, NULL, 8);

                            if ( result > 0xff )
                                ;/* error, constant is out-of-bounds */

                            /* store only while there is room, keeping
                             * one byte for the terminating NUL */
                            if (str_buf_ptr < str_buf + MAX_STR_CONST - 1)
                                *str_buf_ptr++ = (char)result;
                        }

<str>\\[0-9]+           {
                            /* generate error - bad escape sequence; something
                             * like '\48' or '\0777777'
                             */
                        }

<str>\\b                { if (str_buf_ptr < str_buf + MAX_STR_CONST - 1) *str_buf_ptr++ = '\b'; }
<str>\\f                { if (str_buf_ptr < str_buf + MAX_STR_CONST - 1) *str_buf_ptr++ = '\f'; }
<str>\\n                { if (str_buf_ptr < str_buf + MAX_STR_CONST - 1) *str_buf_ptr++ = '\n'; }
<str>\\r                { if (str_buf_ptr < str_buf + MAX_STR_CONST - 1) *str_buf_ptr++ = '\r'; }
<str>\\t                { if (str_buf_ptr < str_buf + MAX_STR_CONST - 1) *str_buf_ptr++ = '\t'; }
<str>\\\\               { /* two backslashes in the source give one backslash */
                            if (str_buf_ptr < str_buf + MAX_STR_CONST - 1) *str_buf_ptr++ = '\\';
                        }
<str>\\'                { if (str_buf_ptr < str_buf + MAX_STR_CONST - 1) *str_buf_ptr++ = '\''; }
<str>\\\"               { /* escaped double quote does not end the string */
                            if (str_buf_ptr < str_buf + MAX_STR_CONST - 1) *str_buf_ptr++ = '"';
                        }

<str>[^\\\n\"]+         {
                            /* run of ordinary characters; anything that
                             * does not fit in str_buf is dropped
                             * (TODO: report over-long string constants) */
                            char *yptr = yytext;

                            while (*yptr && str_buf_ptr < str_buf + MAX_STR_CONST - 1)
                                *str_buf_ptr++ = *yptr++;
                        }

{WS}                    { }                     /* blanks and tabs between tokens are skipped */

"#"[^\n\r]*             { }                     /* ignores everything from a # character to the end of the line */
"/*"                    { BEGIN(c_comment); }
<c_comment>[^*\n]+      { }                     /* ignores everything that is not a * character or a newline */
<c_comment>"*"          { }                     /* ignores a * character that does not close the comment */
<c_comment>"\n"         { /* a newline inside a block comment is still reported to the parser;
                           * restart the column count as the end-of-line rule does */
                          yycolumn = 1; return T_COMM; }
<c_comment>"*/"         { BEGIN(INITIAL); }
\/\/.*                  { }                     /* ignores everything from // to the end of the line */


{HEX}       { /* numeric literals: the pattern has already validated the
               * digits, so the conversions cannot fail to parse */
              yylval.num = (int)strtoul(yytext, NULL, 16); return T_NUM; }
{DEC}       { yylval.num = (int)strtol(yytext, NULL, 10); return T_NUM; }
{OCT}       { yylval.num = (int)strtoul(yytext, NULL, 8); return T_NUM; }
{BIN}       { /* skip the two-character 0b prefix */
              yylval.num = bin2dec(&yytext[2]); return T_NUM; }
{CONSTCHAR} { /* four matched characters can only be the quoted
               * backslash-zero escape, whose value is 0; otherwise the
               * value is the single quoted character */
              yylval.num = (yyleng == 4) ? 0 : yytext[1]; return T_NUM; }
{PLUS}      { return T_PLUS; }
{MINUS}     { return T_MINUS; }
{TIMES}     { return T_TIMES; }
{DIVIDE}    { return T_DIVIDE; }
{LPAR}      { return T_LPAR; }
{RPAR}      { return T_RPAR; }
{COMMA}     { return T_COMMA; }
{DOT}       { return T_DOT; }
{AT}        { return T_AT; }
{EOL}       { /* a new line starts: restart the column count */
              yycolumn = 1; return T_EOL; }

{IDENT}     {
              /* A bare name is an instruction mnemonic when
               * instlookup() knows it, otherwise a plain identifier.
               *
               * NOTE(review): for an instruction the copy made here is
               * no longer referenced once yylval.num is stored, if
               * yylval is a union -- confirm and free it if so.
               */
              short opcode;

              yylval.ident = copy_str(yytext);
              opcode = instlookup(yylval.ident);
              if (opcode != -1) {
                yylval.num = opcode;
                return T_INSTR;
              }
              return T_IDENT;
            }
{IDENT}{COLON}  { /* label definition: cut off the trailing colon
                   * before the name is copied */
                  yytext[yyleng - 1] = '\000';
                  yylval.ident = copy_str(yytext);
                  return T_LABEL;
                }

{BYTE}      { /* directive keywords: each rule returns its own token type
               * and passes a copy_str() copy of the matched text as the
               * token value */
              yylval.ident = copy_str(yytext); return T_BYTE; }
{WORD}      { yylval.ident = copy_str(yytext); return T_WORD; }
{ORG}       { yylval.ident = copy_str(yytext); return T_ORG; }
{ASCIZ}     { yylval.ident = copy_str(yytext); return T_ASCIZ; }
{ASCII}     { yylval.ident = copy_str(yytext); return T_ASCII; }
{SPACE}     { yylval.ident = copy_str(yytext); return T_SPACE; }
{EQU}       { yylval.ident = copy_str(yytext); return T_EQU; }
{ALIGN}     { yylval.ident = copy_str(yytext); return T_ALIGN; }

