version 0.6.0

Data Types

type  mod_lexer::t_keyword_name
 Keyword name container. More...
 
type  mod_lexer::t_keyword_list
 List of keywords. More...
 
type  mod_lexer::t_keyword_tree
 Tree of keywords built from the keyword list. More...
 
type  mod_lexer::t_token
 Token definition. A token corresponds to the smallest lexical unit (lexeme) of a source file. It is composed of one or several characters, except for special tokens such as 'end of file' and 'error'. A token is characterized by its kind (defined above) and its value; additional information, such as its position in the source file, can also be embedded. More...
 
type  mod_lexer::t_token_buffer_element
 Token buffer element. The lexer has a buffering system: if you read a token but do not need it yet, you can push it back into the buffer. The buffer is built as a stack. Two derived types are defined: 't_token_buffer_element', an element of the stack, and 't_token_buffer', the stack itself. A short sketch of this mechanism follows the list below. More...
 
type  mod_lexer::t_token_buffer
 Token buffer. More...
 
type  mod_lexer::t_lexer
 Lexer. More...
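
As a minimal sketch of the buffering system (assuming a lexer 'lex' already initialized, as in the Usage section below), a token can be read and then pushed back so that the next read returns it again:

type(t_token) :: tok
! Read one token, then push it back onto the buffer stack;
! the next call to lexer_read_token returns the same token.
call lexer_read_token(lex, tok)
call lexer_unget_token(lex, tok)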
 

Functions/Subroutines

subroutine mod_lexer::lexer_read_token (lex, tok)
 Read a token and ignore spaces and new lines. Also detect keywords. More...
 
subroutine mod_lexer::lexer_unget_token (lex, tok)
 Push a token into the buffer. More...
 
subroutine mod_lexer::lexer_peek_token (lex, tok)
 Read a token and push it into the buffer. More...
 
logical function mod_lexer::lexer_next_token (lex, kind, peek)
 Check if the next token is of the given kind. More...
 
subroutine mod_lexer::lexer_set_position (lex, tok)
 Set the source file read position to the position of the given token. More...
 
character(len=:) function, allocatable mod_lexer::token_id_to_string (kind)
 Return a string with the formal name of a token kind. Useful for debugging and error management. More...
 
subroutine mod_lexer::lexer_initialize (lex, filename, keywords)
 Initialize a lexer type. More...
 
subroutine mod_lexer::token_finalize (tok)
 Deallocate a token. More...
 
subroutine mod_lexer::lexer_finalize (lex)
 Deallocate a lexer. More...
 
subroutine mod_lexer::lexer_read_new_source_file (lex, filename)
 Open a new source file and switch the reading to this new file. More...
 
subroutine mod_lexer::lexer_read_new_source_file_from_string (lex, string, label)
 Switch the reading to the provided string. More...
 
character function mod_lexer::lexer_peek (lex)
 Read a character without advancing the position in the source file. More...
 
logical function mod_lexer::lexer_next (lex, expect)
 Check if the next character is the expected character. More...
 
subroutine mod_lexer::lexer_skip_line (lex)
 Skip all characters until a new line character is reached or until the file ends. More...
 
logical function mod_lexer::lexer_skip_one_space_char (lex)
 Skip one space character. Return .true. if a space character has been skipped. Comments are also skipped and are treated as space characters. More...
 
subroutine mod_lexer::lexer_skip_spaces (lex, is_skipped)
 Skip all spaces until a non-space character is reached. Also skip comments. More...
 

Detailed Description

Brief description

The lexer transforms a stream of characters into a stream of tokens by grouping characters according to patterns. For instance, a sequence of adjacent digits forms an integer token.

Let us recall the lexer/parser organization with its three main units:

                                keyword list
                                      v
        ┌──────────┐            ┏━━━━━━━━━━━┓          ┌────────────┐
 files  │          │ characters ┃           ┃  tokens  │            │ ───────>
 ─────> │   File   │ ──────────>┃   Lexer   ┃ ───────> │   Parser   │  tokens / actions
        │          │            ┃           ┃          │            │ <───────
        └──────────┘            ┗━━━━━━━━━━━┛          └────────────┘
       mod_source_file            mod_lexer             mod_parser
                                                        mod_identifier
                                                        mod_scope

Usage

As an example, let us consider the following source file (example.txt):

# Print a message to screen
print "I love Notus <3";
# Compute the square of 42 and print the result to screen
square 42;

Specifications:

To tokenize this language, the lexer requires a list of keywords so that it can convert identifier tokens (tk_identifier) into keyword tokens (tk_keyword).

The following code initializes the lexer with a list of keywords and a source file:

! Define an enumeration to identify the keywords
enum, bind(c)
   enumerator :: kw_print = 1
   enumerator :: kw_square
end enum

type(t_lexer) :: lex
type(t_keyword_name), dimension(2) :: keywords
type(t_token) :: tok

! Create the list of keywords
keywords(kw_print )%name = "print"
keywords(kw_square)%name = "square"

! Initialize the lexer with the file name and the keyword list
call lexer_initialize(lex, "example.txt", keywords)

The following code reads the file and displays the tokens:

! Read the first token
call lexer_read_token(lex, tok)

! Loop until EOF is reached
do while (tok%kind /= tk_eof)
   ! Print the content of the token
   select case (tok%kind)
   case (tk_error)
      write(*,'("Error found at position ",g0)') tok%pos
      stop
   case (tk_keyword)
      write(*,'("kind: ",a," name: ",a)') trim(token_id_to_string(tok%kind)), keywords(tok%keyword_id)%name
   case (tk_string)
      write(*,'("kind: ",a," value: ",g0)') trim(token_id_to_string(tok%kind)), tok%string
   case (tk_integer)
      write(*,'("kind: ",a," value: ",g0)') trim(token_id_to_string(tok%kind)), tok%integer_value
   case default
      write(*,'("kind: ",a)') trim(token_id_to_string(tok%kind))
   end select

   ! Read the next token
   call lexer_read_token(lex, tok)
end do
write(*,'("EOF")')

Expected output:

kind: keyword name: print
kind: string value: I love Notus <3
kind: semicolon
kind: keyword name: square
kind: integer value: 42
kind: semicolon
EOF

Function/Subroutine Documentation

◆ lexer_finalize()

subroutine mod_lexer::lexer_finalize ( type(t_lexer), intent(inout)  lex)
Parameters
    [in,out]  lex  Lexer

◆ lexer_initialize()

subroutine mod_lexer::lexer_initialize ( type(t_lexer), intent(out)  lex,
character(len=*), intent(in)  filename,
type(t_keyword_name), dimension(:), intent(in)  keywords 
)
Parameters
    [out]  lex       Lexer to initialize
    [in]   filename  Source file name
    [in]   keywords  List of keywords

◆ lexer_next()

logical function mod_lexer::lexer_next ( type(t_lexer), intent(inout)  lex,
character, intent(in)  expect 
)

If the next character does not match the expected character, the position in the source file is not advanced.

Parameters
    [in,out]  lex     Lexer
    [in]      expect  Expected character
Returns
    .true. if the next character matches the expected character
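
A minimal usage sketch (hypothetical: the two-character operator ":=" is not part of the example language above):

! Recognize ":=": each call consumes the character only on a match.
if (lexer_next(lex, ':')) then
   if (lexer_next(lex, '=')) then
      ! ":=" has been read
   end if
end if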

◆ lexer_next_token()

logical function mod_lexer::lexer_next_token ( type(t_lexer), intent(inout)  lex,
integer, intent(in)  kind,
logical, optional  peek 
)

Behavior:

  • If no match is found, the token is pushed back into the buffer.
  • If peek is present and set to .true., the token is always pushed back into the buffer.
Parameters
    [in,out]  lex   Lexer
    [in]      kind  Token kind
              peek  Peek instead of get if set to .true.
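
A minimal sketch, reusing the token kind tk_semicolon from the Usage section:

! Consume a semicolon if it is the next token; on a mismatch the
! token stays in the buffer and the condition is .false.
if (lexer_next_token(lex, tk_semicolon)) then
   ! ... semicolon consumed ...
end if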

◆ lexer_peek()

character function mod_lexer::lexer_peek ( type(t_lexer), intent(inout)  lex)
Parameters
    [in,out]  lex  Lexer
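
A minimal sketch combining lexer_peek with lexer_skip_line (assuming '#' starts a comment, as in the Usage example file):

! Look at the next character without consuming it.
if (lexer_peek(lex) == '#') then
   call lexer_skip_line(lex)   ! skip the rest of the comment line
end if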

◆ lexer_peek_token()

subroutine mod_lexer::lexer_peek_token ( type(t_lexer), intent(inout)  lex,
type(t_token), intent(out)  tok 
)
Parameters
    [in,out]  lex  Lexer
    [out]     tok  Token

◆ lexer_read_new_source_file()

subroutine mod_lexer::lexer_read_new_source_file ( type(t_lexer), intent(inout)  lex,
character(len=*), intent(in)  filename 
)
Parameters
    [in,out]  lex       Lexer
    [in]      filename  Source file name

◆ lexer_read_new_source_file_from_string()

subroutine mod_lexer::lexer_read_new_source_file_from_string ( type(t_lexer), intent(inout)  lex,
character(len=*), intent(in)  string,
character(len=*), intent(in)  label 
)
Parameters
    [in,out]  lex     Lexer
    [in]      string  String to use as the source
    [in]      label   Label for the string
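
A minimal sketch, lexing an in-memory string instead of a file (useful for tests; the lexer 'lex' is assumed to be already initialized):

! Switch the lexer to a hard-coded statement; the label identifies
! the string in place of a file name.
call lexer_read_new_source_file_from_string(lex, 'square 42;', "inline-example")
call lexer_read_token(lex, tok)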

◆ lexer_read_token()

subroutine mod_lexer::lexer_read_token ( type(t_lexer), intent(inout)  lex,
type(t_token), intent(out)  tok 
)
Parameters
    [in,out]  lex  Lexer
    [out]     tok  Token

◆ lexer_set_position()

subroutine mod_lexer::lexer_set_position ( type(t_lexer), intent(inout)  lex,
type(t_token), intent(in)  tok 
)
Parameters
    [in,out]  lex  Lexer
    [in]      tok  Token containing the source position
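
A minimal backtracking sketch (speculative parsing), relying on the position embedded in tokens:

type(t_token) :: mark
! Read a token to remember the current point in the source.
call lexer_read_token(lex, mark)
! ... attempt to parse one alternative ...
! Rewind: the next read starts again at the saved token.
call lexer_set_position(lex, mark)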

◆ lexer_skip_line()

subroutine mod_lexer::lexer_skip_line ( type(t_lexer), intent(inout)  lex)
Parameters
    [in,out]  lex  Lexer

◆ lexer_skip_one_space_char()

logical function mod_lexer::lexer_skip_one_space_char ( type(t_lexer), intent(inout)  lex)
Parameters
    [in,out]  lex  Lexer
Returns
    .true. if any space character has been skipped

◆ lexer_skip_spaces()

subroutine mod_lexer::lexer_skip_spaces ( type(t_lexer), intent(inout)  lex,
logical, intent(out)  is_skipped 
)
Parameters
    [in,out]  lex         Lexer
    [out]     is_skipped  .true. if any space character has been skipped

◆ lexer_unget_token()

subroutine mod_lexer::lexer_unget_token ( type(t_lexer), intent(inout)  lex,
type(t_token), intent(in)  tok 
)
Parameters
    [in,out]  lex  Lexer
    [in]      tok  Token

◆ token_finalize()

subroutine mod_lexer::token_finalize ( type(t_token), intent(inout)  tok)
Parameters
    [in,out]  tok  Token

◆ token_id_to_string()

character(len=:) function, allocatable mod_lexer::token_id_to_string ( integer, intent(in)  kind)
Parameters
    [in]  kind  Token kind
Returns
    Formal name of the token kind