; fcallparm.pb
IncludeFile "..\base2.pb"
IncludeFile "..\labels.pb"
IncludeFile "..\symboltable.pb"
Procedure.s VarRef(*V.SVariable)
Select *V\RefKind
Case #Ref_Global
ProcedureReturn "[v_" + *V\Name + "]"
Default
Error("Unknown variable scope kind")
EndSelect
EndProcedure
; Return the asm for accessing the value of a variable
Procedure.s VarValue(Name.s)
Protected *V.SSymbol = LookupSymbol(Name)
If *V And *V\Kind = #SKind_Variable
ProcedureReturn VarRef(*V)
Else
Error("Undeclared variable: " + Name)
EndIf
EndProcedure
Declare Expression()
Procedure FunctionParameters(*F.SFunction)
Protected ParamStackSize = *F\ParamCount*4 ; Note, it's 4 because we use longs. Doubles and quads would be 8!
Protected I
Emit2("sub ", "esp", Str(ParamStackSize)) ; Make room on the stack for the parameters
For I = 0 To *F\ParamCount-1
Expression()
Emit("mov [esp+" + Str(I*4) + "], eax")
If I <> *F\ParamCount-1 ; If the current parameter is NOT the last one
MatchWhite(',') ; Get a comma and whitespace to prepare for the next parameter
EndIf
Next
EndProcedure
Procedure FunctionCall(Name.s)
Protected *F.SFunction = LookupSymbolNoScopeResolve(Name)
If *F And *F\Kind = #SKind_Function
If *F\ParamCount > 0
FunctionParameters(*F)
EndIf
Emit("call " + *F\Ref)
Else
Error("Undeclared function: " + Name)
EndIf
EndProcedure
Procedure Value()
; value ::= <integer> | <variable> | <fcall()> | '(' <expression> ')'
Protected Name.s
If IsDigit(Look)
Emit("mov eax, " + GetInteger())
ElseIf IsAlpha(Look)
Name = GetName()
If Look = '('
Match('(')
FunctionCall(Name)
Match(')')
Else
Emit("mov eax, " + VarValue(Name))
EndIf
Else
Expected("value")
EndIf
EndProcedure
Procedure Expression()
; expression ::= <value>
Value()
EndProcedure
Procedure Assignment(Name.s)
; assignment ::= <name> = <expression>
MatchWhite('=')
Expression()
Emit2("mov ", VarValue(Name), "eax")
EndProcedure
Procedure VariableDeclaration()
; vardecl ::= var <name> as <type>
Protected Name.s
Protected TypeS.s, TypeI.i
Protected *V.SVariable
; Get name and type
Name = GetName()
If GetName() <> "as"
Expected("as")
EndIf
TypeS = GetName()
AddVariable(Name, TypeS, #Ref_Global, 0)
EndProcedure
Procedure Statement()
Protected N.s
N = GetName()
If Look = '('
Match('(')
FunctionCall(N)
Match(')')
Else
Select N
Case "var": VariableDeclaration()
Default: Assignment(N)
EndSelect
EndIf
EndProcedure
Procedure Program()
; program ::= { <assignment> }
EatWhiteNewlines()
While Look
Statement()
EatWhiteNewlines()
Wend
EndProcedure
Procedure GlobalAsmDefinition(ScopedName.s, *V.SVariable)
If *V\Kind = #SKind_Variable
If *V\RefKind = #Ref_Global
Emit("v_" + *v\Name + " dd 0")
EndIf
EndIf
EndProcedure
Procedure Footer()
Emit("")
Emit("push 0")
Emit("call _ExitProcess")
Emit("; Global variables: ")
EnumerateSymbols(@GlobalAsmDefinition())
EndProcedure
; Add a few functions to the symbol table so we can call them
*F.SFunction = AddFunction("MyFunc")
*F\Type = #TyLong
*F\ParamCount = 1
*F\ParamTypes[0] = #TyLong
*F.SFunction = AddFunction("DoCoolStuff")
*F\Type = #TyLong
*f\ParamCount = 3
*f\ParamTypes[0] = #TyLong
*f\ParamTypes[1] = #TyLong
*f\ParamTypes[2] = #TyLong
InitMulti()
Program()
Footer()
Input()
10 January 2010
Listing: fcallparm.pb
Listing: fcall.pb
; fcall.pb
IncludeFile "..\base2.pb"
IncludeFile "..\labels.pb"
IncludeFile "..\symboltable.pb"
Procedure.s VarRef(*V.SVariable)
Select *V\RefKind
Case #Ref_Global
ProcedureReturn "[v_" + *V\Name + "]"
Default
Error("Unknown variable scope kind")
EndSelect
EndProcedure
; Return the asm for accessing the value of a variable
Procedure.s VarValue(Name.s)
Protected *V.SSymbol = LookupSymbol(Name)
If *V And *V\Kind = #SKind_Variable
ProcedureReturn VarRef(*V)
Else
Error("Undeclared variable: " + Name)
EndIf
EndProcedure
Procedure FunctionCall(Name.s)
*V.SFunction = LookupSymbolNoScopeResolve(Name)
If *V And *v\Kind = #SKind_Function
Emit("call " + *v\Ref)
Else
Error("Undeclared function: " + Name)
EndIf
EndProcedure
Procedure Value()
; value ::= <integer> | <variable> | <fcall()> | '(' <expression> ')'
Protected Name.s
If IsDigit(Look)
Emit("mov eax, " + GetInteger())
ElseIf IsAlpha(Look)
Name = GetName()
If Look = '('
Match('(')
FunctionCall(Name)
Match(')')
Else
Emit("mov eax, " + VarValue(Name))
EndIf
Else
Expected("value")
EndIf
EndProcedure
Procedure Expression()
; expression ::= <value>
Value()
EndProcedure
Procedure Assignment(Name.s)
; assignment ::= <name> = <expression>
MatchWhite('=')
Expression()
Emit2("mov ", VarValue(Name), "eax")
EndProcedure
Procedure VariableDeclaration()
; vardecl ::= var <name> as <type>
Protected Name.s
Protected TypeS.s, TypeI.i
Protected *V.SVariable
; Get name and type
Name = GetName()
If GetName() <> "as"
Expected("as")
EndIf
TypeS = GetName()
AddVariable(Name, TypeS, #Ref_Global, 0)
EndProcedure
Procedure Statement()
Protected N.s
N = GetName()
If Look = '('
Match('(')
FunctionCall(N)
Match(')')
Else
Select N
Case "var": VariableDeclaration()
Default: Assignment(N)
EndSelect
EndIf
EndProcedure
Procedure Program()
; program ::= { <assignment> }
EatWhiteNewlines()
While Look
Statement()
EatWhiteNewlines()
Wend
EndProcedure
Procedure GlobalAsmDefinition(ScopedName.s, *V.SVariable)
If *V\Kind = #SKind_Variable
If *V\RefKind = #Ref_Global
Emit("v_" + *v\Name + " dd 0")
EndIf
EndIf
EndProcedure
Procedure Footer()
Emit("push 0")
Emit("call _ExitProcess")
Emit("; Global variables: ")
EnumerateSymbols(@GlobalAsmDefinition())
EndProcedure
; Add a few functions to the symbol table so we can call them
*F.SFunction = AddFunction("MyFunc")
*F\Type = #TyLong
*F.SFunction = AddFunction("DoCoolStuff")
*F\Type = #TyLong
InitMulti()
Program()
Footer()
Input()
15. Function calls
First of all, some adjustments are needed to the symbol table. If you got symboltable.pb before this was posted, go here to get the working version: http://pbtut.blogspot.com/2010/01/listing-symboltablepb.html
Read through the new symboltable.pb and experiment a bit with it until you understand it.
A function call with no parameters is rather simple, it's just a call instruction, like this:
Make a copy of symsimple.pb and name if fcall.pb. We'll work on fcall.pb now.
A function call, like a variable, represents a value. Thus it makes sense to allow a function call as a new kind of value:
We implement this with a rather simple change of Value():
If a ( follows a name, then the name represents a function call, else the name represents a variable, as before.
On the x86 platform it is customary that function that returns an integer result returns it in the register eax. Which is very convenient for us, because it's the same place we load values from variables!
We emit the function call from a special procedure which checks if the function name is properly declared:
Pay close attention to LookupSymbolNoScopeResolve(). We use this instead of a normal LookupSymbol() to prevent local variables from shadowing global functions. LookupSymbolNoScopeResolve() looks for global symbols only (unless we use a ":" in the name string), thus it will always find a function if it is declared.
Notice also that we have to check if *V is true (not null) before accessing the member *V\Kind. If the lookup fails we must not access *V\Kind because it will result in a memory access error and a crash. If *V is false then *V\Kind will not be accessed due to short-circuit boolean evaluation, see Wikipedia for more information.
That's it! We can now do simple function calls like "callme()" provided we assign the result to some variable:
var a as long
a = callme()
There is a big problem, though: there is no way to declare functions. Just cheat by adding some directly from the source code. Put this near the end of your program, just before the call to InitMulti():
Input example:
By default we prepend an underscore to the names of function to create the function label in asm. This is to avoid name clashing with internal compiler labels, and is standard practice on Windows. On Linux it's not, which of course opens for huge problems.
Ok, so our program works if we assign the result to a variable. What if we want to call the function and ignore the return value, like this:
To allow this we need a new kind of statement: A single function call. The rules are the same as for the value: If the name is followed by a ( it's a function call.
New Statement():
Test that your compiler works as you expect:
Notice that the result of DoCoolStuff() is left in eax only to be overwritten by the next statement (mov eax, [v_a]). This is what we want when we're not storing the result.
Here is the source of fcall.pb: here.
What if you want to do parameters? Now it suddenly gets hard. Because the parameters have to pushed onto the stack in reverse order.
So CallMe(9, 11) should be translated into this:
Now there's no hardware rule that specifies it has to be this way, it's just done this way because the unix/c people rule the world, and these are the people who call the plumber when their printer is on fire, because, you know, ;everything is a pipe, right? Traditionally, Pascal compilers pushed the parameters from left to right as one would expect and everyone was happy.
(Note: The real reason C compilers started to push the parameters in reverse order is because they thought this was the only way to do vararg functions (functions with an unknown number of parameters). Vararg functions are evil, and they can actually be done in a safer and easier way by pushing from left-to-right anyways.)
So what do we do? Since we are not a big company that can afford to spend tons of man-hours on our compiler, we'll take the easy way out and cheat a little. There are two ways of faking right-to-left pushing of parameters.
Way 1: Buffer output. Let's say you have this code: "F16(a, b+1)". First we compile the code to push a (), but instead of outputting it, we write it to a temporary buffer. Then we compile the code to push b+1 (mov eax, dword [v_b], inc eax, push eax). Since this is the last parameter we write it to output and then writes the temporary buffer to output. The result is that statements appear in the correct order in the output!
If you feel intimidated by this mess, rest assured that we won't see it! There is a simpler way.
Way 2: Instead of evaluating b+1 (or 11) before a (or 9), we'll set aside all the required stack space before evaluating any parameters. Then we will evaluate the parameters from left to right and move the result into the correct position on the stack, and call the function:
"sub esp, 8" is is equal to two pushes of 4 bytes each, except we don't specify the values to be pushed, so the values will be undefined.
We move in the correct values next.
This gives a slightly less effective code. Get used to it. Everything that is simple and correct is less effective.
We must modify the FunctionCall() to check if the called function has any formal parameters (parameters declared when defining the function). If the function has formal parameters (*F\ParamCount > 0) we call FunctionParameters(*F) to handle the actual parameters (parameters speficied when calling).
How is the parameter list of a function call structured? Answer: Each parameter is an expression. Each parameter but the last one is followed by a comma (,). The last parameter is followed by a ')'.
In our implementation of FunctionParameters() we already know the number of parameters. So we will loop this many times to get the parameters and eat the commas in between.
What then happens if too few parameters are specified? Like in "ThreeParameters(1, 2)"? In this case we will try to eat a comma but we'll find a ')' instead. So we'll get an error message.
What happens if too many parameters are specified? FunctionParameters() will then return happily, FunctionCall() will emit it's "call" asm code and then return happily, then the code that called FunctionCall() will try to match(')') and when this fails, we will get a generic error message (')' expected, ',' found).
Here's the magic:
Before testing change our predefined functions to take parameters.
First test:
Let's try something more complicated.
It's hard to see at a glance whether this code is correct. Let's test it in PB:
Some issues with function calling that I haven't covered:
- You push the parameters onto the stack, but don't take them off. They are taken off the stack inside the function. So when a call returns the stack pointer is where it was before you pushed the parameters. There is an exception: some functions uses the so called "cdecl" calling convention. In this case you have to adjust the stack pointer yourself after the call. This is rare on Windows.
- Functions are free to modify the three registers eax, ecx and edx. These registers are called "volatile", and you must expect them to contain nonsense values after a call instruction. The exception is eax, which contains the return value of the function, if any. If the function does not have a return value (not even 0) then expect eax to be a random number as well.
So if you have some valuable value in one of these registers you must push it BEFORE pushing the function parameters and pop it after the call instruction. This really becomes an issue when dealing with expressions.
- The stack is always aligned by 4. So even if the parameters are bytes or words, they will use 4 bytes each, of which 3 or 2 are wasted.
- Larger types (like double and quad) use 8 bytes each. You must use two mov instructions to cover them!
- When you implement types in your compiler, you will have to ensure that actual parameters are converted to the correct type (the type of the formal parameters) as they are pushed on the stack, else havoc will ensue.
- When returning floating point values from functions, they are not returned in eax, but in ST0, which is a floating point register. This applies to doubles, floats and extended (10-byte float).
- If a floating point result (in ST0) is to be discarded, it MUST BE POPPED OF THE FPU STACK else you'll get problems and it will be near impossible to find the source. Use this asm code to do it: "FSTP st0".
- When returning quads, the first four bytes goes in eax, the next four bytes goes in edx. (Or was it the reverse?)
- I recommend that you read this: http://www.agner.org/optimize/calling_conventions.pdf. You don't have to understand all of it. The interesting parts are 32-bit stdcall (which we use) and 32-bit cdecl (which is very similar and rather common, especially on Linux).
Full listing: fcallparm.pb listing.
Exercises:
1. Single step through the code until you know for sure what's going on.
2. Write a complex (nested) function call and compile it by hand, pretending to be your compiler. Then type the complex function call into the compiler, and see if it generates the same code as you did. If the codes are not identical, examine why, and try again until you get it correct on first try every time.
3. Add the following error message: "Not enough parameters to function " + FunctionName + . Show it if a function is called with too few parameters.
4. Add the following error message: "Too many parameters to function" + FunctionName. Show it if a function is called with too many parameters.
5. Extend the error message from ex. 3 to tell the user how many parameters are actually expected.
Read through the new symboltable.pb and experiment a bit with it until you understand it.
A function call with no parameters is rather simple, it's just a call instruction, like this:
call FunctionAddressOrLabelRepresentingThatAddress
Make a copy of symsimple.pb and name if fcall.pb. We'll work on fcall.pb now.
A function call, like a variable, represents a value. Thus it makes sense to allow a function call as a new kind of value:
value ::= <integer> | <variable> | <fcall()> | '(' <expression> ')'
We implement this with a rather simple change of Value():
Procedure Value()
; value ::= <integer> | <variable> | <fcall()> | '(' <expression> ')'
Protected Name.s
If IsDigit(Look)
Emit("mov eax, " + GetInteger())
ElseIf IsAlpha(Look)
Name = GetName()
If Look = '('
Match('(')
FunctionCall(Name)
Match(')')
Else
Emit("mov eax, " + VarValue(Name))
EndIf
Else
Expected("value")
EndIf
EndProcedure
If a ( follows a name, then the name represents a function call, else the name represents a variable, as before.
On the x86 platform it is customary that function that returns an integer result returns it in the register eax. Which is very convenient for us, because it's the same place we load values from variables!
We emit the function call from a special procedure which checks if the function name is properly declared:
Procedure FunctionCall(Name.s)
*V.SFunction = LookupSymbolNoScopeResolve(Name)
If *V And *V\Kind = #SKind_Function
Emit("call " + *v\Ref)
Else
Error("Undeclared function: " + Name)
EndIf
EndProcedure
Pay close attention to LookupSymbolNoScopeResolve(). We use this instead of a normal LookupSymbol() to prevent local variables from shadowing global functions. LookupSymbolNoScopeResolve() looks for global symbols only (unless we use a ":" in the name string), thus it will always find a function if it is declared.
Notice also that we have to check if *V is true (not null) before accessing the member *V\Kind. If the lookup fails we must not access *V\Kind because it will result in a memory access error and a crash. If *V is false then *V\Kind will not be accessed due to short-circuit boolean evaluation, see Wikipedia for more information.
That's it! We can now do simple function calls like "callme()" provided we assign the result to some variable:
var a as long
a = callme()
There is a big problem, though: there is no way to declare functions. Just cheat by adding some directly from the source code. Put this near the end of your program, just before the call to InitMulti():
; Add a few functions to the symbol table so we can call them
*F.SFunction = AddFunction("MyFunc")
*F\Type = #TyLong
*F.SFunction = AddFunction("DoCoolStuff")
*F\Type = #TyLong
Input example:
var a as long
var b as long
a = MyFunc()
b = DoCoolStuff()
--------------------------
call _MyFunc
mov [v_a], eax
call _DoCoolStuff
mov [v_b], eax
push 0
call _ExitProcess
; Global variables:
v_a dd 0
v_b dd 0
By default we prepend an underscore to the names of function to create the function label in asm. This is to avoid name clashing with internal compiler labels, and is standard practice on Windows. On Linux it's not, which of course opens for huge problems.
Ok, so our program works if we assign the result to a variable. What if we want to call the function and ignore the return value, like this:
CallMe()
To allow this we need a new kind of statement: A single function call. The rules are the same as for the value: If the name is followed by a ( it's a function call.
New Statement():
Procedure Statement()
Protected N.s
N = GetName()
If Look = '('
Match('(')
FunctionCall(N)
Match(')')
Else
Select N
Case "var": VariableDeclaration()
Default: Assignment(N)
EndSelect
EndIf
EndProcedure
Test that your compiler works as you expect:
var a as long
var b as long
a = MyFunc()
DoCoolStuff()
b = a
--------------------------
call _MyFunc
mov [v_a], eax
call _DoCoolStuff
mov eax, [v_a]
mov [v_b], eax
push 0
call _ExitProcess
; Global variables:
v_a dd 0
v_b dd 0
Notice that the result of DoCoolStuff() is left in eax only to be overwritten by the next statement (mov eax, [v_a]). This is what we want when we're not storing the result.
Here is the source of fcall.pb: here.
What if you want to do parameters? Now it suddenly gets hard. Because the parameters have to pushed onto the stack in reverse order.
So CallMe(9, 11) should be translated into this:
push 11It's obvious why this is a problem: Phones have no button for 11! And also, we have to compile 11 before 9 (and remember, 9 and 11 could instead be complex expressions, including other function calls with other parameters), however, we don't get to the 11 before have to compiled the 9! We compile from left to right but the parameters are pushed from right to left!
push 9
call CallMe
Now there's no hardware rule that specifies it has to be this way, it's just done this way because the unix/c people rule the world, and these are the people who call the plumber when their printer is on fire, because, you know, ;everything is a pipe, right? Traditionally, Pascal compilers pushed the parameters from left to right as one would expect and everyone was happy.
(Note: The real reason C compilers started to push the parameters in reverse order is because they thought this was the only way to do vararg functions (functions with an unknown number of parameters). Vararg functions are evil, and they can actually be done in a safer and easier way by pushing from left-to-right anyways.)
So what do we do? Since we are not a big company that can afford to spend tons of man-hours on our compiler, we'll take the easy way out and cheat a little. There are two ways of faking right-to-left pushing of parameters.
Way 1: Buffer output. Let's say you have this code: "F16(a, b+1)". First we compile the code to push a (), but instead of outputting it, we write it to a temporary buffer. Then we compile the code to push b+1 (mov eax, dword [v_b], inc eax, push eax). Since this is the last parameter we write it to output and then writes the temporary buffer to output. The result is that statements appear in the correct order in the output!
mov eax, dword [v_b]This seems like a miracle and even allows you to make function calls as parameters to other function calls. Until you implement local variables, which are accessed by the stack pointer. Since the stack pointer is changed by each push, you'll have to use some extra logic to artificially compensate for this in the code that is generated early (before stack pointer has changed) but outputted late (after stack pointer has been changed by pushing). This works, but it becomes a real mess.
inc eax
push eax
push dword [v_a]
If you feel intimidated by this mess, rest assured that we won't see it! There is a simpler way.
Way 2: Instead of evaluating b+1 (or 11) before a (or 9), we'll set aside all the required stack space before evaluating any parameters. Then we will evaluate the parameters from left to right and move the result into the correct position on the stack, and call the function:
sub esp, 8
mov [esp+4], 9
mov [esp], 11
call CallMeAgainPlease
"sub esp, 8" is is equal to two pushes of 4 bytes each, except we don't specify the values to be pushed, so the values will be undefined.
We move in the correct values next.
This gives a slightly less effective code. Get used to it. Everything that is simple and correct is less effective.
We must modify the FunctionCall() to check if the called function has any formal parameters (parameters declared when defining the function). If the function has formal parameters (*F\ParamCount > 0) we call FunctionParameters(*F) to handle the actual parameters (parameters speficied when calling).
Procedure FunctionCall(Name.s)Notice how FunctionCall() works just as before if the called function has no formal parameters.
Protected *F.SFunction = LookupSymbolNoScopeResolve(Name)
If *F And *F\Kind = #SKind_Function
If *F\ParamCount > 0
FunctionParameters(*F)
EndIf
Emit("call " + *F\Ref)
Else
Error("Undeclared function: " + Name)
EndIf
EndProcedure
How is the parameter list of a function call structured? Answer: Each parameter is an expression. Each parameter but the last one is followed by a comma (,). The last parameter is followed by a ')'.
In our implementation of FunctionParameters() we already know the number of parameters. So we will loop this many times to get the parameters and eat the commas in between.
What then happens if too few parameters are specified? Like in "ThreeParameters(1, 2)"? In this case we will try to eat a comma but we'll find a ')' instead. So we'll get an error message.
What happens if too many parameters are specified? FunctionParameters() will then return happily, FunctionCall() will emit it's "call" asm code and then return happily, then the code that called FunctionCall() will try to match(')') and when this fails, we will get a generic error message (')' expected, ',' found).
Here's the magic:
Declare Expression()Amazing! In just 16 lines (including the modification to FunctionCall()) we implemented function parameters! When you test this code, use the debugger to single-step through this procedure so you really see what's going on in there.
Procedure FunctionParameters(*F.SFunction)
Protected ParamStackSize = *F\ParamCount*4 ; Note, it's 4 because we use longs. Doubles and quads would be 8!
Protected I
Emit2("sub ", "esp", Str(ParamStackSize)) ; Make room on the stack for the parameters
For I = 0 To *F\ParamCount-1
Expression()
Emit("mov [esp+" + Str(I*4) + "], eax")
If I <> *F\ParamCount-1 ; If the current parameter is NOT the last one
MatchWhite(',') ; Get a comma and whitespace to prepare for the next parameter
EndIf
Next
EndProcedure
Before testing change our predefined functions to take parameters.
; Add a few functions to the symbol table so we can call them
*F.SFunction = AddFunction("MyFunc")
*F\Type = #TyLong
*F\ParamCount = 1
*F\ParamTypes[0] = #TyLong
*F.SFunction = AddFunction("DoCoolStuff")
*F\Type = #TyLong
*f\ParamCount = 3
*f\ParamTypes[0] = #TyLong
*f\ParamTypes[1] = #TyLong
*f\ParamTypes[2] = #TyLong
First test:
MyFunc(99)Nice. The code seems correct! We use three statements to load the parameter instead of a simple push, but at least it works. I'm testing it with PB:
--------------------------
sub esp, 4
mov eax, 99
mov [esp+0], eax
call _MyFunc
; demo.pb
; Some cheating is required to make this compatible:
; 1. Label name = _Procedure0 (first procedure of source)
; 2. Insert call to procedure at the end of the source, else PB won't include it
; 3. Enable inline ASM in the compiler options
Procedure MyFunc(a)
Debug a
EndProcedure
; MyFunc(99)
SUB esp, 4
MOV eax, 99
MOV [esp+0], eax
CALL _Procedure0
End
MyFunc(0) ; Make sure procedure is used
Let's try something more complicated.
var a as long
a = DoCoolStuff(DoCoolStuff(1, 2, 3), MyFunc(MyFunc(99)), 123)
--------------------------
sub esp, 12
sub esp, 12
mov eax, 1
mov [esp+0], eax
mov eax, 2
mov [esp+4], eax
mov eax, 3
mov [esp+8], eax
call _DoCoolStuff
mov [esp+0], eax
sub esp, 4
sub esp, 4
mov eax, 99
mov [esp+0], eax
call _MyFunc
mov [esp+0], eax
call _MyFunc
mov [esp+4], eax
mov eax, 123
mov [esp+8], eax
call _DoCoolStuff
mov [v_a], eax
It's hard to see at a glance whether this code is correct. Let's test it in PB:
; demo2.pbWow, the result is correct. If you look at the generated asm file (pbcompiler /commented) you can see that PB outputs only 13 lines while we use 22 lines. So our code isn't as efficient. But: I bet it was faster to write, and easier to understand!
; enable inline asm
; MyFunc = _Procedure0
; DoCoolStuff = _Procedure2
Procedure MyFunc(a)
ProcedureReturn a
EndProcedure
Procedure DoCoolStuff(a, b, c)
ProcedureReturn (a+b)*c ; nonsense calculation
EndProcedure
a = DoCoolStuff(DoCoolStuff(1, 2, 3), MyFunc(MyFunc(99)), 123)
Debug "PB: " + Str(a) ; let's see what PB makes of it
; --------------------------
SUB esp, 12
SUB esp, 12
MOV eax, 1
MOV [esp+0], eax
MOV eax, 2
MOV [esp+4], eax
MOV eax, 3
MOV [esp+8], eax
CALL _Procedure2
MOV [esp+0], eax
SUB esp, 4
SUB esp, 4
MOV eax, 99
MOV [esp+0], eax
CALL _Procedure0
MOV [esp+0], eax
CALL _Procedure0
MOV [esp+4], eax
MOV eax, 123
MOV [esp+8], eax
CALL _Procedure2
MOV [v_a], eax ; moving result directly into PB variable a
Debug "Us: " + Str(a) ; is our result the same as PB?
End
MyFunc(0) ; Make sure procedure is used
DoCoolStuff(0, 0, 0) ; Make sure procedure is used
Some issues with function calling that I haven't covered:
- You push the parameters onto the stack, but don't take them off. They are taken off the stack inside the function. So when a call returns the stack pointer is where it was before you pushed the parameters. There is an exception: some functions uses the so called "cdecl" calling convention. In this case you have to adjust the stack pointer yourself after the call. This is rare on Windows.
- Functions are free to modify the three registers eax, ecx and edx. These registers are called "volatile", and you must expect them to contain nonsense values after a call instruction. The exception is eax, which contains the return value of the function, if any. If the function does not have a return value (not even 0) then expect eax to be a random number as well.
So if you have some valuable value in one of these registers you must push it BEFORE pushing the function parameters and pop it after the call instruction. This really becomes an issue when dealing with expressions.
- The stack is always aligned by 4. So even if the parameters are bytes or words, they will use 4 bytes each, of which 3 or 2 are wasted.
- Larger types (like double and quad) use 8 bytes each. You must use two mov instructions to cover them!
- When you implement types in your compiler, you will have to ensure that actual parameters are converted to the correct type (the type of the formal parameters) as they are pushed on the stack, else havoc will ensue.
- When returning floating point values from functions, they are not returned in eax, but in ST0, which is a floating point register. This applies to doubles, floats and extended (10-byte float).
- If a floating point result (in ST0) is to be discarded, it MUST BE POPPED OF THE FPU STACK else you'll get problems and it will be near impossible to find the source. Use this asm code to do it: "FSTP st0".
- When returning quads, the first four bytes goes in eax, the next four bytes goes in edx. (Or was it the reverse?)
- I recommend that you read this: http://www.agner.org/optimize/calling_conventions.pdf. You don't have to understand all of it. The interesting parts are 32-bit stdcall (which we use) and 32-bit cdecl (which is very similar and rather common, especially on Linux).
Full listing: fcallparm.pb listing.
Exercises:
1. Single step through the code until you know for sure what's going on.
2. Write a complex (nested) function call and compile it by hand, pretending to be your compiler. Then type the complex function call into the compiler, and see if it generates the same code as you did. If the codes are not identical, examine why, and try again until you get it correct on first try every time.
3. Add the following error message: "Not enough parameters to function " + FunctionName + . Show it if a function is called with too few parameters.
4. Add the following error message: "Too many parameters to function" + FunctionName. Show it if a function is called with too many parameters.
5. Extend the error message from ex. 3 to tell the user how many parameters are actually expected.
Listing: symboltable.pb
; symboltable.pb
; Simple symbol table
; Scope separator is ":"
; So a local variable X in the function Func would be Func:X.
; A global variable X would be X.
Structure SSymbol
Name.s
Kind.i
EndStructure
Enumeration ; Symbol kind
#SKind_Variable
#SKind_Function
EndEnumeration
Structure SVariable Extends SSymbol
Type.i
RefKind.i ; Global, local, parameter
RefOffset.i ; Currently not needed for globals
EndStructure
Enumeration ; Ref kind
#Ref_Global
#Ref_Local
#Ref_Parameter
EndEnumeration
Structure SFunction Extends SSymbol
Type.i ; Return type
Ref.s ; Entry point
ParamCount.i
ParamTypes.i[10] ; We allow at most 10 parameters
EndStructure
Enumeration ; Symbol type
#TyLong
EndEnumeration
; Prototype for callback that is called once for each
; symbol in the table (for listing all symbols)
Prototype ProtoSymbolListEnum(ScopedName.s, *Symbol.SSymbol)
; Map of pointers to SSymbol
Global NewMap *Symbols.SSymbol()
; Linked list of the scopes we're in (for use with local variables)
; When looking up symbols we check innermost scopes first
; Innermost scopes will be first in the list
Global NewList CurrentScopes.S()
AddElement(CurrentScopes()) ; Add the global scope
Procedure EnterScope(Scope.s)
FirstElement(CurrentScopes())
Scope = CurrentScopes() + ":" + Scope
ResetList(CurrentScopes())
AddElement(CurrentScopes())
CurrentScopes() = Scope
EndProcedure
Procedure LeaveScope()
FirstElement(CurrentScopes())
DeleteElement(CurrentScopes())
EndProcedure
; Lookup a symbol without scope resolution
; Valid parameters would be "Foo" (global), "Bar:Glass" (local)
Procedure LookupSymbolNoScopeResolve(Name.s)
ProcedureReturn *Symbols(":"+Name)
EndProcedure
; Lookup a symbol using the scope resolution rules of our language.
; For example, I have decided that global variables are accessible
; in functions, however, local variables have the priority in case
; of name clashes, so we test for local names first.
Procedure LookupSymbol(Name.s)
Protected *S
; Try all scopes in order
ForEach CurrentScopes()
*S = *Symbols(CurrentScopes() + ":" + Name)
If *S
ProcedureReturn *S
EndIf
Next
ProcedureReturn 0
EndProcedure
; Lookup a symbol, but look only in the current scope
Procedure LookupSymbolCurrentScope(Name.s)
FirstElement(CurrentScopes())
ProcedureReturn *Symbols(CurrentScopes() + ":" + Name)
EndProcedure
; Add a local or global symbol.
; Note: when in the global scope, added symbols will
; be global even when added with Glbal = 0
Procedure AddSymbol(*Sym.SSymbol, Glbal = 0)
Protected N.s = *Sym\Name
If Not Glbal
; Not explicitly global, add it in the current scope
FirstElement(CurrentScopes())
*Symbols(CurrentScopes()+":"+N) = *Sym
Else
; Global, add without extra scope
*Symbols(N) = *Sym
EndIf
EndProcedure
; List all symbols in a custom callback
Procedure EnumerateSymbols(Callback.ProtoSymbolListEnum)
ForEach *Symbols()
Callback(MapKey(*Symbols()), *Symbols())
Next
EndProcedure
; Convert the type name into a type number.
; For now we have only one type: #TyLong
Procedure TypeFromString(TypeS.s)
Select TypeS
Case "long": ProcedureReturn #TyLong
Default
Error("Invalid type name: " + TypeS)
EndSelect
EndProcedure
Procedure AddVariable(Name.s, TypeS.s, RefKind.i, RefOffset.i)
; Check if it's already declared in this scope
Protected *V.SVariable
*V = LookupSymbolCurrentScope(Name)
If *V
Error("Variable declared twice: " + Name)
EndIf
; Add the variable to the symbol table
*V = AllocateMemory(SizeOf(SVariable))
*V\Name = Name
*V\Kind = #SKind_Variable
*V\Type = TypeFromString(TypeS)
*V\RefKind = RefKind
*V\RefOffset = RefOffset
AddSymbol(*V)
ProcedureReturn *V
EndProcedure
Procedure AddFunction(Name.s, Ref.s = "")
; Check if it's already declared in the global scope
Protected *F.SFunction
*F = LookupSymbolNoScopeResolve(Name)
If *F
Error("Function declared twice: " + Name)
EndIf
; Add the function to the symbol table
*F = AllocateMemory(SizeOf(SFunction))
*F\Name = Name
*F\Kind = #SKind_Function
*F\Type = #TyLong
*F\ParamCount = 0
If Ref = ""
*F\Ref = "_" + Name
Else
*F\Ref = Ref
EndIf
AddSymbol(*F)
ProcedureReturn *F
EndProcedure
;----------------
CompilerIf 0
Procedure ListSymbols(ScopedName.s, *V.SSymbol)
Debug ScopedName + ", " + *V\Name
EndProcedure
N.SVariable
N\Name = "Apple"
N\Kind = #SKind_Variable
N\RefKind = #Ref_Global
N\RefOffset = 0
AddSymbol(@N)
EnterScope("Blah")
O.SVariable
O\Name = "Orange"
O\Kind = #SKind_Variable
O\RefKind = #Ref_Local
O\RefOffset = 0
AddSymbol(@O)
Debug LookupSymbol("Orange")
Debug LookupSymbol("Apple")
LeaveScope()
Debug "--"
Debug LookupSymbol("Orange")
Debug LookupSymbol("Apple")
Debug LookupSymbol("Blah:Orange")
; EnumerateSymbols(@ListSymbols())
CompilerEndIf
Subscribe to:
Posts (Atom)