PARE - PARallel Execution in Erlang
Update 3/12/2007: Attempt to restore all the missing vertical bars from the code...
Don echoed a recent sentiment of mine: "Importantly, and unlike say, Erlang or Concurrent Haskell, we don't have to do manual thread creation, synchronisation or communication -- the compiler does all that for us!"
Consider this arbitrary (and rather pointless) piece of Erlang code:
A = a(),
b(),
c().
If I wanted to run the above expressions in parallel (and let us assume that in this case I knew what I was doing, however improbable that might seem), I would normally have to risk RSI with something like this:
PidThis = self(),
PidA = spawn_link(fun() -> PidThis ! {self(), a()} end),
spawn_link(fun() -> b() end),
PidC = spawn_link(fun() -> PidThis ! {self(), c()} end),
A = receive {PidA, ResultA} -> ResultA end,
receive {PidC, ResultC} -> ResultC end.
Ouch.
It would be much nicer if we could just tell the compiler that we want the next 'N' expressions to be executed in parallel and have the compiler handle all the rest of the boilerplate. Like this:
parallel_execution_3,
A = a(),
b(),
c().
Rather fortuitously for this blog entry, that is exactly what the PARE parse transformation module does:
% PARE: PARallel Execution
%
% Caveats:
% Atoms starting with 'parallel_execution_' are consumed by this parse_transform,
% and variables starting with 'parallel_execution_' will be created.
% A process dictionary counter (key: 'parallel_execution_var_id') will be used
% while compiling.
% Change the definition of 'PREFIX' and 'VAR_ID' if these are unsuitable for your
% codebase.
%
% Use:
% A sequence of expressions beginning with an atom of the form
% 'parallel_execution_N' (where N is an integer) will be parallelised by this
% parse transformation. The next 'N' expressions (at the same level as the
% triggering atom) will be converted into a series of spawn/receive expressions,
% and the triggering atom will be removed from the code.
%
% Return-value order will be preserved: the last expression in a list of
% expressions will always be the value returned by that sequence of expressions.
%
% Future:
% Use different triggering atom prefixes for spawn vs spawn_link?
-module(pare).
-author("Philip Robinson").
-vsn('1.0').
-export([parse_transform/2]).
-define(PREFIX, "parallel_execution_").
-define(VAR_ID, parallel_execution_var_id).
parse_transform(ASTIn, _Options) ->
put(?VAR_ID, 0), ASTOut = ast(ASTIn, []), erase(?VAR_ID), ASTOut.
% PARALLELISE_AST/2
ast([{function,Line,NameFun,Arity,ClausesIn} | ASTIn], ASTOut) ->
ast(ASTIn, [{function,Line,NameFun,Arity,clause(ClausesIn, [])} | ASTOut]);
ast([Node | ASTIn], ASTOut) -> ast(ASTIn, [Node | ASTOut]);
ast([], ASTOut) -> lists:reverse(ASTOut).
% PARALLELISE_CLAUSES/2
clause([{clause,Line,Pattern,Guards,ExprsIn} | ClausesIn], ClausesOut) ->
clause(ClausesIn, [{clause,Line,Pattern,Guards,expr(ExprsIn, [])}|
ClausesOut]);
clause([], ClausesOut) -> lists:reverse(ClausesOut).
% PARALLELISE_EXPRS/2 - Searching for a trigger atom.
expr([{atom,_,Atom}=Expr|
ExprsIn], ExprsOut) ->
AtomStr = atom_to_list(Atom),
case lists:prefix(?PREFIX, AtomStr) of
false -> expr(ExprsIn, [Expr|
ExprsOut]);
true ->
N = list_to_integer(element(2, lists:split(length(?PREFIX), AtomStr))),
PidThis = new_var(),
Line = element(2, hd(ExprsIn)),
{RParallelExprs, Rest} = expr(PidThis, ExprsIn, N, [], []),
ExprPidThis = [{match,Line,{var,Line,PidThis},
{call,Line,{atom,Line,self},[]}}],
expr(Rest, RParallelExprs ++ ExprPidThis ++ ExprsOut)
end;
expr([{block,Line,Body}|
ExprsIn], ExprsOut) ->
expr(ExprsIn,[{block,Line,expr(Body, [])}|
ExprsOut]);
expr([{'case',Line,Condition,Clauses}|
ExprsIn], ExprsOut) ->
expr(ExprsIn,[{'case',Line,Condition,clause(Clauses,[])}|
ExprsOut]);
expr([{'if',Line,Clauses}|
ExprsIn], ExprsOut) ->
expr(ExprsIn,[{'if',Line,clause(Clauses,[])}|
ExprsOut]);
expr([{'try',Line,Body,CaseClauses,CatchClauses,After}|
ExprsIn], ExprsOut) ->
expr(ExprsIn,[{'try',Line,expr(Body,[]), clause(CaseClauses,[]),
clause(CatchClauses,[]), expr(After,[])}|
ExprsOut]);
expr([Expr|
ExprsIn], ExprsOut) -> expr(ExprsIn, [Expr
|
ExprsOut]);
expr([], ExprsOut) -> lists:reverse(ExprsOut).
% PARALLELISE_EXPRS/5 - Trigger atom has been found, parallelise the following 'N' expressions.
% Build up a list of expressions to spawn and receive.
expr(_PidThis, ExprsIn, 0, SpawnExprs, ReceiveExprs) ->
{ReceiveExprs ++ SpawnExprs, ExprsIn};
% Match expression:
% Spawn RHS, match receive value to original LHS.
expr(PidThis, [{match,Line,LHS,RHS}|
ExprsIn], N, SpawnExprs, ReceiveExprs) ->
VarPid = {var,Line,new_var()},
VarReceive = {var,Line,new_var()},
VarReason = {var,Line,new_var()},
Spawn = {match,Line,VarPid,{call,Line,{atom,Line,spawn_link},
[{'fun',Line,{clauses,[{clause,Line,[],[],[{op,Line,'!',{var,Line,PidThis},
{tuple,Line,[{call,Line,{atom,Line,self},[]},RHS]}}]}]}}]}},
Receive = {match,Line,LHS,{'receive',Line,[
{clause,Line,[{tuple,Line,[VarPid,VarReceive]}], [], [VarReceive]},
{clause,Line,[{tuple,Line,[{atom,Line,'EXIT'},VarReason]}], [],
[{call,Line,{atom,Line,exit},[VarReason]}]}]}},
expr(PidThis, ExprsIn, N - 1, [Spawn|
SpawnExprs], [Receive | ReceiveExprs]);
% Last expression in parallel block and not a match expression:
% Spawn expression, capture return value as last return from expression sequence.
expr(PidThis, [Expr|
ExprsIn], 1, SpawnExprs, ReceiveExprs) ->
Line = element(2, Expr),
VarPid = {var,Line,new_var()},
VarReceive = {var,Line,new_var()},
VarReason = {var,Line,new_var()},
Spawn = {match,Line,VarPid,{call,Line,{atom,Line,spawn_link},
[{'fun',Line,{clauses,[{clause,Line,[],[],[{op,Line,'!',{var,Line,PidThis},
{tuple,Line,[{call,Line,{atom,Line,self},[]},Expr]}}]}]}}]}},
Receive = {'receive',Line,[
{clause,Line,[{tuple,Line,[VarPid,VarReceive]}], [], [VarReceive]},
{clause,Line,[{tuple,Line,[{atom,Line,'EXIT'},VarReason]}], [],
[{call,Line,{atom,Line,exit},[VarReason]}]}]},
expr(PidThis, ExprsIn, 0, [Spawn|
SpawnExprs], [Receive
|
ReceiveExprs]);
% Non-match expression:
% Spawn expression, do not wait for a return message.
expr(PidThis, [Expr | ExprsIn], N, SpawnExprs, ReceiveExprs) ->
Line = element(2, Expr),
Spawn = {call,Line,{atom,Line,spawn},
[{'fun',Line,{clauses,[{clause,Line,[],[],[Expr]}]}}]},
expr(PidThis, ExprsIn, N - 1, [Spawn|
SpawnExprs], ReceiveExprs).
% NEW_VAR/0 - Return the next internal PARE variable.
new_var() -> list_to_atom(?PREFIX ++ integer_to_list(put(?VAR_ID, get(?VAR_ID) + 1))).
Here is an Erlang version of Don's 'fib' module, using PARE:
-module(fib).
-export([main/0]).
-compile({parse_transform, pare}).
fib(0) -> 0;
fib(1) -> 1;
fib(N) ->
parallel_execution_2,
A = fib(N-1),
B = fib(N-2),
A + B.
main() ->
[io:format("n=~B => ~B~n", [X, fib(X)]) || X <- lists:seq(0, 35)],
ok.
Postscript:
A handy thing to have when developing parse-transformations is a 'showast' module:
-module(showast).
-export([parse_transform/2]).
parse_transform(AST, _Options) ->
io:format("AST:~n~p~n", [AST]),
AST.
Include it twice in your testing code to get snapshots of the test module's AST before and after your parse_transform has had a go at it:
-module(test).
-export([test/0]).
-compile([{parse_transform, showast},
{parse_transform, pare}, {parse_transform, showast}]).
test() -> ok.
Alternatively, you could just compile the test module with the 'P' compiler option
c(test, ['P']).
to produce a code listing (in the file "test.P") after the parse-transform has been applied.Post-Postscript:
Setting up a macro or two can save a lot of typing with PARE:
-define(P2, parallel_execution_2).
test() ->
?P2,
io:format("1~n"),
io:format("2~n").
Or you could change PARE to look for a different triggering atom prefix. Be my guest!