Friday 30 November 2007

PARE - PARallel Execution in Erlang

Update 3/12/2007: Attempt to restore all the missing vertical bars from the code...


Don echoed a recent sentiment of mine: "Importantly, and unlike say, Erlang or Concurrent Haskell, we don't have to do manual thread creation, synchronisation or communication -- the compiler does all that for us!"

Consider this arbitrary (and rather pointless) piece of Erlang code:


A = a(),
b(),
c().


If I wanted to run the above expressions in parallel (and let us assume that in this case I knew what I was doing, however improbable that might seem), I would normally have to risk RSI with something like this:


PidThis = self(),
PidA = spawn_link(fun() -> PidThis ! {self(), a()} end),
spawn_link(fun() -> b() end),
PidC = spawn_link(fun() -> PidThis ! {self(), c()} end),
A = receive {PidA, ResultA} -> ResultA end,
receive {PidC, ResultC} -> ResultC end.


Ouch.

It would be much nicer if we could just tell the compiler that we want the next 'N' expressions to be executed in parallel and have the compiler handle all the rest of the boilerplate. Like this:


parallel_execution_3,
A = a(),
b(),
c().


Rather fortuitously for this blog entry, that is exactly what the PARE parse transformation module does:


% PARE: PARallel Execution
%
% Caveats:
% Atoms starting with 'parallel_execution_' are consumed by this parse_transform,
% and variables starting with 'parallel_execution_' will be created.
% A process dictionary counter (key: 'parallel_execution_var_id') will be used
% while compiling.
% Change the definition of 'PREFIX' and 'VAR_ID' if these are unsuitable for your
% codebase.
%
% Use:
% A sequence of expressions beginning with an atom of the form
% 'parallel_execution_N' (where N is an integer) will be parallelised by this
% parse transformation. The next 'N' expressions (at the same level as the
% triggering atom) will be converted into a series of spawn/receive expressions,
% and the triggering atom will be removed from the code.
%
% Return-value order will be preserved: the last expression in a list of
% expressions will always be the value returned by that sequence of expressions.
%
% Future:
% Use different triggering atom prefixes for spawn vs spawn_link?

-module(pare).
-author("Philip Robinson").
-vsn('1.0').
-export([parse_transform/2]).

-define(PREFIX, "parallel_execution_").
-define(VAR_ID, parallel_execution_var_id).

parse_transform(ASTIn, _Options) ->
put(?VAR_ID, 0), ASTOut = ast(ASTIn, []), erase(?VAR_ID), ASTOut.

% PARALLELISE_AST/2
ast([{function,Line,NameFun,Arity,ClausesIn} | ASTIn], ASTOut) ->
ast(ASTIn, [{function,Line,NameFun,Arity,clause(ClausesIn, [])} | ASTOut]);
ast([Node | ASTIn], ASTOut) -> ast(ASTIn, [Node | ASTOut]);
ast([], ASTOut) -> lists:reverse(ASTOut).

% PARALLELISE_CLAUSES/2
clause([{clause,Line,Pattern,Guards,ExprsIn} | ClausesIn], ClausesOut) ->
clause(ClausesIn, [{clause,Line,Pattern,Guards,expr(ExprsIn, [])}
| ClausesOut]);
clause([], ClausesOut) -> lists:reverse(ClausesOut).

% PARALLELISE_EXPRS/2 - Searching for a trigger atom.
expr([{atom,_,Atom}=Expr
| ExprsIn], ExprsOut) ->
AtomStr = atom_to_list(Atom),
case lists:prefix(?PREFIX, AtomStr) of
false -> expr(ExprsIn, [Expr
| ExprsOut]);
true ->
N = list_to_integer(element(2, lists:split(length(?PREFIX), AtomStr))),
PidThis = new_var(),
Line = element(2, hd(ExprsIn)),
{RParallelExprs, Rest} = expr(PidThis, ExprsIn, N, [], []),
ExprPidThis = [{match,Line,{var,Line,PidThis},
{call,Line,{atom,Line,self},[]}}],
expr(Rest, RParallelExprs ++ ExprPidThis ++ ExprsOut)
end;
expr([{block,Line,Body}
| ExprsIn], ExprsOut) ->
expr(ExprsIn,[{block,Line,expr(Body, [])}
| ExprsOut]);
expr([{'case',Line,Condition,Clauses}
| ExprsIn], ExprsOut) ->
expr(ExprsIn,[{'case',Line,Condition,clause(Clauses,[])}
| ExprsOut]);
expr([{'if',Line,Clauses}
| ExprsIn], ExprsOut) ->
expr(ExprsIn,[{'if',Line,clause(Clauses,[])}
| ExprsOut]);
expr([{'try',Line,Body,CaseClauses,CatchClauses,After}
| ExprsIn], ExprsOut) ->
expr(ExprsIn,[{'try',Line,expr(Body,[]), clause(CaseClauses,[]),
clause(CatchClauses,[]), expr(After,[])}
| ExprsOut]);
expr([Expr
| ExprsIn], ExprsOut) -> expr(ExprsIn, [Expr | ExprsOut]);
expr([], ExprsOut) -> lists:reverse(ExprsOut).

% PARALLELISE_EXPRS/5 - Trigger atom has been found, parallelise the following 'N' expressions.
% Build up a list of expressions to spawn and receive.
expr(_PidThis, ExprsIn, 0, SpawnExprs, ReceiveExprs) ->
{ReceiveExprs ++ SpawnExprs, ExprsIn};
% Match expression:
% Spawn RHS, match receive value to original LHS.
expr(PidThis, [{match,Line,LHS,RHS}
| ExprsIn], N, SpawnExprs, ReceiveExprs) ->
VarPid = {var,Line,new_var()},
VarReceive = {var,Line,new_var()},
VarReason = {var,Line,new_var()},
Spawn = {match,Line,VarPid,{call,Line,{atom,Line,spawn_link},
[{'fun',Line,{clauses,[{clause,Line,[],[],[{op,Line,'!',{var,Line,PidThis},
{tuple,Line,[{call,Line,{atom,Line,self},[]},RHS]}}]}]}}]}},
Receive = {match,Line,LHS,{'receive',Line,[
{clause,Line,[{tuple,Line,[VarPid,VarReceive]}], [], [VarReceive]},
{clause,Line,[{tuple,Line,[{atom,Line,'EXIT'},VarReason]}], [],
[{call,Line,{atom,Line,exit},[VarReason]}]}]}},
expr(PidThis, ExprsIn, N - 1, [Spawn
| SpawnExprs], [Receive | ReceiveExprs]);
% Last expression in parallel block and not a match expression:
% Spawn expression, capture return value as last return from expression sequence.
expr(PidThis, [Expr
| ExprsIn], 1, SpawnExprs, ReceiveExprs) ->
Line = element(2, Expr),
VarPid = {var,Line,new_var()},
VarReceive = {var,Line,new_var()},
VarReason = {var,Line,new_var()},
Spawn = {match,Line,VarPid,{call,Line,{atom,Line,spawn_link},
[{'fun',Line,{clauses,[{clause,Line,[],[],[{op,Line,'!',{var,Line,PidThis},
{tuple,Line,[{call,Line,{atom,Line,self},[]},Expr]}}]}]}}]}},
Receive = {'receive',Line,[
{clause,Line,[{tuple,Line,[VarPid,VarReceive]}], [], [VarReceive]},
{clause,Line,[{tuple,Line,[{atom,Line,'EXIT'},VarReason]}], [],
[{call,Line,{atom,Line,exit},[VarReason]}]}]},
expr(PidThis, ExprsIn, 0, [Spawn
| SpawnExprs], [Receive | ReceiveExprs]);
% Non-match expression:
% Spawn expression, do not wait for a return message.
expr(PidThis, [Expr | ExprsIn], N, SpawnExprs, ReceiveExprs) ->
Line = element(2, Expr),
Spawn = {call,Line,{atom,Line,spawn},
[{'fun',Line,{clauses,[{clause,Line,[],[],[Expr]}]}}]},
expr(PidThis, ExprsIn, N - 1, [Spawn
| SpawnExprs], ReceiveExprs).

% NEW_VAR/0 - Return the next internal PARE variable.
new_var() -> list_to_atom(?PREFIX ++ integer_to_list(put(?VAR_ID, get(?VAR_ID) + 1))).


Here is an Erlang version of Don's 'fib' module, using PARE:


-module(fib).
-export([main/0]).
-compile({parse_transform, pare}).

fib(0) -> 0;
fib(1) -> 1;
fib(N) ->
parallel_execution_2,
A = fib(N-1),
B = fib(N-2),
A + B.

main() ->
[io:format("n=~B => ~B~n", [X, fib(X)]) || X <- lists:seq(0, 35)],
ok.


Postscript:

A handy thing to have when developing parse-transformations is a 'showast' module:


-module(showast).
-export([parse_transform/2]).

parse_transform(AST, _Options) ->
io:format("AST:~n~p~n", [AST]),
AST.


Include it twice in your testing code to get snapshots of the test module's AST before and after your parse_transform has had a go at it:


-module(test).
-export([test/0]).
-compile([{parse_transform, showast},
{parse_transform, pare}, {parse_transform, showast}]).

test() -> ok.


Alternatively, you could just compile the test module with the 'P' compiler option c(test, ['P']). to produce a code listing (in the file "test.P") after the parse-transform has been applied.


Post-Postscript:

Setting up a macro or two can save a lot of typing with PARE:


-define(P2, parallel_execution_2).

test() ->
?P2,
io:format("1~n"),
io:format("2~n").


Or you could change PARE to look for a different triggering atom prefix. Be my guest!

6 comments:

  1. $anon@anon:~/$ erlc pare.erl
    ./pare.erl:36: syntax error before: ASTIn
    ./pare.erl:42: syntax error before: ClausesIn
    ./pare.erl:60: syntax error before: ExprsIn
    ./pare.erl:78: syntax error before: ExprsIn
    ./pare.erl:33: function ast/2 undefined
    ./pare.erl:114: Warning: function new_var/0 is unused


    failure

    ReplyDelete
  2. I like the idea, but I dislike the parse transform syntax, so I wrote my own pareval module: http://paste.lisp.org/display/51674

    With my syntax, you instead write pareval(Expr...), where Expr... is a list of expressions to be evaluated in parallel. If Expr is a match expression, then the pattern variables are matched in the caller's scope.

    Using pareval, your fibb/1 function can be rewritten as:

    fibb(0) -> 0;
    fibb(1) -> 1;
    fibb(N) ->
    pareval(A = fibb(N - 2),
    B = fibb(N - 1)),
    A + B.

    (The version of pareval that I pasted returns a list of the evaluation results, but not necessarily in order. I am considering changing it to just return the atom 'pareval' to prevent accidental misuse.)

    ReplyDelete
  3. Anonymous #2: all of the vertical bars were stripped out of my code when I copied it here. Should be fixed now.

    Matthew: I bow to the master. :-)

    ReplyDelete
  4. This comment has been removed by a blog administrator.

    ReplyDelete
  5. @Matthew, Am I missing something?
    The expressions are spawned in order, and the results are received in the same order. The results may be sent out-of-order, but since the receive in the list comprehension is waiting for the results in order, that doesn't matter.
    So, using the results wouldn't be misuse, as far as I can see.

    ReplyDelete

Obligatory legal stuff

Unless otherwise noted, all code appearing on this blog is released into the public domain and provided "as-is", without any warranty of any kind, express or implied, including but not limited to the warranties of merchantability, fitness for a particular purpose and noninfringement. In no event shall the author(s) be liable for any claim, damages, or other liability, whether in an action of contract, tort or otherwise, arising from, out of or in connection with the software or the use or other dealings in the software.