Friday, 30 November 2007

PARE - PARallel Execution in Erlang

Update 3/12/2007: Attempt to restore all the missing vertical bars from the code...

Don echoed a recent sentiment of mine: "Importantly, and unlike say, Erlang or Concurrent Haskell, we don't have to do manual thread creation, synchronisation or communication -- the compiler does all that for us!"

Consider this arbitrary (and rather pointless) piece of Erlang code:

A = a(),
b(),
c().

If I wanted to run the above expressions in parallel (and let us assume that in this case I knew what I was doing, however improbable that might seem), I would normally have to risk RSI with something like this:

PidThis = self(),
PidA = spawn_link(fun() -> PidThis ! {self(), a()} end),
spawn_link(fun() -> b() end),
PidC = spawn_link(fun() -> PidThis ! {self(), c()} end),
A = receive {PidA, ResultA} -> ResultA end,
receive {PidC, ResultC} -> ResultC end.


It would be much nicer if we could just tell the compiler that we want the next 'N' expressions to be executed in parallel and have the compiler handle all the rest of the boilerplate. Like this:

parallel_execution_3,
A = a(),
b(),
c().

Rather fortuitously for this blog entry, that is exactly what the PARE parse transformation module does:

% PARE: PARallel Execution
% Caveats:
% Atoms starting with 'parallel_execution_' are consumed by this parse_transform,
% and variables starting with 'parallel_execution_' will be created.
% A process dictionary counter (key: 'parallel_execution_var_id') will be used
% while compiling.
% Change the definition of 'PREFIX' and 'VAR_ID' if these are unsuitable for your
% codebase.
% Use:
% A sequence of expressions beginning with an atom of the form
% 'parallel_execution_N' (where N is an integer) will be parallelised by this
% parse transformation. The next 'N' expressions (at the same level as the
% triggering atom) will be converted into a series of spawn/receive expressions,
% and the triggering atom will be removed from the code.
% Return-value order will be preserved: the last expression in a list of
% expressions will always be the value returned by that sequence of expressions.
% Future:
% Use different triggering atom prefixes for spawn vs spawn_link?

-author("Philip Robinson").

-define(PREFIX, "parallel_execution_").
-define(VAR_ID, parallel_execution_var_id).

%% Parse-transform entry point: initialise the compile-time variable
%% counter (kept in the process dictionary under ?VAR_ID), rewrite the
%% module's AST, then erase the counter before handing the AST back.
parse_transform(ASTIn, _Options) ->
    put(?VAR_ID, 0),
    ASTOut = ast(ASTIn, []),
    erase(?VAR_ID),
    ASTOut.

%% Walk the top-level forms: every function form has its clauses rewritten
%% via clause/2; all other forms pass through untouched. The accumulator is
%% built in reverse and flipped once at the end.
ast([], Done) ->
    lists:reverse(Done);
ast([{function, Line, Name, Arity, Clauses} | Rest], Done) ->
    Rewritten = {function, Line, Name, Arity, clause(Clauses, [])},
    ast(Rest, [Rewritten | Done]);
ast([Form | Rest], Done) ->
    ast(Rest, [Form | Done]).

%% Rewrite the body (expression list) of each clause via expr/2,
%% preserving clause order.
clause([], Done) ->
    lists:reverse(Done);
clause([{clause, Line, Pattern, Guards, Body} | Rest], Done) ->
    Rewritten = {clause, Line, Pattern, Guards, expr(Body, [])},
    clause(Rest, [Rewritten | Done]).

% PARALLELISE_EXPRS/2 - Searching for a trigger atom.
% NOTE(review): this listing is garbled -- most clause heads of expr/2 lost
% their opening text (likely to the blog platform), leaving bare
% "| ExprsIn], ExprsOut) ->" fragments below. From the surviving bodies, the
% lost heads presumably matched an atom expression, a block, a 'case', an
% 'if', a 'try' and a generic catch-all; confirm against the original post
% before compiling. The fragments are kept verbatim.
| ExprsIn], ExprsOut) ->
AtomStr = atom_to_list(Atom),
case lists:prefix(?PREFIX, AtomStr) of
false -> expr(ExprsIn, [Expr
| ExprsOut]);
true ->
% Trigger found: N is parsed from the atom's suffix and the next N
% expressions are rewritten into spawn/receive pairs via expr/5.
N = list_to_integer(element(2, lists:split(length(?PREFIX), AtomStr))),
PidThis = new_var(),
Line = element(2, hd(ExprsIn)),
{RParallelExprs, Rest} = expr(PidThis, ExprsIn, N, [], []),
% NOTE(review): the binding below is truncated mid-tuple; it should
% build a "PidThis = self()" match in AST form.
ExprPidThis = [{match,Line,{var,Line,PidThis},
expr(Rest, RParallelExprs ++ ExprPidThis ++ ExprsOut)
| ExprsIn], ExprsOut) ->
expr(ExprsIn,[{block,Line,expr(Body, [])}
| ExprsOut]);
| ExprsIn], ExprsOut) ->
| ExprsOut]);
| ExprsIn], ExprsOut) ->
| ExprsOut]);
| ExprsIn], ExprsOut) ->
expr(ExprsIn,[{'try',Line,expr(Body,[]), clause(CaseClauses,[]),
clause(CatchClauses,[]), expr(After,[])}
| ExprsOut]);
| ExprsIn], ExprsOut) -> expr(ExprsIn, [Expr | ExprsOut]);
% Base case: all expressions processed; restore original order.
expr([], ExprsOut) -> lists:reverse(ExprsOut).

% PARALLELISE_EXPRS/5 - Trigger atom has been found, parallelise the following 'N' expressions.
% Build up a list of expressions to spawn and receive.
% NOTE(review): this listing is also garbled -- each 'Spawn =' binding is cut
% off before the fun it spawns, and each 'Receive' 'EXIT' clause body is
% truncated (presumably it re-raised Reason via exit/1). Restore from the
% original post before compiling; fragments are kept verbatim.
expr(_PidThis, ExprsIn, 0, SpawnExprs, ReceiveExprs) ->
{ReceiveExprs ++ SpawnExprs, ExprsIn};
% Match expression:
% Spawn RHS, match receive value to original LHS.
expr(PidThis, [{match,Line,LHS,RHS}
| ExprsIn], N, SpawnExprs, ReceiveExprs) ->
VarPid = {var,Line,new_var()},
VarReceive = {var,Line,new_var()},
VarReason = {var,Line,new_var()},
Spawn = {match,Line,VarPid,{call,Line,{atom,Line,spawn_link},
Receive = {match,Line,LHS,{'receive',Line,[
{clause,Line,[{tuple,Line,[VarPid,VarReceive]}], [], [VarReceive]},
{clause,Line,[{tuple,Line,[{atom,Line,'EXIT'},VarReason]}], [],
expr(PidThis, ExprsIn, N - 1, [Spawn
| SpawnExprs], [Receive | ReceiveExprs]);
% Last expression in parallel block and not a match expression:
% Spawn expression, capture return value as last return from expression sequence.
expr(PidThis, [Expr
| ExprsIn], 1, SpawnExprs, ReceiveExprs) ->
Line = element(2, Expr),
VarPid = {var,Line,new_var()},
VarReceive = {var,Line,new_var()},
VarReason = {var,Line,new_var()},
Spawn = {match,Line,VarPid,{call,Line,{atom,Line,spawn_link},
Receive = {'receive',Line,[
{clause,Line,[{tuple,Line,[VarPid,VarReceive]}], [], [VarReceive]},
{clause,Line,[{tuple,Line,[{atom,Line,'EXIT'},VarReason]}], [],
expr(PidThis, ExprsIn, 0, [Spawn
| SpawnExprs], [Receive | ReceiveExprs]);
% Non-match expression:
% Spawn expression, do not wait for a return message.
expr(PidThis, [Expr | ExprsIn], N, SpawnExprs, ReceiveExprs) ->
Line = element(2, Expr),
% NOTE(review): plain spawn (vs spawn_link above) means a crash in a
% fire-and-forget expression would go unnoticed; the 'Future' note at the
% top of the module mentions revisiting spawn vs spawn_link.
Spawn = {call,Line,{atom,Line,spawn},
expr(PidThis, ExprsIn, N - 1, [Spawn
| SpawnExprs], ReceiveExprs).

% NEW_VAR/0 - Return the next internal PARE variable.
%% Generates ?PREFIX ++ "0", ?PREFIX ++ "1", ... Note put/2 returns the
%% PREVIOUS value, so the name uses the counter's value from before the
%% increment -- preserved here explicitly. list_to_atom/1 is acceptable
%% because this runs at compile time on generated names only.
new_var() ->
    Current = get(?VAR_ID),
    put(?VAR_ID, Current + 1),
    list_to_atom(?PREFIX ++ integer_to_list(Current)).

Here is an Erlang version of Don's 'fib' module, using PARE:

-compile({parse_transform, pare}).

%% Naive doubly-recursive Fibonacci. The two recursive calls are bound to
%% separate variables so that a PARE trigger atom could run them in
%% parallel.
%% NOTE(review): the post says this example "uses PARE", but no
%% 'parallel_execution_N' trigger atom appears in the body -- possibly
%% lost in the blog formatting.
fib(0) -> 0;
fib(1) -> 1;
fib(N) ->
    Second = fib(N - 2),
    First = fib(N - 1),
    First + Second.

main() ->
[io:format("n=~B => ~B~n", [X, fib(X)]) || X <- lists:seq(0, 35)],


A handy thing to have when developing parse-transformations is a 'showast' module:


%% Debugging parse_transform: pretty-print the module's AST and hand it
%% back unchanged (a parse_transform must return the AST it was given,
%% otherwise compilation of the host module breaks).
%% NOTE(review): the published listing lost the final 'AST.' line; restored.
parse_transform(AST, _Options) ->
    io:format("AST:~n~p~n", [AST]),
    AST.

Include it twice in your testing code to get snapshots of the test module's AST before and after your parse_transform has had a go at it:

-compile([{parse_transform, showast},
{parse_transform, pare}, {parse_transform, showast}]).

test() -> ok.

Alternatively, you could just compile the test module with the 'P' compiler option c(test, ['P']). to produce a code listing (in the file "test.P") after the parse-transform has been applied.


Setting up a macro or two can save a lot of typing with PARE:

-define(P2, parallel_execution_2).

test() ->

Or you could change PARE to look for a different triggering atom prefix. Be my guest!

Sunday, 8 July 2007

Erlang and the Very Large Binary

Update 28/07/2007: The issue with pattern-matching on very large binaries has been resolved in R11B5, so the workaround below is no longer needed. Nothing to see here folks, move along... many thanks to the Erlang OTP team for clearing this up.

The following post has been kept purely for posterity.

(Also, see Per's comment where a better workaround than mine is suggested.)


I have these binary files I created from the Netflix data. Some of them are quite large, so for peace of mind I had to do a quick check to see whether Erlang could handle binaries of that size.

It turns out that Erlang can indeed handle some reasonably large binary sizes. Sort of. There was certainly no problem with loading a 300MB binary into RAM. Accessing the elements of this binary, however, proved to be somewhat of a problem.

I had written a simple helper function to manage retrieving elements from memory- or file-based binaries[1]:

%% Fetch BytesToRead bytes starting at BytesOffset from either an open
%% raw file descriptor or an in-memory binary, returning a sub-binary.
%% NOTE(review): the published listing lost the two 'Element' return lines
%% and mangled 'BytesOffset' to lowercase in the match; restored here. A
%% trailing '_/binary' segment is also restored -- without it the match
%% only succeeds when the read extends to the very end of the binary,
%% whereas the surrounding article shows mid-binary reads working.
bin_get(BytesOffset, BytesToRead, Fd = {file_descriptor, prim_file, _}) ->
    {ok, Element} = file:pread(Fd, BytesOffset, BytesToRead),
    Element;
bin_get(BytesOffset, BytesToRead, Bin) ->
    <<_:BytesOffset/binary, Element:BytesToRead/binary, _/binary>> = Bin,
    Element.

For relatively small BytesOffset values everything worked as expected. But as soon as I tried an offset of 134,217,728 bytes or higher I received a badmatch error from bin_get/3... but only for memory-based binaries. Opening a file descriptor to the same binary and retrieving the same offset value worked just fine, if a bit slower.

There appears to be a maximum element size limit of 2^27 - 1 for binary pattern matching.[2]

There was a simple workaround for this limit - all I needed to do was have a few extra clauses in bin_get/3 and insert multiple anonymous elements into the binary pattern match where needed. Since my largest binary is just a bit over 300MB I could get away with three clauses:

%% Workaround for the 2^27 - 1 binary match-segment size limit: pick a
%% clause by offset range and skip with multiple anonymous segments.
%% NOTE(review): restored the lost 'Element' return lines, fixed the
%% 'BytesOfset' typo, and re-added the trailing '_/binary' segment (as in
%% the two-clause bin_get above). The article notes the THIRD clause still
%% failed to load on that era's BEAM ('bs_skip_bits2' error), which is why
%% the recursive version further down replaced this approach.
bin_get(BytesOffset, BytesToRead, Bin) when BytesOffset =< 134217727 ->
    <<_:BytesOffset/binary, Element:BytesToRead/binary, _/binary>> = Bin,
    Element;
bin_get(BytesOffset, BytesToRead, Bin) when BytesOffset =< 268435454 ->
    BytesOffset2 = BytesOffset - 134217727,
    <<_:134217727/binary, _:BytesOffset2/binary, Element:BytesToRead/binary, _/binary>> = Bin,
    Element;
bin_get(BytesOffset, BytesToRead, Bin) ->
    BytesOffset2 = BytesOffset - 268435454,
    <<_:134217727/binary, _:134217727/binary, _:BytesOffset2/binary, Element:BytesToRead/binary, _/binary>> = Bin,
    Element.

I was happy to let a call for an offset greater than 402,653,181 to produce a runtime badmatch error, but not so happy to discover that the code above produced a compile error:

beam/beam_load.c(1551): Error loading function test:bin_get/3: op bs_skip_bits2 f x w u u: no specific operation found

After judicious use of the 'comment out lines of code and recompile the program' debugging technique, I determined that the Erlang compiler really did not like having the two initial anonymous elements in that last binary pattern match. Even turning the underscores into named (but ignored) variables gave the same result.

The solution to the problem raised by my solution to the initial problem was to go recursive:

-define(MAX_BIN_ELEM_SIZE, 134217727).
%% Recursive workaround for the match-segment size limit: peel
%% ?MAX_BIN_ELEM_SIZE bytes off the front of the binary (and off the
%% offset) until the offset fits inside a single match segment, then
%% extract the element normally.
%% NOTE(review): the first clause's 'Element' return line was lost in the
%% published listing and is restored here, along with the trailing
%% '_/binary' segment needed for mid-binary reads.
bin_get(BytesOffset, BytesToRead, Bin) when BytesOffset =< ?MAX_BIN_ELEM_SIZE ->
    <<_:BytesOffset/binary, Element:BytesToRead/binary, _/binary>> = Bin,
    Element;
bin_get(BytesOffset, BytesToRead, <<_:?MAX_BIN_ELEM_SIZE/binary, Bin/binary>>) ->
    bin_get(BytesOffset - ?MAX_BIN_ELEM_SIZE, BytesToRead, Bin).

In other words, keep discarding the first 'magic number' bytes from the binary and subtract the magic number from the offset until our offset is equal to or less than the magic number, then access the element in the binary in the usual manner.

I am not particularly happy with the need for this hack, but the end result lets me use some large in-memory binaries instead of constantly seeking and reading from disk. (If this last attempt hadn't worked then I was going to try breaking the large binaries into 100-MB chunks, and that would have been a much uglier workaround.)

[1] Depending on how much RAM the machine I was running the code on had, I set certain read-only binary files to be either loaded straight into memory or to just have a file handle opened for them. bin_get/3 was written to abstract that difference away from the rest of the code. I would have gotten away with it, too, if it weren't for those pesky errors.

[2] This would be consistent with using a 32-bit word to store the size value (with 4 of those bits used for a type identifier and 1 bit for a sign indicator). I would expect the element limit on a 64-bit architecture to be somewhat larger, and I wouldn't have noticed this problem with the size of the binaries I am currently using.

Thursday, 28 June 2007

Erlang Binary Map

(Updated to clean up some hypertext errors in the code examples.)

The Netflix Challenge dataset is very big, and there is not much RAM in my laptop at all.

To fit as much as possible of the rating data into memory, I converted the data into a bunch of Erlang binaries. Erlang binaries are 'just' contiguous blocks of bytes, so they do not incur the overhead of list cons cells or dictionary indexing. They are fast, but pretty simple, and using them to store data like C arrays means that you have to write your own code to manage access to them. (At least you will get a runtime error if you try to access past the end of a binary, which is a major step up from C.)

While wandering through the wasteland that is my code I happened to notice that one section did not smell quite right. This code was taking a chunk of that binary data, converting the chunk into a list of elements, and then mapping a function over this list of elements in order to return a list of function results. Creating that intermediate list seemed like a bit of unnecessary overhead - I would much rather iterate directly across the binary itself and save all of that extra list consing - but I could not find any binary:map-like function anywhere.

So I wrote my own.

It's not very big.

The original code had used a list_from_bin function to turn a binary into a list of elements:

%% Convert a binary into a list of unsigned (big-endian) integers,
%% BytesPerElem bytes per element. The walk runs from the END of the
%% binary back to the front, so the accumulated list comes out in binary
%% order without a final lists:reverse/1.
list_from_bin(Bin, BytesPerElem) ->
    list_from_bin(Bin, BytesPerElem, BytesPerElem * 8, size(Bin), []).

%% Helper: BytesOffset marks the byte just past the next element to take;
%% 0 means the whole binary has been consumed.
list_from_bin(_Bin, _BytesPerElem, _BitsPerElem, 0, Acc) ->
    Acc;
list_from_bin(Bin, BytesPerElem, BitsPerElem, BytesOffset, Acc) ->
    Skip = BytesOffset - BytesPerElem,
    <<_:Skip/binary, Elem:BitsPerElem/unsigned-integer, _/binary>> = Bin,
    list_from_bin(Bin, BytesPerElem, BitsPerElem, Skip, [Elem | Acc]).

And list_from_bin was used in this manner (with a trivial "2 * Elem" function):

[2 * N || N <- list_from_bin(<<1,2,3,4>>, 1)].
-> [2,4,6,8]

[2 * N || N <- list_from_bin(<<1,2,3,4>>, 2)].
-> [516,1544]

Note that list_from_bin iterates from the end of the binary backwards down to the beginning of the binary, so the list it builds is in the same order as the original binary and does not need reversing.

(If all of my elements were one byte long then I could have just used the standard Erlang binary_to_list/1 function, but sometimes the elements I used were two or three bytes in length. I should have probably included an initial clause of "list_from_bin(Bin, 1) -> list_from_binary(Bin);", but didn't think of it at the time.)

The new map_bin function maps a function over the elements in a binary, and returns a list of the function's results:

%% Map Fun over every BytesPerElem-wide element of Bin, front to back,
%% returning the results in binary order. Fun receives each element as a
%% binary. Trailing bytes that do not fill a whole element are ignored
%% (size(Bin) div BytesPerElem truncates).
map_bin(Fun, Bin, BytesPerElem) ->
    map_bin(Fun, Bin, BytesPerElem, 0, size(Bin) div BytesPerElem, []).

%% Helper: Skip counts bytes already consumed (avoids a multiply per
%% step); Remaining counts elements left; results accumulate reversed and
%% are flipped once at the end.
map_bin(_Fun, _Bin, _BytesPerElem, _Skip, 0, Acc) ->
    lists:reverse(Acc);
map_bin(Fun, Bin, BytesPerElem, Skip, Remaining, Acc) ->
    <<_:Skip/binary, Elem:BytesPerElem/binary, _/binary>> = Bin,
    map_bin(Fun, Bin, BytesPerElem, Skip + BytesPerElem, Remaining - 1, [Fun(Elem) | Acc]).

The main function (map_bin/3) takes these arguments:
  • Fun: A function that takes a single binary element as input. May return any value.
  • Bin: The original binary data block. Its size should be an exact multiple of BytesPerElem. If the size of the binary is not an exact multiple of the BytesPerElem value then any excess bytes at the end of the binary are discarded.
  • BytesPerElem: The number of bytes taken up by each element in the binary.

The helper function (map_bin/6) takes three additional arguments:
  • BytesDiscard: The number of bytes to skip at the beginning of the binary, for the current iteration.
  • CountElemRemain: The number of elements remaining to process.
  • Results: An accumulated, reversed list of the function's results.

The BytesDiscard argument was added to avoid having to recalculate the number of bytes to skip for every iteration (with, for example, something like "BytesDiscard = Offset * BytesPerElem"). I am not sure if this was a good decision or if it reeks too much of premature optimisation. Old C habits die hard.

CountElemRemain starts at the number of elements to process and decrements each iteration so the terminating condition can be written simply as 0, rather than having to have a "when Offset > CountElems" guard on the function clause.

And here is map_bin in action:

map_bin(fun(<< Elem:8>>) -> 2 * Elem end, <<1,2,3,4>>, 1).
-> [2,4,6,8]

map_bin(fun(<< Elem:16>>) -> 2 * Elem end, <<1,2,3,4>>, 2).
-> [516,1544]

Now with the new map_bin function my code can skip the creation of an intermediate list, and, quite entirely by accident, is actually more flexible than before. The original code always produced lists of unsigned integers from the binaries; the new code can be used to operate on multiple elements. For example:

map_bin(fun(<< Elem1:8,Elem2:16>>) -> Elem1 + Elem2 end, <<1,2,3,4,5,6>>, 3).
-> [516,1290]

It's just not quite as nice to look at as a good list comprehension.


The map_bin function is all well and good, but the real question we all want answered is... does using this new code actually make our program run any faster?

Well, according to my very informal use of now/0 and timer:now_diff/2, with large binaries and a trivial "x2" function for each element, map_bin seems to be around 11% faster than using list_from_bin. That's... not too bad. But we could go faster with multiple processes, I think.


Really, what is the point of using Erlang if we don't spawn a few hundred processes for every trivial piece of code?

Luckily for my fingers it is only a minor modification to make a parallel version of map_bin:

%% Parallel map_bin: spawn one worker per element; each worker posts its
%% result back tagged with its own pid. Results are collected in spawn
%% order, so the output list matches the element order of the binary even
%% though the workers may finish in any order.
pmap_bin(Fun, Bin, BytesPerElem) ->
    pmap_bin(Fun, Bin, BytesPerElem, 0, size(Bin) div BytesPerElem, []).

pmap_bin(_Fun, _Bin, _BytesPerElem, _Skip, 0, Workers) ->
    [receive {done, Pid, Result} -> Result end || Pid <- lists:reverse(Workers)];
pmap_bin(Fun, Bin, BytesPerElem, Skip, Remaining, Workers) ->
    Collector = self(),
    <<_:Skip/binary, Elem:BytesPerElem/binary, _/binary>> = Bin,
    Worker = spawn(fun() -> Collector ! {done, self(), Fun(Elem)} end),
    pmap_bin(Fun, Bin, BytesPerElem, Skip + BytesPerElem, Remaining - 1, [Worker | Workers]).

Sadly, I cannot comment much on the speed difference of this conversion because I do not (yet) have access to a multi-core machine. A routine like this is probably best avoided for a single-CPU system as the overhead of spawning many processes would be wasted, but it should perform better on multiple-CPU systems. (Feel free to try it out and let me know how it goes!)

There is still room for improvement in this function, if we wanted to take it further. We may not want to spawn a separate process for each element, for instance, but rather split the original binary into N chunks and spawn a process to apply a function to each chunk. Also, we might want to expand on the parallelism and include other nodes into the mix, to spread the calculations across multiple machines.

Something for another day.

And now someone is going to tell me that binary comprehensions have been available in the standard Erlang distribution for a while, and I just haven't been paying enough attention to the Erlang mailing-list announcements.

Friday, 22 June 2007

Erlang Macro Oddness

I found a little oddity with Erlang macros while I was writing version 2 of the Weighted Slope One algorithm. It seems that passing a multi-statement anonymous function as a parameter into a macro confuses the compiler.

For example, this code works:

-define(A_MACRO(FunAnon), apply(FunAnon, [])).

start() -> ?A_MACRO(
fun() ->
io:format("Single-element function works fine.~n")
end).

But this code produces a compile-time error:
-define(A_MACRO(FunAnon), apply(FunAnon, [])).

start() -> ?A_MACRO(
fun() ->
io:format("Multiple-element function "),
io:format("does not compile.~n")
end).

An "argument mismatch for macro ''A_MACRO''" error, to be precise.

Interestingly, multiple statements in a begin..end block seem to be okay:

-define(A_MACRO(FunAnon), apply(FunAnon, [])).

start() -> ?A_MACRO(
fun() ->
begin
io:format("Multiple-element function "),
io:format("with a begin..end block is okay.~n")
end
end).

Something to keep an eye out for.

Wednesday, 20 June 2007

Collaborative Filtering: Weighted Slope One in Erlang (v2)

Okay, so my initial Weighted Slope One Erlang translation wasn't very Erlang-ish... not a single spawn in sight and side-effects galore. Yuck.

I've ripped out ETS and replaced it with dictionaries, and modified the build_matrix loops to palm off the real work to spawned processes rather than do it themselves.

The main change was with the build_matric function. A long-running process is spawned for every item in the system (the 'X' item in the {x, y} difference/frequency matrix), and short-term user rating processes are spawned to send off difference and frequency information to the relevant item and wait for confirmation that the message was received. Once all of the data has been sent the item processes are asked to return their individual dictionaries to the main process, which then merges them into one big dictionary.

The item processes die naturally after they return their dictionaries, and the user rating processes only live long enough to ensure that their data is received by the relevant item process.

The only other significant changes to the program were to make the predict function use a dictionary rather than an ETS table.

The concurrent bits of this program use this idiom[*]:

process_flag(trap_exit, true),
[receive
{'EXIT', Pid, normal} -> ok;
{'EXIT', Pid, Reason} -> exit(self(), Reason)
end || Pid <- [spawn_link(fun() -> >>do something<< end)
|| >>value/s <- list of value/s<<]]

In other words: spawn a process to do something for every element in the list of values, and wait for each of those processes to signal that they have finished normally. (I used a substitution macro to avoid typing in all that boilerplate.)

An unfortunate consequence of this modification is that the code is about 50% larger than the earlier Erlang version:

%% slopeone2.erl
% Philip Robinson
% A parallel implementation of the weighted slope one
% algorithm in Erlang for item-based collaborative
% filtering.
% Based on the same algorithm in Java by Daniel Lemire and Marco Ponzi:


-define(SPAWN_AND_WAIT(Block, Data),
process_flag(trap_exit, true),
[receive
{'EXIT', Pid, normal} -> ok;
{'EXIT', Pid, Reason} -> exit(self(), Reason)
end || Pid <- [spawn_link(fun() -> Block end)
|| Data]]).

start() ->
% The rating database: A list of users, each containing a list of {item, rating} elements.
Items = [{item1,"candy"}, {item2,"dog"}, {item3,"cat"}, {item4,"war"}, {item5,"strange food"}],
DataRating = [{user1, "Bob", [{item1,1.0}, {item2,0.5}, {item4,0.1}]},
{user2, "Jane", [{item1,1.0}, {item3,0.5}, {item4,0.2}]},
{user3, "Jo", [{item1,0.9}, {item2,0.4}, {item3,0.5}, {item4,0.1}]},
{user4, "StrangeJo", [{item1,0.1}, {item4,1.0}, {item5,0.4}]}],
% The difference & frequency database: a dictionary of {{item X, itemY}, {diff, freq}}.
% Create the predictor engine.
DiffFreq = build_matrix(Items, DataRating),
io:format("Here's the data I have accumulated...~n"),
print_data(Items, DataRating, DiffFreq),
io:format("Ok, now we predict...~n"),
TestRatings1 = [{item5,0.4}],
print_user_ratings(Items, TestRatings1),
print_user_ratings(Items, predict(Items, TestRatings1, DiffFreq)),
TestRatings2 = [{item4,0.2}|TestRatings1],
print_user_ratings(Items, TestRatings2),
print_user_ratings(Items, predict(Items, TestRatings2, DiffFreq)),

% Based on existing data, and using weights, try to predict all missing ratings.
% The trick to make this more scalable is to consider only difference entries
% having a large (> 1) frequency entry.
% Precondition: ItemRatings is sorted as per lists:sort/1 (to re-merge actual ratings).
predict(Items, ItemRatings, DiffFreq) ->
% Gather the sum of the rating differences and frequencies for all available items given the known item ratings.
RatingsPredicted = lists:foldl(
fun({IdItemX, IdItemY, RatingX}, Dict) ->
case dict:find({IdItemY, IdItemX}, DiffFreq) of
error -> Dict;
{ok, {Diff, DFreq}} ->
fun({Pred, PFreq}) -> {Pred + ((RatingX + Diff) * DFreq), PFreq + DFreq} end,
{(RatingX + Diff) * DFreq, DFreq},
[{IdItemX, IdItemY, RatingX}
|| {IdItemX, RatingX} <- ItemRatings,
{IdItemY, _} <- Items]),
% Put the original (actual) ratings back into our prediction list.
RatingsPredictedAndActual = lists:foldl(
fun({Item,Rating}, Ratings) -> dict:store(Item, {Rating, 1}, Ratings) end,
RatingsPredicted, ItemRatings),
% Divide the total rating difference by the frequency and return as a list.
[{Item, Rating / Freq} || {Item, {Rating, Freq}} <- dict:to_list(RatingsPredictedAndActual)].

print_data(Items, DataRating, DiffFreq) ->
[begin io:format("~s~n", [Name]), print_user_ratings(Items, ItemRatings) end
|| {_Id, Name, ItemRatings} <- DataRating],
[print_item_diffs(Items, Item, DiffFreq) || Item <- Items],

%% Print one "name --> rating" line per rated item, sorted by item key.
%% Item display names come from the Items assoc list; a missing item
%% crashes with badmatch, same as the keysearch-based original.
print_user_ratings(Items, ItemRatings) ->
    [begin
         {Item, NameItem} = lists:keyfind(Item, 1, Items),
         io:format(" ~12s --> ~4.2f~n", [NameItem, Rating])
     end || {Item, Rating} <- lists:sort(ItemRatings)].

%% Print item X's row of the difference/frequency matrix: the item name,
%% then one "diff freq" cell per item Y, blank-filled where DiffFreq has
%% no {X, Y} entry.
print_item_diffs(Items, {ItemX, Name}, DiffFreq) ->
    io:format("~n~12s:", [Name]),
    [case dict:find({ItemX, ItemY}, DiffFreq) of
         {ok, {Diff, Freq}} -> io:format(" ~6.3f ~1B", [Diff, Freq]);
         error -> io:format(" ")
     end || {ItemY, _} <- Items].

% Long-running itemX process to manage a dictionary of {{itemX, itemY}, {Diff, Freq}} entries.
dict_itemX(IdItemX, DictDiffFreq) ->
{data, PidSender, IdItemY, DiffNew} ->
PidSender ! {done, self(), IdItemY},
dict:update({IdItemX, IdItemY},
fun({DiffOld, FreqOld}) -> {DiffOld + DiffNew, FreqOld + 1} end,
{DiffNew, 1}, DictDiffFreq));
{results, PidParent} -> PidParent ! {results, self(), DictDiffFreq}

% Build a matrix (dictionary) of {{itemX, itemY}, Difference, Frequency} entries.
build_matrix(Items, DataRating) ->
PidThis = self(),
% Create a bunch of long-running processes to manage itemX data.
PidItems = dict:from_list(
[{IdItemX, spawn(fun() -> dict_itemX(IdItemX, dict:new()) end)} || {IdItemX, _NameItem} <- Items]),
% Spawn a short-term process for each user's ratings and wait until they are all done.
?SPAWN_AND_WAIT(process_user_ratings(PidItems, Ratings), {_, _, Ratings} <- DataRating),
% Retrieve and merge all the item process dictionaries, then divide all differences by their frequency.
fun(_Key, {Diff, Freq}) -> {Diff / Freq, Freq} end,
fun(_IdItemX, PidItemX, DictIn) ->
PidItemX ! {results, PidThis},
{results, PidItemX, DiffFreq} ->
dict:merge(fun(_, _, _) -> io:format("Key collision~n") end, DictIn, DiffFreq)
dict:new(), PidItems)).

%% Fan out one short-lived linked worker per {ItemX, RatingX} pair this
%% user rated, then block until all of them have exited normally.
process_user_ratings(PidItems, Ratings) ->
% Spawn a short-term process for each itemX rating and wait until they are all done.
% NOTE(review): relies on the ?SPAWN_AND_WAIT macro defined earlier in the
% post; that macro's published listing looks garbled (the 'receive' block
% lost its opening line), so this cannot compile exactly as transcribed.
?SPAWN_AND_WAIT(process_user_itemX_ratings(dict:fetch(IdItemX, PidItems), RatingX, Ratings),
{IdItemX, RatingX} <- Ratings).

process_user_itemX_ratings(PidItemX, RatingX, Ratings) ->
% Spawn a process for each itemY rating that sends a message to the long-running itemX process,
% waits for confirmation that its message has been processed, then signals that it is done.
PidItemX ! {data, self(), IdItemY, RatingX - RatingY},
receive {done, PidItemX, IdItemY} -> ok end
end, {IdItemY, RatingY} <- Ratings).

[*] If trapping exits is not your cup of tea then you could use plain old spawn like this:

PidThis = self(),
[receive {done, Pid} -> ok end
|| Pid <- [spawn(fun() ->
>>do something<<,
PidThis ! {done, self()}
end)
|| >>value/s <- list of value/s<<]]

I had used this code until I thought of using spawn_link. I am not sure which version would be considered better by Erlang Gurus, but my personal preference leans towards spawn_link. Spawn_link seems easier to extend to handle multiple Erlang nodes, and if I was concerned about setting my main process to trap all exits then I could simply spawn a single process to manage all of the other spawn_linking.

Tuesday, 19 June 2007

Collaborative Filtering: Weighted Slope One in Erlang

I have been toying with the Netflix Challenge for a while now. It's fascinating stuff.

I knew nothing about collaborative filtering when I started this project, but that it pretty normal for my coding hobbies. (Hey, why would I start something new if I already knew how to do it?)

During my standard "collect and absorb everything" phase I ran across an article on Wikipedia that described the Slope One algorithm. This article had links to various implementations of the algorithm, including a standalone Java program by Daniel Lemire. More information on the Weighted Slope One algorithm may be found on Daniel's site.

I am not using Slope One in my current Netflix algorithm attempt, but I translated Daniel's Java code into Erlang as a learning exercise anyway:

%% slopeone.erl
% Philip Robinson
% A simple implementation of the weighted slope one
% algorithm in Erlang for item-based collaborative
% filtering.
% Based on the same algorithm in Java by Daniel Lemire and Marco Ponzi:


start() ->
% The rating database: A list of users, each containing a list of {item, rating} elements.
Items = [{item1,"candy"}, {item2,"dog"}, {item3,"cat"}, {item4,"war"}, {item5,"strange food"}],
DataRating = [{user1, "Bob", [{item1,1.0}, {item2,0.5}, {item4,0.1}]},
{user2, "Jane", [{item1,1.0}, {item3,0.5}, {item4,0.2}]},
{user3, "Jo", [{item1,0.9}, {item2,0.4}, {item3,0.5}, {item4,0.1}]},
{user4, "StrangeJo", [{item1,0.1}, {item4,1.0}, {item5,0.4}]}],
% The difference & frequency database: an ETS table of {{item X, itemY}, diff, freq}.
ets:new(diff_freq, [private, set, named_table]),
% Create the predictor engine.
io:format("Here's the data I have accumulated...~n"),
print_data(Items, DataRating),
io:format("Ok, now we predict...~n"),
TestRatings1 = [{item5,0.4}],
print_user_ratings(Items, TestRatings1),
print_user_ratings(Items, predict(Items, TestRatings1)),
TestRatings2 = [{item4,0.2}|TestRatings1],
print_user_ratings(Items, TestRatings2),
print_user_ratings(Items, predict(Items, TestRatings2)),

% Based on existing data, and using weights, try to predict all missing ratings.
% The trick to make this more scalable is to consider only diff_freq entries
% having a large (> 1) frequency entry.
predict(Items, ItemRatings) ->
PredFreq = ets:new(pred_freq, []),
[ets:insert(PredFreq, {Item, 0.0, 0}) || {Item, _} <- Items],
[[case {ets:match(diff_freq, {{ItemY, ItemX}, '$1', '$2'}),
ets:match(PredFreq, {ItemY, '$1', '$2'})} of
{[], _} -> ok;
{[[Diff, DFreq]], [[Pred, PFreq]]} ->
ets:insert(PredFreq, {ItemY, Pred + ((RatingX + Diff) * DFreq), PFreq + DFreq})
end || {ItemY, _} <- Items] || {ItemX, RatingX} <- ItemRatings],
ets:match_delete(PredFreq, {'_', '_', 0}), % Remove all zero-frequency predictions.
[ets:insert(PredFreq, {Item, Rating, 1}) || {Item, Rating} <- ItemRatings], % Re-insert actual ratings.
Results = [{Item, Rating / Freq} || {Item, Rating, Freq} <- ets:tab2list(PredFreq)],

print_data(Items, DataRating) ->
[begin io:format("~s~n", [Name]),
print_user_ratings(Items, ItemRatings) end
|| {_Id, Name, ItemRatings} <- DataRating],
[print_item_diffs(Items, Item) || Item <- Items],

%% Print one " name --> rating" line per rated item, sorted by item key;
%% display names are looked up from the Items assoc list. (Identical to
%% the later dict-based version earlier in this page.)
print_user_ratings(Items, ItemRatings) ->
[begin {value, {Item, NameItem}} = lists:keysearch(Item, 1, Items),
io:format(" ~12s --> ~4.2f~n", [NameItem, Rating]) end
|| {Item, Rating} <- lists:sort(ItemRatings)].

%% Print item X's row of the difference/frequency matrix, reading cells
%% from the named 'diff_freq' ETS table; items with no {X, Y} entry get a
%% blank cell. The table must already exist (created in start/0).
print_item_diffs(Items, {Item, Name}) ->
    io:format("~n~12s:", [Name]),
    [case ets:match(diff_freq, {{Item, Id}, '$1', '$2'}) of
         [[Diff, Freq]] -> io:format(" ~6.3f ~1B", [Diff, Freq]);
         [] -> io:format(" ")
     end || {Id, _} <- Items].

% Build a matrix (ETS table) of {{itemX, itemY}, Difference, Frequency} entries.
%% Pass 1 accumulates, per ordered item pair, the sum of rating
%% differences and the number of co-ratings across all users; pass 2
%% divides each sum by its frequency to leave the mean difference.
build_matrix(DataRating) ->
    [accumulate_user(ItemRatings) || {_Id, _Name, ItemRatings} <- DataRating],
    Normalise = fun({Key, DiffSum, Freq}, _) ->
        ets:insert(diff_freq, {Key, DiffSum / Freq, Freq})
    end,
    ets:foldl(Normalise, undefined, diff_freq).

%% Fold one user's ratings into the table: every ordered pair of that
%% user's rated items (including {X, X}) contributes one sample.
accumulate_user(ItemRatings) ->
    [accumulate_pair(ItemX, RatingX, ItemY, RatingY)
     || {ItemX, RatingX} <- ItemRatings, {ItemY, RatingY} <- ItemRatings].

%% Insert a fresh {diff, 1} entry or add this sample to an existing one.
accumulate_pair(ItemX, RatingX, ItemY, RatingY) ->
    Key = {ItemX, ItemY},
    case ets:lookup(diff_freq, Key) of
        [] -> ets:insert(diff_freq, {Key, RatingX - RatingY, 1});
        [{Key, DiffSum, Freq}] -> ets:insert(diff_freq, {Key, DiffSum + RatingX - RatingY, Freq + 1})
    end.

Some musings:

* I do not really consider this code to be a typical "Erlang-style" program. In particular, I am making substantial use of side-effects via ETS tables; comparisons between this code and the Java original will probably not be representative of comparisons between Erlang and Java programs in general.

* I may have gone a little overboard with the use of list comprehensions. I did not really like LCs when first exposed to them and it seems that I am overcompensating for that now.

* While some functions are obviously shorter in this Erlang version (compare build_matrix and buildDiffMatrix, for example), I am not convinced that they are necessarily clearer in Erlang than in Java. At least one advantage of smaller functions is that I can fit more of them on my 8.9" screen, but if that was my main concern then I would be programming in something even less verbose.

* While I haven't delved into this much, I suspect that using ETS has made this program harder to parallelise effectively. While the loops could easily be converted to use a parallel map algorithm, all CRUD activity still has to go through a single ETS process bottleneck. One possible way of utilising multiple CPUs might be to spawn a bunch of processes to manage individual items, send each of these messages regarding specific ratings, and then convert the results into a dictionary via a list comprehension of receive statements and dict:from_list/1.

* I did run the Dialyzer over the code but I have not even considered optimising it for performance. It will be slower than the Java version. :-)

Sunday, 22 April 2007

Erlang Macro Processor (v2), Part V

The final step for EMP2 is to expand any remote macro function calls and insert the results back into the AST.

Naively we would just follow the same pattern as the macro attribute expansion that we have just added:

node_parse(Node={call,Line,{remote,_,{atom,_,Mod},{atom,_,Fun}},Args}, Mods) ->
case lists:member(Mod, Mods) of
true ->
ast_from_results(lists:flatten([apply(Mod,Fun,Args)|" "]), Line, []);
false -> setelement(4,Node,node_parse(Args, Mods))
But if we do, we find that there are three(!) problems with this approach.

Firstly, ast_from_results is currently using erl_parse:parse_form to turn the textual macro results into an AST. This only works for complete Erlang forms (function definitions) and not for, say, a set of three Erlang expressions to be inserted into a function. We can fix this by using erl_parse:parse_exprs instead, but we will also have to append a full-stop and space to the result string (instead of just a space) to get it to work properly.

Secondly, the arguments for the function call are all in AST format with tuples and line numbers everywhere. We cannot just apply the function directly to these arguments; we need to convert them back to something more usable.

Finally, we may receive more than one Erlang expression from the macro. To fit these back into the space of a single node we have to wrap them in a block expression.

To tackle the first issue we need to update ast_from_results a little:
ast_from_results(FunParse, ResultsString, LineStart, ASTResults) ->
case remove_leading_whitespace(ResultsString) of
"" -> lists:flatten(lists:reverse(ASTResults));
String ->
{done,{ok,Tokens,LineEnd},StringRest} =
erl_scan:tokens([], String, LineStart),
{ok, AST} = erl_parse:FunParse(Tokens),
ast_from_results(FunParse, StringRest, LineEnd, [AST|ASTResults])

As an aside, you might like to have a closer look at that erl_parse:FunParse call.

Yes, instead of hard-coding a function call or adding an extra if statement, we are calling the erl_parse function via a variable whose value we will not know until run-time[1]. Doesn't thinking about that just make you go all tingly inside? No? Me neither. Of course.

We can now use ast_from_results for erl_parse:parse_form and erl_parse:parse_exprs situations with only a single additional "erl_parse function" argument.

For the second issue I am going to use a cheap and nasty hack. Because we are not (yet) supporting anything fancier than literal terms in the argument list, we can get away with this little bit of trickery to convert the arguments into something usable by our call to apply:
ArgsLiteral = [Value || {_Type,_Line,Value} <- Args]. 

The third issue is also very easily fixed by wrapping the call to ast_from_results in a block expression tuple. We should only do this if there is more than one node in the results list:
node_parse(Node={call,Line,{remote,_,{atom,_,Mod},{atom,_,Fun}},Args}, Mods) ->
case lists:member(Mod, Mods) of
true ->
ArgsLiteral = [Value || {_Type,_Line,Value} <- Args],
Results = lists:flatten([apply(Mod,Fun,ArgsLiteral)|". "]),
case length(Results) of
1 -> hd(Results);
_ -> {block,Line,ast_from_results(parse_exprs, Results, Line, [])}
false -> setelement(4,Node,node_parse(Args, Mods))

Oh, and of course we need to update the other node_parse function clause to include the new argument to ast_from_results:
node_parse({attribute,Line,macro,{Mod,Fun,Args}}, _Mods) ->
ast_from_results(parse_form, lists:flatten([apply(Mod,Fun,Args)|" "]), Line, []);

And with any luck we are done. Let's try it out on our example code.

1> CL = fun(F) -> c(F), l(F) end.
2> CL(emp2), CL(example_macro), CL(example).
{module, example}
3> [example:lookup(N) || N <- lists:seq(0, 3)].
[0,2,4,6]
4>

Yep. EMP2 is done.

The full listing:

-author("Philip Robinson").

parse_transform(AST, _Options) ->
Mods = lists:flatten([Mods || {attribute,_Line,macro_modules,Mods} <- AST]),
lists:flatten([node_parse(Node, Mods) || Node <- AST]).
node_parse({attribute,Line,macro,{Mod,Fun,Args}}, _Mods) ->
ast_from_results(parse_form, lists:flatten([apply(Mod,Fun,Args)|" "]), Line, []);
node_parse(Node={call,Line,{remote,_,{atom,_,Mod},{atom,_,Fun}},Args}, Mods) ->
case lists:member(Mod, Mods) of
true ->
ArgsLiteral = [Value || {_Type,_Line,Value} <- Args],
Results = lists:flatten([apply(Mod,Fun,ArgsLiteral)|". "]),
case length(Results) of
1 -> hd(Results);
_ -> {block,Line,ast_from_results(parse_exprs,Results,Line,[])}
false -> setelement(4,Node,node_parse(Args, Mods))
node_parse(Node, Mods) when is_list(Node) ->
[node_parse(Element, Mods) || Element <- Node];
node_parse(Node, Mods) when is_tuple(Node) ->
list_to_tuple([node_parse(Element, Mods) || Element <- tuple_to_list(Node)]);
node_parse(Node, _Mods) -> Node.

% Extract the literal value from each {Type, Line, Value} AST node,
% silently skipping anything that does not match that shape.
args_from_ast(AST) ->
    lists:filtermap(
        fun({_Type, _Line, Value}) -> {true, Value};
           (_Other) -> false
        end,
        AST).

% Scan and parse ResultsString into a list of ASTs, one per form or
% expression, using erl_parse:FunParse (parse_form | parse_exprs).
% Results accumulate in reverse and are un-reversed on the way out, so the
% returned ASTs are in source order.
% NOTE(review): this listing appears truncated by the blog formatting --
% the closing 'end' of the case expression and the final full stop are
% missing; confirm against the original module before reuse.
ast_from_results(FunParse, ResultsString, LineStart, ASTResults) ->
case remove_leading_whitespace(ResultsString) of
"" -> lists:flatten(lists:reverse(ASTResults));
String ->
{done,{ok,Tokens,LineEnd},StringRest} =
erl_scan:tokens([], String, LineStart),
{ok, AST} = erl_parse:FunParse(Tokens),
ast_from_results(FunParse, StringRest, LineEnd, [AST|ASTResults])

% Strip leading whitespace (tab, newline, space) from a string; trailing
% and interior whitespace is preserved.
remove_leading_whitespace([C | String]) when C =:= $\t; C =:= $\n; C =:= $\s ->
    remove_leading_whitespace(String);
remove_leading_whitespace(String) ->
    String.

EMP2: Entirely painful compile-time macros for functions and expressions, in 45 lines of obscure, uncommented, and unreadable Erlang code.
(If you think that this code is bad, wait until you see EMP3.)

[1] Run-time for EMP2 is, of course, compile-time for the module that we are using EMP2 to transform.

Erlang Macro Processor (v2), Part IV

Okay, now we are getting somewhere. Time to expand some macros!

To begin with we will start with something easy, like duplicating EMP1's functionality. We already have code from EMP1 to expand the -macro attribute entries, but unfortunately we cannot just cut-and-paste the EMP1 code into EMP2; our AST-walking is slightly different and we need to adjust ast_reversed_results:

ast_from_results(ResultsString, LineStart, ASTResults) ->
case remove_leading_whitespace(ResultsString) of
"" -> lists:reverse(ASTResults);
String ->
{done,{ok,Tokens,LineEnd},StringRest} =
erl_scan:tokens([], String, LineStart),
{ok, AST} = erl_parse:parse_form(Tokens),
ast_from_results(StringRest, LineEnd, [AST|ASTResults])

We change the -macro clause for node_parse to call the new function:

node_parse({attribute,Line,macro,{Mod,Fun,Args}}, _Mods) ->
ast_from_results(lists:flatten([apply(Mod,Fun,Args)|" "]), Line, []);

And that obscene remove_leading_whitespace function has returned:

% Strip leading whitespace from a string: 9 = tab, 10 = newline, 32 = space.
% Any other leading character (or the empty string) ends the recursion.
remove_leading_whitespace([9 |String]) -> remove_leading_whitespace(String);
remove_leading_whitespace([10|String]) -> remove_leading_whitespace(String);
remove_leading_whitespace([32|String]) -> remove_leading_whitespace(String);
remove_leading_whitespace( String ) -> String.

The only difference between ast_from_results and ast_reversed_results is that ast_from_results keeps the resulting AST in the same order as the input ResultsString argument (it kindly reverses its already-reversed results for us before passing them back).

Unlike EMP1, EMP2 does NOT want to receive the results of the expanded AST in reversed order. We are not following the "build a list in reverse and then reverse the result" model for our AST (which works just fine for traversing the top level only), but rather using a recursive descent model for AST parsing. In this situation we need to keep the results in the order that they appear.

Now we have the EMP2 module reproducing the functionality of EMP1, and at only a few more lines of code. The only thing left to do is identify macro function calls, apply them, and insert the parsed results into the AST in place of the original call.


For remote function calls we have two situations to handle:
  • The remote function call is to a macro, and
  • The remote function call is not to a macro.

The easier case is when the remote function call is not to a macro function. We pretty much just want the default tuple node function to run on the node, but we cannot (easily) get there because this more-specific function clause will have intercepted the node before the default code gets a chance to run on it.

We could encapsulate the common default code in another function (or a substitution macro), but for simplicity's sake I will just build the required node in place with the setelement function. It is not a large amount of code:

node_parse(Node={call,Line,{remote,_,{atom,_,Mod},{atom,_,Fun}},Args}, Mods) ->
case lists:member(Mod, Mods) of
true ->
io:format("Function-level macro call: ~w~n", [Node]),
false -> setelement(4,Node,node_parse(Args, Mods))

Next up: The final installment - expanding remote macro function calls.

Erlang Macro Processor (v2), Part III

The top level of the AST is a list of nodes, rather than a node in its own right, so we might write our first attempt at an[other] AST walker like this:

% EMP2 entry point: collect macro module names from -macro_modules
% attributes, then walk every top-level AST node. The final flatten is
% needed because a single macro expansion may return several forms that
% must all sit at the top level of the module.
parse_transform(AST, _Options) ->
Mods = lists:flatten([Mods || {attribute,_Line,macro_modules,Mods} <- AST]),
lists:flatten([node_parse(Node, Mods) || Node <- AST]).

% First cut: leave every node untouched (real AST walking is added later).
node_parse(Node, _Mods) -> Node.

The parse_transform function calls node_parse on each top-level node in the AST. It calls lists:flatten on the result because - as we already know - the EMP1-variety of top-level macro expansion may return more than one function definition from a single macro call. These definitions all need to be at the same "height" as the others, so the resulting deep list of nodes needs to be flattened.

These two functions together will traverse the top level of the AST but not examine any sub-nodes. To do that we need to split the atom... er, node tuples, and parse each element in sequence:

% Recurse into a tuple node: parse every element, then rebuild the tuple.
node_parse(Node, Mods) when is_tuple(Node) ->
list_to_tuple([node_parse(Element, Mods) || Element <- tuple_to_list(Node)]).

Now if we were to compile and run this on our example.erl file we would get a big fat error... it turns out that not every element in a node tuple is actually another node tuple (but we already knew that, too). Some of the elements are lists, and some of them are atoms or integers. A few extra clauses should take care of these conditions:

% A list element is a sub-AST: parse each node within it.
node_parse(Node, Mods) when is_list(Node) ->
[node_parse(Element, Mods) || Element <- Node];
% Leaves (atoms, integers, etc.) pass through unchanged.
node_parse(Node, _Mods) -> Node.

Here is the whole module in one piece:


parse_transform(AST, _Options) ->
Mods = lists:flatten([Mods || {attribute,_Line,macro_modules,Mods} <- AST]),
lists:flatten([node_parse(Node, Mods) || Node <- AST]).

node_parse(Node, Mods) when is_list(Node) ->
[node_parse(Element, Mods) || Element <- Node];
node_parse(Node, Mods) when is_tuple(Node) ->
[Type,Line|ListElements] = tuple_to_list(Node),
Results = [node_parse(Element, Mods) || Element <- ListElements],
node_parse(Node, _Mods) -> Node.

And that is all we need to generically walk the entire AST.

Trapping the specific nodes we want to macro-expand is also rather trivial. We need to catch macro module attributes and remote function calls, and to do that we just add two new clauses to the node_parse function:

node_parse(Node={attribute,Line,macro,{Mod,Fun,Args}}, _Mods) ->
io:format("Line ~B: EMP1-style macro attribute found.~n", [Line]),
% Do macro-expansion of attribute's Mod, Fun, and Args values.
node_parse(Node={call,Line,{remote,L,{atom,_,Mod},{atom,_,Fun}},Args}, Mods) ->
io:format("Line ~B: EMP2-style remote function call found.~n", [Line]),
% Test whether the remote call is to a macro module.
% If so, expand it. Otherwise traverse node as usual.
setelement(4, Node, node_parse(Args, Mods));

Next up: Expanding the macros.

Saturday, 21 April 2007

Erlang Macro Processor (v2), Part II

You know we need to do it eventually, so let's get the boring "find -macro_modules attributes and store their values" bit out of the way so we can move on to some more interesting stuff. Here we go:


parse_transform(AST, _Options) ->
Mods = lists:flatten([Mods || {attribute,_Line,macro_modules,Mods} <- AST]),
io:format("Macro Modules: ~p~n", [Mods]),

Ah, whoops. One of those list comprehension thingies seems to have slipped into the parse_transform function. To get rid of it we just have to change that line into something like this:

    Mods = lists:flatten(lists:map(
fun ({attribute,_Line,macro_modules,Mods}) -> Mods;
(_Node) -> []

Hmmm. On second thoughts, maybe we should keep the list comprehension.

I believe that list comprehensions are a relatively new feature in Erlang so you may not see too many of them in existing code, but they really are worth learning. (Erlang is in good company: Python and Haskell have list comprehensions too.)

Back from that tangent and to the program at hand, we see that the macro module names are being stored in an ordinary list. I expect that only a few macro modules (probably only one at most) will be specified in any given module, and looking for an element in a one-element list is pretty quick, so we should not be needing the indexing overhead of a dictionary. I also don't particularly mind if someone specifies a macro module more than once, or if a specified macro module is never used. (If we were really concerned about duplicate macro module names then we could use one of the list module functions to easily remove them.)

We could also roll the gathering of the macro_modules attributes up into the AST-walking code, but conceptually it is nicer to keep it up here and out of the way. Also, as this code only traverses the very top level of the AST it should be quite quick. Pattern-matching one entry per module attribute and function definition is not a computationally expensive task.

Right, the boring stuff is done; let's get into parsing that AST.

As I briefly mused at the bottom of a previous Atomiser post:

Rather than consisting of a bunch of pattern matching clauses, the walk_ast function could be made "smarter" by transforming the given node tuple into a list, and applying some rules-based logic to the elements of that list (from the third element onwards).

I reckon we could give this a go and see where we end up. (Either it will work and we have learned something new, or it won't work and we will have learned something new, so it is a win-win situation either way.)

You might recall that the Atomiser walk_ast function had a clause for each node type. This was a great way for me to implement the Atomiser because I got to see the AST nodes that made up my programs, but in the end it has turned out to be a pretty ugly function.

Here are a few lines of the walk_ast function as a quick refresher (the substitution macro actually makes the code nicer than it could be):

?WALK_AST({call,_Line,_Fun,Args}, Args); % Handles local and module calls.
?WALK_AST({'case',_Line,Test,Clauses}, [Test|Clauses]);
?WALK_AST({'catch',_Line,Expr}, Expr);
?WALK_AST({char,_Line,_Char}, []);

And those clauses go on (and on!) for about forty different node types...

I would much rather only have specific clause for handling each node that we are interested in, and use some generic code to handle the rest. But if we want to create some rules to manage these nodes generically then we had better find some patterns in all of that mess.


The first (and blindingly obvious) thing to notice about the nodes is that - without exception - they are all tuples. (I know, I know: I am a genius. Applause is not strictly necessary. Really. Oh, all right then, a little bit of applause is okay, if you insist.)

Two of these tuple nodes are not quite the same as the others: {error, Details} and {warning, Details}. In all of the other nodes the first element is the node type and the second element is the line number of the source file that the node appears in. After that there are a variable number of elements (possibly none) with node-specific meanings.

We are interested in catching -macro attributes (so EMP2 can do the work of EMP1) as well as remote function call nodes that are calling a macro function. Everything else is irrelevant, except that we want to recursively descend into sub-nodes to keep searching for other remote macro function calls.

If we take a closer look at the elements of nodes we will note that the element is always either a list, a tuple, or atomic (i.e.: an atom or an integer). These elements might have a special meaning to the compiler (depending on their location in the current node tuple) but to us they are just potential sub-nodes. If the node does not match an attribute or remote function call pattern then the elements have no meaning to EMP2 and we can treat them as homogeneous lumps of node matter.

Of the additional elements in a node (if any), they are either
  • a list, which we can parse as its own sub-AST,
  • a tuple, which we can parse as another node, or
  • atomic (or integer), which we can pass back as-is.

I think that all of these notes are probably enough to get us started coding.

Thursday, 19 April 2007

Erlang Macro Processor (v2), Part I

EMP1 is all well and good, but it does have more than its fair share of idiosyncratic behaviour[1]:

  • EMP1 can only be used to create full functions at the top level of a module. This makes it a bit more difficult to use than strictly necessary, especially if we only want to generate a term to use within a function.
  • Arguments passed to the macro must be literal values - no function calls allowed!
  • Macros must be defined in a separate module, which must be compiled before the macro-using module is compiled.
Quite frankly that first point bugs the hell out of me. I really should not have to write a macro that returns an entire function definition if I only need to generate a small portion of a function.

Today we will begin to tackle this issue with EMP2, but before we dive straight into the parse_transform code I would like to spend a few moments updating our example code. The rewrite will make the example_macro.erl and example.erl modules use the as-yet-unwritten EMP2 module functionality. I probably won't explicitly show it in these posts, but the compile errors I get from running EMP2 over example.erl will have a big influence over the direction that its development takes.

We will still need a separate macro module, but the macro function will only generate the lookup table itself rather than return a whole function definition:


% Generate the source text of a binary literal with Size elements, where
% each element's value is twice its offset, modulo 256 (we only store
% bytes). Returns chardata such as "<<0,2,4,6>>".
% Fix: the surrounding text promises "twice its offset (modulo 256)" but
% the code omitted the reduction, so for Size > 128 the generated literal
% contained values above 255 (left to implicit truncation at compile
% time). The 'rem 256' makes the generated text match the stored bytes.
lookup_binary(Size) ->
    % Head of the mapped list is ",0"; peel its comma off so the first
    % element of the binary literal is not preceded by one.
    [[$,,FirstVal]|NumberString] = lists:map(
        fun(Offset) -> io_lib:format(",~B", [(Offset * 2) rem 256]) end,
        lists:seq(0, Size - 1)),
    "<<" ++ [FirstVal] ++ NumberString ++ ">>".

We have lost the code that produces the whole function and only kept our lookup binary creation function, which also seems to have picked up a jaunty little Size argument from somewhere. As before, each element's value is twice its offset (modulo 256: we are only storing bytes after all).

To check that the new macro code works correctly:

1> CL = fun(F) -> c(F), l(F) end.
2> CL(example_macro).
3> M = fun(R) -> io:format("~s~n", [lists:flatten(R)]) end.
4> M(example_macro:lookup_binary(4)).

And we also have to rewrite the module that calls this lookup macro:


-compile({parse_transform, emp2}).

lookup(Offset) ->
    <<_:offset/binary,value:8/integer,_/binary>> =

This does look a lot nicer than the EMP1 version. Only the snippet of code that needs to be dynamically generated is in the macro module; the rest of the code is in the standard module where it belongs, and the macro call is in a much more appropriate place - inside the function that uses it - than lurking within a module attribute.

With EMP1 we had to peek inside another module to see that a lookup/1 function was being generated, but here we can see that fact already in front of us. We can even guess that a binary term will be created just from the context around the macro call.

Note that 'emp1' has changed to 'emp2' in the parse_transform compiler directive, and that we need a new 'macro_modules' module attribute to tell EMP2 which remote function calls are to be expanded at compile-time.

Once we have written EMP2 and compiled all the modules, we should be able to run the lookup function and receive the same results as we did before:

1> lists:map(fun(N) -> example:lookup(N) end, lists:seq(0, 3)).

We shall see.

[1] And I cannot have all that competition floating around out there, you know.

Tuesday, 17 April 2007

The Atomiser, Redux

I have received some great comments and suggestions regarding the Atomiser; as a result I have added a new (optional) feature to the module. (Don't worry - The Atomiser may be new and improved, but is still 100% backwardly-compatible!)

As usual, you may specify a list of globally-valid atoms:

-atoms([atom1, atom2...]).

You may now also specify function-specific atom lists in two ways. The first method is to add a function name (only) to an atoms declaration entry. The atoms specified will then be valid within all 'fun_name' functions, regardless of the arity of those function definitions:

-atoms({fun_name, [atom1, atom2...]}).

(Unfortunately we have to wrap all this information up in a single tuple: 'wild' module attributes can only contain one value.)

To be even more specific you may add a function name and an arity to an atoms declaration. These atoms will then be valid within that specific 'fun_name/arity' function definition:

-atoms({fun_name, arity, [atom1, atom2...]}).

Atoms declarations are cumulative: globally-valid atoms (if any) are included along with function and function/arity atoms when checking for valid atoms within a given function definition.

You might notice that in the code below I have added a few new clauses into the walk_ast function. I was a bit concerned that I may have missed some node types from the Erlang AST, so I cracked open the only reference I had seen of the Abstract Format and added a few more function clauses that I had initially overlooked. I am pretty sure that just about everything is in there now, but feel free to disabuse me of that notion. :-)

Finally, I cleaned up the ?WALK_AST macro a little so that it no longer requires a list of ASTs to process: it now works directly off a single AST. Removing embedded lists has simplified the use of this macro quite considerably.

The new Atomiser Module:

-author("Philip Robinson").
%-compile({parse_transform, atomiser}). % Uncomment after initial compile.

-atoms([base_dict_key,error, ok]). % Atoms used in four or more functions.
-atoms({atoms_check, 5, [found]}).
-atoms({atoms_unused_print, 1, [found]}).
-atoms({key_more_general, 1, [function]}).
-atoms({parse_transform, 2, [report_warnings,true]}).
-atoms({walk_ast, 3, [atom, atoms, attribute, b_generate, bc, bin, bin_element,
        block, call, 'case', 'catch', char, clause, clauses, cons, eof, float,
        'fun', function, generate, 'if', integer, lc, match, nil, op, 'query',
        'receive', record, record_field, string, 'try', tuple, var, warning]}).

parse_transform(AST, Options) ->
    DictAtomsAll = dict:store(base_dict_key, dict:new(), dict:new()),
    case lists:member(report_warnings, Options) of
        true -> atoms_unused_print(walk_ast(AST, base_dict_key, DictAtomsAll));
        _ -> ok

dict_with_added_atoms(Line, AtomList, DictInitial) ->
    AddAtom = fun(Atom, Dict) ->
        case dict:find(Atom, Dict) of
            {ok,LineAlreadyDefined} ->
                    "~s:~B Warning: atom '~w' already defined on line ~B.~n",
                    [?FILE, Line, Atom, LineAlreadyDefined]),
            error -> dict:store(Atom, Line, Dict)
    lists:foldl(AddAtom, DictInitial, AtomList).

atoms_from_attr(Line, Key, AtomList, Atoms) ->
    Dict = case dict:find(Key, Atoms) of {ok,D} -> D; error -> dict:new() end,
    dict:store(Key, dict_with_added_atoms(Line, AtomList, Dict), Atoms).

atoms_check(Atom, Line, KeyDict, Atoms) ->
    case dict:find(KeyDict, Atoms) of
        {ok,Dict} -> atoms_check(Atom, Line, KeyDict, Dict, Atoms);
        error -> atoms_check(Atom, Line, key_more_general(KeyDict), Atoms)

atoms_check(Atom, Line, KeyDict, Dict, Atoms) ->
    case dict:find(Atom, Dict) of
        {ok,found} -> Atoms;
        {ok,_LineDefinedOn} ->
            dict:store(KeyDict, dict:store(Atom,found,Dict), Atoms);
        error ->
            case KeyDict of
                base_dict_key ->
                    io:format("~s:~B Warning: atom '~w' unexpected.~n",
                        [?FILE, Line, Atom]),
                _ -> atoms_check(Atom, Line, key_more_general(KeyDict), Atoms)

key_more_general({function,Fun,_Arity}) -> {function,Fun};
key_more_general({function,_Fun}) -> base_dict_key.

atoms_unused_print(Atoms) ->
    Filter = fun({_Atom,Line}) -> Line =/= found end,
    DictsToList = fun({_DictKey,Dict}, UnusedAtoms) ->
        UnusedAtomsNew = lists:filter(Filter, dict:to_list(Dict)),
        UnusedAtomsNewSorted = lists:keysort(2, UnusedAtomsNew),
        lists:keymerge(2, UnusedAtomsNewSorted, UnusedAtoms)
    UnusedAtoms = lists:foldl(DictsToList, [], dict:to_list(Atoms)),
    PrintUnusedAtom = fun({Atom,Line}) ->
        io:format("~s:~B Warning: atom '~w' unused.~n", [?FILE, Line, Atom])
    lists:foreach(PrintUnusedAtom, UnusedAtoms).

-define(WALK_AST(PatternToMatch, ExpressionsToProcess),
    walk_ast([PatternToMatch|ASTRest], Key, Atoms) ->
        walk_ast(ASTRest, Key, walk_ast(ExpressionsToProcess, Key, Atoms))).

walk_ast([], _Key, Atoms) -> Atoms;
walk_ast([{atom,Line,Atom}|RestAST], Key, Atoms) ->
    walk_ast(RestAST, Key, atoms_check(Atom, Line, Key, Atoms));
walk_ast([{attribute,Line,atoms,{Fun,Arity,AtomList}}|RestAST], Key, Atoms) ->
    AtomsNew = atoms_from_attr(Line, {function,Fun,Arity}, AtomList, Atoms),
    walk_ast(RestAST, Key, AtomsNew);
walk_ast([{attribute,Line,atoms,{Fun,AtomList}}|RestAST], Key, Atoms) ->
    AtomsNew = atoms_from_attr(Line, {function,Fun}, AtomList, Atoms),
    walk_ast(RestAST, Key, AtomsNew);
walk_ast([{attribute,Line,atoms,AtomList}|RestAST], Key, Atoms) ->
    AtomsNew = atoms_from_attr(Line, base_dict_key, AtomList, Atoms),
    walk_ast(RestAST, Key, AtomsNew);
?WALK_AST({attribute,_Line,_Name,_Value}, []); % Ignore all other attributes.
?WALK_AST({b_generate,_Line,Pattern,Expression}, [Pattern, Expression]);
?WALK_AST({bc,_Line,Head,Tail}, [Head|Tail]);
?WALK_AST({bin,_Line,BinElements}, BinElements);
?WALK_AST({bin_element,_Line,_Name,_Size,_Type}, []);
?WALK_AST({block,_Line,Expr}, [Expr]);
?WALK_AST({call,_Line,_Fun,Args}, Args); % Handles local and module calls.
?WALK_AST({'case',_Line,Test,Clauses}, [Test|Clauses]);
?WALK_AST({'catch',_Line,Expr}, Expr);
?WALK_AST({char,_Line,_Char}, []);
walk_ast([{clause,_Line,Pattern,Guards,Body}|RestAST], Key, Atoms) ->
    AtomsGuard = lists:foldl(
        fun(ASTGuard, AtomsGuard) ->
            walk_ast(ASTGuard, Key, AtomsGuard)
        walk_ast(Pattern, Key, Atoms), Guards),
    walk_ast(ASTRest, Key, walk_ast(Body, Key, AtomsGuard));
?WALK_AST({cons,_Line,Left,Right}, [Left,Right]);
?WALK_AST({eof,_Line}, []);
?WALK_AST({error,_Details}, []); % Ignore compiler errors.
?WALK_AST({float,_Line,_Float}, []);
?WALK_AST({'fun',_Line,{clauses,Clauses}}, Clauses);
?WALK_AST({'fun',_Line,_ModuleFunArity}, []);
walk_ast([{function,_Line,Fun,Arity,Clauses}|RestAST], Key, Atoms) ->
    walk_ast(RestAST, Key, walk_ast(Clauses, {function,Fun,Arity}, Atoms));
?WALK_AST({generate,_Line,Pattern,Expression}, [Pattern, Expression]);
?WALK_AST({'if',_Line,Clauses}, Clauses);
?WALK_AST({integer,_Line,_Integer}, []);
?WALK_AST({lc,_Line,Head,Tail}, [Head|Tail]);
?WALK_AST({match,_Line,Pattern,Expression}, [Pattern, Expression]);
?WALK_AST({nil,_Line}, []);
?WALK_AST({op,_Line,_BinaryOp,Left,Right}, [Left, Right]);
?WALK_AST({op,_Line,_UnaryOp,_Operand}, []);
?WALK_AST({'query',_Line,ListComprehension}, [ListComprehension]);
?WALK_AST({'receive',_Line,Clauses}, Clauses);
?WALK_AST({'receive',_Line,Clauses1,_TimeAfter,Clauses2}, Clauses1 ++ Clauses2);
?WALK_AST({record,_Line,_Record,Fields}, Fields);
?WALK_AST({record_field,_Line,Field,Value}, [Field, Value]);
?WALK_AST({record_field,_Line,_Variable,_Record,Field}, [Field]);
?WALK_AST({string,_Line,_String}, []);
            [Block] ++ CaseClauses ++ CatchClauses ++ AfterClauses);
?WALK_AST({tuple,_Line,Elements}, Elements);
?WALK_AST({var,_Line,_Name}, []);
?WALK_AST({warning,_Details}, []); % Ignore compiler warnings.
walk_ast([Node|ASTRest], Key, Atoms) ->
    io:format("Unknown node: ~p~n", [Node]),
    walk_ast(ASTRest, Key, Atoms).

PS: Does anyone know of an easy way to get Blogger to indent code properly? I am getting a little tired of pasting loads of "&nbsp;" everywhere...

Sunday, 15 April 2007

"Dynamic" record access functions with EMP1

Brian Olsen (over at Programming Experiments) wrote a small set of functions to make record accesses/updates in Erlang nicer. Ayrnieu wrote a detailed response to this in a comment on Reddit.

Brian wanted to hide some of the (admittedly pretty ugly) syntax of Erlang records in a simple way. He used some run-time list-searching to find the position in the record tuple that a particular field name occurs at, and then located the desired value at that position.

Now that we have EMP1 working I thought that perhaps I might see how I would use this particular tool to solve the same problem.

First of all we need to figure out what the functions we want should look like. I think something like this would do nicely:

recval(FieldName, Record) -> Value.
setrecval(FieldName, Record, Value) -> Updated Record.

Of course under the covers recval and setrecval would examine the record given and work out which field to retrieve / update.

Both Brian and Ayrneiu have this work done at run-time. With EMP1 we can build the supporting functions at compile-time based on the record information (which is already known at compile-time).

In detail, recval and company would look something like this:

recval(FieldName, Record) -> recval(element(1, Record), FieldName, Record).
recval(record1, field1, Record) -> element(2, Record);
recval(record1, field2, Record) -> element(3, Record);
recval(record2, field1, Record) -> element(2, Record);

...and similarly for the setrecval versions.

These functions can all be created at compile-time with EMP1, like this:


% Render the source text of one recval/3 clause for the given record name,
% field name, and tuple position. Returns chardata.
recval_field(RecName, FieldName, Position) ->
    Template = "recval(~w, ~w, Record) -> element(~B, Record)",
    io_lib:format(Template, [RecName, FieldName, Position]).

% Render the source text of one setrecval/4 clause for the given record
% name, field name, and tuple position. Returns chardata.
% Fix: the published listing lost its 'io_lib:format(' line to blog
% formatting, leaving a bare string expression; restored here to mirror
% the intact recval_field/3 directly above.
setrecval_field(NameRecord, NameField, Posn) ->
    io_lib:format(
        "setrecval(~w, ~w, Record, Value) -> setelement(~B, Record, Value)",
        [NameRecord, NameField, Posn]).

%% Build the source for every recval/3 clause of a single record.
%% Fields are numbered from tuple position 2 (position 1 holds the
%% record name). Each clause is prefixed with "; "; the caller strips
%% the leading separator from the combined result.
recval_record(RecordDetails) -> recval_record(RecordDetails, 2, []).

recval_record({_Name, []}, _Pos, Acc) ->
    Acc;
recval_record({Name, [Field | Fields]}, Pos, Acc) ->
    Clause = recval_field(Name, Field, Pos),
    recval_record({Name, Fields}, Pos + 1, Acc ++ "; " ++ Clause).

%% Build the source for every setrecval/4 clause of a single record;
%% the mirror image of recval_record/1 above. Clauses carry a "; "
%% prefix that the caller strips from the head of the result.
setrecval_record(RecordDetails) -> setrecval_record(RecordDetails, 2, []).

setrecval_record({_Name, []}, _Pos, Acc) ->
    Acc;
setrecval_record({Name, [Field | Fields]}, Pos, Acc) ->
    Clause = setrecval_field(Name, Field, Pos),
    setrecval_record({Name, Fields}, Pos + 1, Acc ++ "; " ++ Clause).

%% EMP1 macro entry point: given a list of {RecordName, [FieldName]}
%% tuples, return the source text of the recval/2,3 and
%% setrecval/3,4 function families. The per-record clause builders
%% emit a leading "; " separator, which is stripped here by prefix
%% pattern match before the dispatch wrappers are prepended.
recval_generate(ListRecordDetails) ->
    "; " ++ CodeGet =
        lists:flatten([recval_record(RD) || RD <- ListRecordDetails]),
    "; " ++ CodeSet =
        lists:flatten([setrecval_record(RD) || RD <- ListRecordDetails]),
    Dispatch =
        "recval(Field, Record) -> recval(element(1, Record), Field, Record). "
        "setrecval(Field, Record, Value) -> "
        "setrecval(element(1, Record), Field, Record, Value). ",
    Dispatch ++ io_lib:format("~s. ~s.", [CodeGet, CodeSet]).

And here is a test program:

%% Run the EMP1 parse transformation over this module at compile time.
-compile({parse_transform, emp1}).

%% Two demo records whose accessors will be generated.
-record(data1, {this, that}).
-record(data2, {this, the_other}).

%% EMP1 macro attribute ({Module, Function, Args}): at compile time
%% dyrec_macro:recval_generate/1 is applied to the record descriptions
%% and the returned function source is spliced into this module.
-macro({dyrec_macro, recval_generate,
    [[{data1, [this, that]}, {data2, [this, the_other]}]]}).

%% Exercise the generated accessors on both records plus one update.
%% NOTE(review): the post truncates mid-list here; the closing of the
%% result list and the function terminator are reconstructed to match
%% the surrounding text — confirm against the original source.
start() ->
    D1 = #data1{this=a, that=b},
    D2 = #data2{this=c, the_other=d},
    D3 = setrecval(this, D1, e),
    [recval(this, D1), recval(that, D1),
     recval(this, D2), recval(the_other, D2),
     recval(this, D3)].

After compiling both of them, we can run this at the REPL:

1> dyrec_test:start().

Personally I would not use EMP1 for this (particular) purpose. I do not mind Erlang's record syntax, but if I really did not want to use it I would rather build a parse-transformation (a la Yariv's recless module) to convert a different syntax into the record tuples Erlang uses behind the scenes.

By layering function calls on top of record/tuple field accesses we destroy the ability of Erlang's compiler to convert the usual record syntax into direct tuple element lookups at the point of reference. With this approach the runtime now has to perform a pattern match on record and field names before finding the appropriate value. (Possibly this overhead could be removed by the use of the compiler's 'inline' option, though.)

So my verdict on this jaunt into using EMP1 for layering function calls on record accesses, is "certainly possible, but not necessarily practical". Wait for EMP2 and use that instead. :-)

Friday, 13 April 2007

Erlang Macro Processor (v1), Part IV

We know what we want, we know how we want to use it, and so without further ado, here it is: the code for EMP1.

-author("Philip Robinson").

%% Compiler hook: walk the module's AST with an empty accumulator.
%% Compile options are ignored.
parse_transform(ASTIn, _Options) ->
    walk_ast(ASTIn, []).

%% Walk the top level of the AST, expanding each
%% -macro({Mod,Fun,Args}) attribute into the AST of the function
%% source returned by apply(Mod, Fun, Args); all other nodes pass
%% through unchanged. ASTOut is built in reverse (cheap cons) and
%% flipped once at the end.
%% Fix: the line invoking ast_reversed_results( was lost in the
%% post's formatting, leaving "Line, [])," dangling; restored per the
%% article text ("passed on to ast_reversed_results").
walk_ast([], ASTOut) -> lists:reverse(ASTOut);
walk_ast([{attribute,Line,macro,{Mod,Fun,Args}}|RestASTIn], ASTOut) ->
    %% Trailing space lets erl_scan terminate the final form (e.g.
    %% "42. "); flattening works around io_lib:format's nested output.
    ReversedResults =
        ast_reversed_results(
            lists:flatten([apply(Mod,Fun,Args)|[" "]]),
            Line, []),
    walk_ast(RestASTIn, ReversedResults ++ ASTOut);
walk_ast([Node|ASTInRest], ASTOut) -> walk_ast(ASTInRest, [Node|ASTOut]).

%% Tokenise and parse each complete form in ResultsString (one or
%% more function definitions), accumulating the parsed ASTs in
%% reverse order. erl_scan:tokens/3 returns {more, _} for
%% whitespace-only input, so leading whitespace is stripped and the
%% empty string used as the termination test instead.
%% Fix: the closing "end." of the case expression was missing from
%% the posted snippet; restored.
ast_reversed_results(ResultsString, LineStart, ASTResults) ->
    case string_trim_whitespace(ResultsString) of
        "" -> ASTResults;
        String ->
            {done,{ok,Tokens,LineEnd},StringRest} =
                erl_scan:tokens([], String, LineStart),
            {ok, AST} = erl_parse:parse_form(Tokens),
            ast_reversed_results(StringRest, LineEnd, [AST|ASTResults])
    end.

%% Drop leading tab (9), newline (10) and space (32) characters from
%% a string; any other input — including the empty string — is
%% returned untouched.
string_trim_whitespace([C | Rest]) when C =:= $\t; C =:= $\n; C =:= $\s ->
    string_trim_whitespace(Rest);
string_trim_whitespace(String) ->
    String.

And that is it - 30 lines of code.

No, really. That is all there is. I can take you through it in some detail, if you want. Fasten your seat-belts.

EMP1 In Detail

I will start with the "mostest ugliest" piece of Erlang code I have ever written: the string_trim_whitespace function.

This function returns the given string minus any leading tabs, carriage returns, or spaces. I searched the Erlang documentation and the Trap Exit website but I did not manage to find any built-in functions that achieved the same goal. Four lines of code seems a bit excessive for what it actually does and I am sure there must be a nicer way of writing it.

This function is actually a reasonably good example of Erlang pattern-matching and tail-recursion at work. If the given string begins with a tab (ASCII 9), carriage return (ASCII 10), or a space (ASCII 32), then it will match one of the first three function clauses. The first character will be dropped and the function recursively called with the rest of the string.

If the string does not match any of those three function clauses then it must not have a tab, carriage return, or space at the beginning, so the given string is returned as-is. This even works for the empty string. (Technically it would also match any non-string argument - integer, float, tuple, or whatever - and just return the input given.)

Even though the function uses recursion there is no danger of the stack blowing out no matter how large the string is. Erlang (like most functional languages) has a neat trick of turning tail recursion calls into a goto loop so the function executes in constant memory space. Others have explained tail-recursion better than I can, so let's move on...

Next on the list is the walk_ast function, which runs through the top level of the inbound AST and builds an outbound AST. The outbound AST list is built in reverse order to take advantage of the cheap list 'cons' operation: it is very quick to add or remove an element at the beginning of a list but much more expensive to add or remove an element at the end of a list. When the whole inbound AST has been processed (the argument matches the empty list) then the outbound AST is run through the lists:reverse function to switch it back to the right-way-around order again. If you are not yet familiar with this build-in-reverse-then-switch idiom, you soon will be. :-)

There are only three function clauses in this walk_ast function: The final case where we reverse and return the new AST, processing a 'macro' module attribute, and everything else.

The 'final' case I have covered above, and the 'everything else' case just passes the node straight to the outbound AST. The magic of EMP1 happens in the macro module attribute clause.

The walk_ast function looks for macro attributes of this form:

-macro({Module, Function, Args}).

When it finds a match it calls the module/function with the given args and captures the result, which should be a string representation of an Erlang function. It adds this string to the beginning of a list containing a single space and then flattens the total result.

A space is added to the end of the return string because erl_scan:tokens has a problem parsing something like "42." - it cannot tell if this is the beginning of a floating-point number. To avoid this I add a space to the end of the string; erl_scan:tokens knows that "42. " is just the integer 42.

The resulting string is also flattened because io_lib:format does some funny things when you use "~s" to embed a value string into a format string. For example, io_lib:format("ab~se", ["cd"]) produces [97,98,"cd",101] instead of an expected (in my opinion) "abcde". This might be okay for printing, which I presume flattens its input as it goes, but this is a terrible format for erl_scan to tokenise.

Once the macro's return string has been mutilated enough it is passed on to ast_reversed_results, for some further mangling.

The ast_reversed_results function does pretty much all the heavy lifting for the module. It takes in the current result string (a flattened text representation of one or more Erlang functions with a space at the end), the line the module attribute was declared, and the current AST list of processed results (in reversed order as per the functional programming idiom mentioned above).

The very first thing this function does is to strip leading whitespace characters from the input string, and test that result against the empty string.

For some reason erl_scan:tokens returns a {more, SomeWeirdStuff} tuple when it is handed a string of whitespace characters (and also when given the empty string). I have no idea what I should do with this result so I strip the leading whitespace characters out and test for the empty string instead.

If the stripped string is not empty then we want to tokenise and parse the first form (which should be a function definition), add the parsed results to the beginning of our AST list, and try again with the rest of the string (as it is possible to include more than one function definition in the macro return string).

If the stripped string is empty then there is nothing left to process and we can return the (reversed) AST of results. We keep these in reversed order because the list is just pre-pended to walk_ast's ASTOut, and it will all be re-reversed at the end.


EMP1 Epilogue and Notes

* An interesting 'feature' of EMP1 is that it may be used to create functions where the function name is programmatically generated. I am not sure why you might choose to create a whole bunch of separate, named functions over, say, creating one function with multiple clauses triggered by an atom argument, but EMP1 certainly makes it possible.

* I would recommend avoiding carriage returns in macro output strings. It does not actually break anything, but it tends to obfuscate the stack trace output of any runtime exceptions thrown from the generated code.

* One advantage of compile-time macros over run-time function-building techniques is that the usual compiler checks are run over the generated code. (The macro-created code is actually there at compile-time rather than appearing later at run-time.) I like to get my bug reports early, and if the compiler can complain then I don't need to wait for unit tests to raise an issue.

Using compile-time macros also means that static code analysis tools such as the Dialyzer will include the generated functions in its analysis and report.

There are, however, situations where not all of the information needed to create a function is available at compile-time. If you find yourself in such a predicament you might want to check out Yariv's smerl project, which makes it a lot easier to do runtime meta-programming.

I might need to use smerl when I write EMP2.

Obligatory legal stuff

Unless otherwise noted, all code appearing on this blog is released into the public domain and provided "as-is", without any warranty of any kind, express or implied, including but not limited to the warranties of merchantability, fitness for a particular purpose and noninfringement. In no event shall the author(s) be liable for any claim, damages, or other liability, whether in an action of contract, tort or otherwise, arising from, out of or in connection with the software or the use or other dealings in the software.