Tuesday 17 April 2007

The Atomiser, Redux

I have received some great comments and suggestions regarding the Atomiser; as a result I have added a new (optional) feature to the module. (Don't worry - The Atomiser may be new and improved, but it is still 100% backward-compatible!)


As usual, you may specify a list of globally-valid atoms:

-atoms([atom1, atom2...]).

You may now also specify function-specific atom lists in two ways. The first method is to add a function name (only) to an atoms declaration entry. The atoms specified will then be valid within all 'fun_name' functions, regardless of the arity of those function definitions:

-atoms({fun_name, [atom1, atom2...]}).

(Unfortunately we have to wrap all this information up in a single tuple: 'wild' module attributes can only contain one value.)

To be even more specific you may add a function name and an arity to an atoms declaration. These atoms will then be valid within that specific 'fun_name/arity' function definition:

-atoms({fun_name, arity, [atom1, atom2...]}).

Atoms declarations are cumulative: globally-valid atoms (if any) are included along with function and function/arity atoms when checking for valid atoms within a given function definition.


You might notice that in the code below I have added a few new clauses into the walk_ast function. I was a bit concerned that I may have missed some node types from the Erlang AST, so I cracked open the only reference I had seen of the Abstract Format and added a few more function clauses that I had initially overlooked. I am pretty sure that just about everything is in there now, but feel free to disabuse me of that notion. :-)


Finally, I cleaned up the ?WALK_AST macro a little so that it no longer requires a list of ASTs to process: it now works directly off a single AST. Removing embedded lists has simplified the use of this macro quite considerably.


The new Atomiser Module:


%% The Atomiser: a parse transform that checks the atoms used in a module
%% against the atoms declared in its -atoms attributes, printing warnings for
%% atoms that are unexpected, declared twice, or declared but never used.
-module(atomiser).
-author("Philip Robinson").
-vsn('1.1.1').
-export([parse_transform/2]).
%-compile({parse_transform, atomiser}). % Uncomment after initial compile.

%% Atom declarations for this module itself, using the three supported forms:
%%   -atoms(AtomList)               - valid in every function;
%%   -atoms({Fun, AtomList})        - valid in every arity of Fun;
%%   -atoms({Fun, Arity, AtomList}) - valid in Fun/Arity only.
-atoms([base_dict_key,error, ok]). % Atoms used in four or more functions.
-atoms({atoms_check, 5, [found]}).
-atoms({atoms_unused_print, 1, [found]}).
-atoms({key_more_general, 1, [function]}).
-atoms({parse_transform, 2, [report_warnings,true]}).
%% walk_ast/3 pattern-matches on abstract-format node tags, so every tag it
%% mentions has to be declared here.
-atoms({walk_ast, 3, [atom, atoms, attribute, b_generate, bc, bin, bin_element,
        block, call, 'case', 'catch', char, clause, clauses, cons, eof, float,
        'fun', function, generate, 'if', integer, lc, match, nil, op, 'query',
        'receive', record, record_field, string, 'try', tuple, var, warning]}).

%% Parse-transform entry point.
%%
%% AST is the list of abstract forms of the module being compiled and Options
%% is the compiler option list. When report_warnings is among the options the
%% forms are walked (printing atom warnings as a side effect) and any declared
%% but unused atoms are reported afterwards. The forms are always returned
%% unchanged.
parse_transform(AST, Options) ->
    case lists:member(report_warnings, Options) of
        true ->
            %% Start with an empty dictionary for the globally-valid atoms.
            AtomsInitial = dict:store(base_dict_key, dict:new(), dict:new()),
            AtomsWalked = walk_ast(AST, base_dict_key, AtomsInitial),
            atoms_unused_print(AtomsWalked);
        _ ->
            ok
    end,
    AST.

%% Returns DictInitial extended with every atom in AtomList, each mapped to
%% Line (the line of the declaring attribute).
%%
%% An atom that is already a key of the dictionary keeps its existing value
%% and a "already defined" warning is printed instead.
dict_with_added_atoms(Line, AtomList, DictInitial) ->
    lists:foldl(
        fun(Atom, DictAcc) ->
            case dict:find(Atom, DictAcc) of
                error ->
                    dict:store(Atom, Line, DictAcc);
                {ok,LinePrevious} ->
                    io:format(
                        "~s:~B Warning: atom '~w' already defined on line ~B.~n",
                        [?FILE, Line, Atom, LinePrevious]),
                    DictAcc
            end
        end,
        DictInitial,
        AtomList).

%% Records the atoms of one -atoms attribute.
%%
%% Key selects the per-scope dictionary inside Atoms (created empty when it
%% does not exist yet); the atoms in AtomList are added to it with Line as
%% their value, and the updated Atoms dictionary is returned.
atoms_from_attr(Line, Key, AtomList, Atoms) ->
    DictExisting =
        case dict:find(Key, Atoms) of
            error -> dict:new();
            {ok,DictFound} -> DictFound
        end,
    DictUpdated = dict_with_added_atoms(Line, AtomList, DictExisting),
    dict:store(Key, DictUpdated, Atoms).

%% Checks Atom (seen on Line) against the declarations visible from KeyDict.
%%
%% When no dictionary is stored under KeyDict the lookup is retried with the
%% next more general key; otherwise the check continues in atoms_check/5 with
%% the dictionary that was found. Returns the (possibly updated) Atoms.
atoms_check(Atom, Line, KeyDict, Atoms) ->
    case dict:find(KeyDict, Atoms) of
        error ->
            KeyWider = key_more_general(KeyDict),
            atoms_check(Atom, Line, KeyWider, Atoms);
        {ok,Dict} ->
            atoms_check(Atom, Line, KeyDict, Dict, Atoms)
    end.

%% Checks Atom against Dict, the dictionary stored in Atoms under KeyDict.
%%
%%  * already marked found        -> Atoms is returned untouched;
%%  * declared (value is a line)  -> the atom is re-stored as found;
%%  * undeclared, global scope    -> an "unexpected" warning is printed;
%%  * undeclared, narrower scope  -> the next more general scope is tried.
atoms_check(Atom, Line, KeyDict, Dict, Atoms) ->
    case {dict:find(Atom, Dict), KeyDict} of
        {{ok,found}, _} ->
            Atoms;
        {{ok,_LineDefinedOn}, _} ->
            DictMarked = dict:store(Atom, found, Dict),
            dict:store(KeyDict, DictMarked, Atoms);
        {error, base_dict_key} ->
            io:format("~s:~B Warning: atom '~w' unexpected.~n",
                [?FILE, Line, Atom]),
            Atoms;
        {error, _} ->
            atoms_check(Atom, Line, key_more_general(KeyDict), Atoms)
    end.

%% Maps a scope key to the next more general one:
%% function/arity -> function (any arity) -> global.
key_more_general({function,Name,_}) ->
    {function,Name};
key_more_general({function,_}) ->
    base_dict_key.

%% Prints one "unused" warning for every declared atom that was never marked
%% as found, across all scope dictionaries in Atoms, ordered by the line the
%% atom was declared on. Returns ok.
atoms_unused_print(Atoms) ->
    CollectUnused = fun({_ScopeKey,ScopeDict}, Collected) ->
        %% Entries still holding a line number were never seen in the code.
        Pending = [Entry || {_Atom,Mark} = Entry <- dict:to_list(ScopeDict),
                            Mark =/= found],
        lists:keymerge(2, lists:keysort(2, Pending), Collected)
    end,
    Unused = lists:foldl(CollectUnused, [], dict:to_list(Atoms)),
    lists:foreach(
        fun({Atom,Line}) ->
            io:format("~s:~B Warning: atom '~w' unused.~n", [?FILE, Line, Atom])
        end,
        Unused).

%% ?WALK_AST(PatternToMatch, ExpressionsToProcess) expands to one walk_ast/3
%% clause. The clause matches a node of shape PatternToMatch at the head of
%% the node list, walks ExpressionsToProcess under the same Key, and then
%% continues with the remaining nodes.
%%
%% ExpressionsToProcess must be a LIST of nodes: wrap a single node as
%% [Node], and pass an existing list of nodes (e.g. a body) as-is.
-define(WALK_AST(PatternToMatch, ExpressionsToProcess),
    walk_ast([PatternToMatch|ASTRest], Key, Atoms) ->
        walk_ast(ASTRest, Key, walk_ast(ExpressionsToProcess, Key, Atoms))).

%% walk_ast(AST, Key, Atoms) -> Atoms
%%
%% Walks AST, a list of abstract-format nodes, and returns the updated Atoms
%% dictionary.
%%   Key   - scope used for atom checks: base_dict_key outside functions,
%%           {function,Fun,Arity} inside a function definition.
%%   Atoms - dictionary of per-scope atom dictionaries.
%% Atom nodes are checked with atoms_check/4, -atoms attributes are recorded
%% with atoms_from_attr/4, and nodes that match no clause are reported as
%% "Unknown node" and skipped.
walk_ast([], _Key, Atoms) -> Atoms;
walk_ast([{atom,Line,Atom}|RestAST], Key, Atoms) ->
    walk_ast(RestAST, Key, atoms_check(Atom, Line, Key, Atoms));
%% The three -atoms forms, most specific first: the bare-list clause would
%% otherwise also match the tuple forms.
walk_ast([{attribute,Line,atoms,{Fun,Arity,AtomList}}|RestAST], Key, Atoms) ->
    AtomsNew = atoms_from_attr(Line, {function,Fun,Arity}, AtomList, Atoms),
    walk_ast(RestAST, Key, AtomsNew);
walk_ast([{attribute,Line,atoms,{Fun,AtomList}}|RestAST], Key, Atoms) ->
    AtomsNew = atoms_from_attr(Line, {function,Fun}, AtomList, Atoms),
    walk_ast(RestAST, Key, AtomsNew);
walk_ast([{attribute,Line,atoms,AtomList}|RestAST], Key, Atoms) ->
    AtomsNew = atoms_from_attr(Line, base_dict_key, AtomList, Atoms),
    walk_ast(RestAST, Key, AtomsNew);
?WALK_AST({attribute,_Line,_Name,_Value}, []); % Ignore all other attributes.
?WALK_AST({b_generate,_Line,Pattern,Expression}, [Pattern, Expression]);
?WALK_AST({bc,_Line,Head,Tail}, [Head|Tail]);
?WALK_AST({bin,_Line,BinElements}, BinElements);
?WALK_AST({bin_element,_Line,_Name,_Size,_Type}, []);
%% A block carries a body, i.e. already a list of expressions.
?WALK_AST({block,_Line,Body}, Body);
?WALK_AST({call,_Line,_Fun,Args}, Args); % Handles local and module calls.
?WALK_AST({'case',_Line,Test,Clauses}, [Test|Clauses]);
%% 'catch' carries a single expression, which must be wrapped in a list.
?WALK_AST({'catch',_Line,Expr}, [Expr]);
?WALK_AST({char,_Line,_Char}, []);
%% Pattern and Body are node lists; Guards is a list of guards, each of which
%% is itself a list of guard tests, hence the fold.
walk_ast([{clause,_Line,Pattern,Guards,Body}|RestAST], Key, Atoms) ->
    AtomsPattern = walk_ast(Pattern, Key, Atoms),
    AtomsGuards = lists:foldl(
        fun(ASTGuard, AtomsAcc) -> walk_ast(ASTGuard, Key, AtomsAcc) end,
        AtomsPattern, Guards),
    walk_ast(RestAST, Key, walk_ast(Body, Key, AtomsGuards));
?WALK_AST({cons,_Line,Left,Right}, [Left,Right]);
?WALK_AST({eof,_Line}, []);
?WALK_AST({error,_Details}, []); % Ignore compiler errors.
?WALK_AST({float,_Line,_Float}, []);
?WALK_AST({'fun',_Line,{clauses,Clauses}}, Clauses);
?WALK_AST({'fun',_Line,_ModuleFunArity}, []);
%% Entering a function definition switches the scope key for its clauses.
walk_ast([{function,_Line,Fun,Arity,Clauses}|RestAST], Key, Atoms) ->
    walk_ast(RestAST, Key, walk_ast(Clauses, {function,Fun,Arity}, Atoms));
?WALK_AST({generate,_Line,Pattern,Expression}, [Pattern, Expression]);
?WALK_AST({'if',_Line,Clauses}, Clauses);
?WALK_AST({integer,_Line,_Integer}, []);
?WALK_AST({lc,_Line,Head,Tail}, [Head|Tail]);
?WALK_AST({match,_Line,Pattern,Expression}, [Pattern, Expression]);
?WALK_AST({nil,_Line}, []);
?WALK_AST({op,_Line,_BinaryOp,Left,Right}, [Left, Right]);
%% The operand of a unary operator may contain atoms too (e.g. not f(foo)).
?WALK_AST({op,_Line,_UnaryOp,Operand}, [Operand]);
?WALK_AST({'query',_Line,ListComprehension}, [ListComprehension]);
?WALK_AST({'receive',_Line,Clauses}, Clauses);
?WALK_AST({'receive',_Line,Clauses1,_TimeAfter,Clauses2}, Clauses1 ++ Clauses2);
?WALK_AST({record,_Line,_Record,Fields}, Fields);
?WALK_AST({record_field,_Line,Field,Value}, [Field, Value]);
?WALK_AST({record_field,_Line,_Variable,_Record,Field}, [Field]);
?WALK_AST({string,_Line,_String}, []);
%% The protected body of a 'try' is a list of expressions, like the clause
%% lists and the 'after' body, so all four are simply concatenated.
?WALK_AST({'try',_Line,Body,CaseClauses,CatchClauses,AfterBody},
            Body ++ CaseClauses ++ CatchClauses ++ AfterBody);
?WALK_AST({tuple,_Line,Elements}, Elements);
?WALK_AST({var,_Line,_Name}, []);
?WALK_AST({warning,_Details}, []); % Ignore compiler warnings.
%% Anything not recognised above is reported and skipped rather than crashing
%% the compilation.
walk_ast([Node|ASTRest], Key, Atoms) ->
    io:format("Unknown node: ~p~n", [Node]),
    walk_ast(ASTRest, Key, Atoms).



PS: Does anyone know of an easy way to get Blogger to indent code properly? I am getting a little tired of pasting loads of "&nbsp;" everywhere...

No comments:

Post a Comment

Obligatory legal stuff

Unless otherwise noted, all code appearing on this blog is released into the public domain and provided "as-is", without any warranty of any kind, express or implied, including but not limited to the warranties of merchantability, fitness for a particular purpose and noninfringement. In no event shall the author(s) be liable for any claim, damages, or other liability, whether in an action of contract, tort or otherwise, arising from, out of or in connection with the software or the use or other dealings in the software.