PH Logo
Logo interpreter modeled after UCB Logo.
/Users/paul/Documents/phlogo/core/Tokenizer.cpp
00001 /*
00002  *  Tokenizer.cpp
00003  *
00004  *  Author: Paul Hamilton
00005  *      Date: 4 Jul 2011
00006  *
00007  */
00008 #include "Tokenizer.h"
00009         
00010 #include "Exceptions.h"
00011 #include "Verbs.h"
00012 
00013 #include <boost/config/warning_disable.hpp>
00014 #include <boost/spirit/include/qi.hpp>
00015 #include <boost/spirit/include/phoenix_operator.hpp>
00016 #include <boost/spirit/include/phoenix_object.hpp>
00017 #include <boost/lexical_cast.hpp>
00018 
00019 using namespace std;
00020 using namespace boost;
00021 
00022 //#define DEBUG_SPIRIT
00023 
00024 namespace phlogo {
00025 
00026 namespace qi = spirit::qi;
00027 namespace phoenix = phoenix;
00028 namespace ascii = spirit::ascii;
00029 
00030 struct verbs_ : qi::symbols<char, string>
00031 {
00032         verbs_() {
00033         }
00034 
00035 } verbs;
00036 
// Forward declarations so that logo_ast's variant can name the operator
// node types before they are defined.
00037 class binary_op;
00038 class unary_op;
// NOTE(review): sequence_op is declared here but no definition is visible in this file.
00039 class sequence_op;
// Empty tag type; a default-constructed logo_ast holds a nil.
00040 class nil {};
00041 
00042 class logo_ast
00043 {
00044 public:
00045 
00046         typedef
00047                 boost::variant<
00048                         nil // can't happen!
00049                   , std::string
00050                   , std::vector<logo_ast>
00051                   , boost::recursive_wrapper<logo_ast>
00052                   , boost::recursive_wrapper<binary_op>
00053                   , boost::recursive_wrapper<unary_op>
00054                 >
00055         type;
00056 
00057         logo_ast()
00058           : expr(nil()) {}
00059 
00060         template <typename Expr>
00061         logo_ast(Expr const& expr)
00062           : expr(expr) {}
00063 
00064         logo_ast& operator+=(logo_ast const& rhs);
00065         logo_ast& operator-=(logo_ast const& rhs);
00066         logo_ast& operator*=(logo_ast const& rhs);
00067         logo_ast& operator/=(logo_ast const& rhs);
00068         
00069         logo_ast& operator%=(logo_ast const& rhs);
00070 
00071 private:
00072         friend class ast_to_string;
00073         friend class ast_print;
00074         
00075         type expr;
00076 };
00077 
00078 class binary_op
00079 {
00080 public:
00081         binary_op(
00082                 char op
00083           , logo_ast const& left
00084           , logo_ast const& right)
00085         : op(op), left(left), right(right) {}
00086 
00087 private:
00088         friend class ast_to_string;
00089         friend class ast_print;
00090 
00091         char op;
00092         logo_ast left;
00093         logo_ast right;
00094 };
00095 
00096 class unary_op
00097 {
00098 public:
00099         unary_op(
00100                 char op
00101           , logo_ast const& subject)
00102         : op(op), subject(subject) {}
00103 
00104 private:
00105         friend class ast_to_string;
00106         friend class ast_print;
00107         
00108         char op;
00109         logo_ast subject;
00110 };
00111 
00112 logo_ast& logo_ast::operator+=(logo_ast const& rhs)
00113 {
00114         expr = binary_op('+', expr, rhs);
00115         return *this;
00116 }
00117 
00118 logo_ast& logo_ast::operator-=(logo_ast const& rhs)
00119 {
00120         expr = binary_op('-', expr, rhs);
00121         return *this;
00122 }
00123 
00124 logo_ast& logo_ast::operator*=(logo_ast const& rhs)
00125 {
00126         expr = binary_op('*', expr, rhs);
00127         return *this;
00128 }
00129 
00130 logo_ast& logo_ast::operator/=(logo_ast const& rhs)
00131 {
00132         expr = binary_op('/', expr, rhs);
00133         return *this;
00134 }
00135 
00136 logo_ast& logo_ast::operator%=(logo_ast const& rhs)
00137 {
00138         try {
00139                 std::vector<logo_ast> v = boost::get<std::vector<logo_ast> >(expr);
00140                 v.push_back(rhs);
00141                 expr = v;
00142         } catch (boost::bad_get x) {
00143                 try {
00144                         boost::get<nil>(expr);
00145                         std::vector<logo_ast> v;
00146                         v.push_back(rhs);
00147                         expr = v;
00148                 }
00149                 catch (boost::bad_get x) {
00150                         // must be something else, how did we get to add a sequence to it!
00151                         throw;
00152                 }
00153         }       
00154         return *this;
00155 }
00156 
00157 class ast_to_string
00158 {
00159 public:
00160         ast_to_string(std::stringstream *s) : _s(s) {}
00161         
00162         typedef void result_type;
00163         
00164         void operator()(nil) const {}
00165         void operator()(std::string s) const 
00166         { 
00167                 *_s << s; 
00168         }
00169         void operator()(std::vector<logo_ast> v) const 
00170         { 
00171                 bool empty = _s->tellp() == 0;
00172                 if (!empty)
00173                         *_s << "( ";
00174                 for (std::vector<logo_ast>::iterator i=v.begin(); i != v.end(); i++) {
00175                         if (i != v.begin())
00176                                 *_s << " ";
00177                         boost::apply_visitor(*this, i->expr);
00178                 }
00179                 if (!empty)
00180                         *_s << " )";
00181         }
00182         
00183         void operator()(logo_ast const& ast) const
00184         {
00185                 boost::apply_visitor(*this, ast.expr);
00186         }
00187         
00188         void operator()(binary_op const& expr) const
00189         {
00190                 *_s << "$" << expr.op << " ";
00191                 boost::apply_visitor(*this, expr.left.expr);
00192                 *_s << " ";
00193                 boost::apply_visitor(*this, expr.right.expr);
00194         }
00195         
00196         void operator()(unary_op const& expr) const
00197         {
00198                 *_s << "$" << expr.op << " ";
00199                 boost::apply_visitor(*this, expr.subject.expr);
00200         }
00201 
00202 private:
00203         std::stringstream *_s;
00204 };
00205 
00206 std::ostream& operator << (std::ostream& out, logo_ast const& ast) {
00207         std::stringstream s;
00208         ast_to_string printer(&s);
00209         printer(ast);
00210         out << s.str();
00211         return out;
00212 }
00213 
// Boost.Spirit Qi grammar for one line of Logo source.
// The start rule is `sequence`; the grammar synthesises a logo_ast and
// skips ASCII whitespace between tokens (ascii::space_type skipper).
// Tree-valued rules build the result through logo_ast's compound
// assignment operators; string-valued rules (token, element, list, array)
// produce plain std::string.
00214 template <typename Iterator>
00215 struct logosyntax : qi::grammar<Iterator, logo_ast(), qi::locals<string>, ascii::space_type>
00216 {
00217         logosyntax() : logosyntax::base_type(sequence)
00218         {
00219                 using namespace qi::labels;
00220                 using qi::uint_;
00221                 using qi::char_;
00222                 using qi::on_error;
00223                 using qi::rethrow;
00224                 using qi::debug;
00225                 using qi::digit;
00226                 using qi::alpha;
00227                 using qi::alnum;
00228                 using qi::string;
00229                 using qi::eol;
00230                 using qi::skip;
00231                 using qi::as_string;
00232                 
00233                 using spirit::lexeme;
00234                 
00235                 using phoenix::construct;
00236                 using phoenix::val;
00237 
                      // sequence: one or more expressions, each appended to the
                      // result with %=, optionally followed by ';' and a run of
                      // alphanumerics (presumably a trailing comment - confirm).
00238                 sequence =
00239                         expression                                              [_val %= _1]
00240                         >> *(  
00241                                 (expression                             [_val %= _1])
00242                         ) 
00243                         >> -(';' >> *( alnum ))
00244                         ;
00245                         
                      // expression: term (('+' | '-') term)*. '>' is Qi's expectation
                      // operator: once the sign has matched, a term must follow.
00246                 expression = 
00247                         term                            [_val = _1]
00248                         >> *(  ('+' > term      [_val += _1])
00249                                  | ('-' > term  [_val -= _1])
00250                         )
00251                         ;
00252 
                      // term: factor (('*' | '/') factor)*.
00253            term =
00254                         factor                  [_val = _1]
00255                         >> *(  ('*' > factor    [_val *= _1])
00256                                  | ('/' > factor        [_val /= _1])
00257                         )
00258                         ;
00259 
                      // factor: a token/sub-expression, or a signed factor. On the
                      // signed branches _val has not been assigned before -= / += is
                      // applied, so the resulting binary_op presumably has an empty
                      // (nil) left operand.
00260                 factor =
00261                         tokenorsubexr           [_val = _1]   
00262                         |   ('-' > factor               [_val -= _1])
00263                         |   ('+' > factor               [_val += _1])
00264                         ;
00265 
                      // tokenorsubexr: a single token, or one or more expressions in
                      // parentheses collected into a sequence.
00266                 tokenorsubexr =
00267                         token                                   [_val = _1]   
00268                         | '(' 
00269                                 >> expression                   [_val %= _1]
00270                                 >> *(  
00271                                         (expression                     [_val %= _1])
00272                                 )
00273                                 >> ')'
00274                         ;
00275 
                      // token (string-valued), alternatives tried in order:
                      //   - a run of digits and '.' characters
                      //   - '"' followed by letters/digits/'.' (the quote is outside
                      //     as_string, so it is not part of the value)
                      //   - ':' followed by letters/digits/'.' (the colon is inside
                      //     as_string, so it is kept)
                      //   - an entry of the `verbs` symbol table (yields the mapped value)
                      //   - a list or an array
00276                 token =
00277                         as_string[lexeme[(+(digit | string(".")))]]     [_val = _1]
00278                         | '\"' > as_string[lexeme[
00279                                 +(alpha | digit | string("."))
00280                                 ]]                                                                              [_val = _1]
00281                         | as_string[lexeme[
00282                                 string(":") 
00283                 > +(alpha | digit | string("."))
00284                                 ]]                                                                              [_val = _1]
00285                         | verbs                                                                         [_val = _1]
00286                         | list                                                                          [_val = _1]
00287                         | array                                                                         [_val = _1]
00288                         ;
00289                         
                      // element (string-valued): what may appear inside a list or
                      // array - a verb, a purely alphabetic word, a run of digits,
                      // a '"'-prefixed word, or a nested list/array.
00290                 element = 
00291             verbs                               [_val = _1]
00292                         | as_string[lexeme[(+alpha)]]           [_val = _1]
00293                         | as_string[lexeme[(+digit)]]           [_val = _1]
00294             | '\"' > as_string[lexeme[
00295                 +(alpha | digit | string("."))
00296                 ]]                                                              [_val = _1]
00297                         | list                              [_val = _1]
00298                         | array                             [_val = _1]
00299                         ;
00300                         
                      // list: "[" elements "]". The text is accumulated in the rule
                      // local _a as "[" + elements joined by "," + "]" and then
                      // assigned to the rule's value.
00301                 list =
00302                         (
00303                                 string("[")                                     [_a = _1]
00304                                 >> -(   
00305                                         element                                         [_a += _1]
00306                                         >> *( element                           [_a += "," + _1])       
00307                                 )
00308                                 >> string("]")                                  [_a += _1]
00309                         )                                                                       [_val = _a];
00310 
                      // array: same construction as list, delimited by "{" and "}".
00311                 array =
00312                         (
00313                                 string("{")                                     [_a = _1]
00314                                 >> -(   
00315                                         element                                         [_a += _1]
00316                                         >> *( element                           [_a += "," + _1])       
00317                                 )
00318                                 >> string("}")                                  [_a += _1]
00319                         )                                                                       [_val = _a];
00320 
                      // Rule names, used by Spirit in debug output and error reports.
00321                 expression.name("expression");
00322                 term.name("term");
00323                 factor.name("factor");
00324                 sequence.name("sequence");
00325                 token.name("token");
00326                 tokenorsubexr.name("tokenorsubexr");
00327                 list.name("list");
00328                 array.name("array");
00329                 element.name("element");
00330                 
                      // On an expectation failure in `sequence`, run a handler that
                      // writes an empty string to std::cout and rethrow the failure.
00331                 on_error<rethrow>
00332                 (
00333                         sequence, 
00334                         std::cout << val("")
00335                 );
00336 
00337 
                      // Rule tracing; enabled by defining DEBUG_SPIRIT.
00338 #ifdef DEBUG_SPIRIT
00339                 debug(sequence);
00340                 debug(token);
00341                 debug(tokenorsubexr);
00342                 debug(list);
00343                 debug(array);
00344                 debug(element);
00345                 debug(expression);
00346                 debug(term);
00347                 debug(factor);
00348 #endif
00349         }
00350 
      // Tree-valued rules (attribute: logo_ast) and string-valued rules
      // (attribute: std::string). All declare a string local; only list and
      // array use it (_a).
00351    qi::rule<Iterator, logo_ast(), qi::locals<string>, ascii::space_type> sequence, expression, term, factor, tokenorsubexr;
00352    qi::rule<Iterator, string(), qi::locals<string>, ascii::space_type> list, element, token, array;
00353 };
00354 
00355 string Tokenizer::tokenizeToString(const string &input) {
00356 
00357     using spirit::ascii::space;
00358     typedef std::string::const_iterator iterator_type;
00359     typedef logosyntax<iterator_type> logosyntax;
00360 
00361         // add to the symbol table...
00362         std::map<std::string, std::string>::const_iterator i = _verbs->getVerbTokenMap().begin();
00363         for (; i != _verbs->getVerbTokenMap().end(); i++)
00364                 verbs.add(i->first, i->second);
00365         i = _verbs->getFuncTokenMap().begin();
00366         for (; i != _verbs->getFuncTokenMap().end(); i++)
00367                 verbs.add(i->first, i->second);
00368         
00369         logosyntax syntax;
00370         std::string::const_iterator iter = input.begin();
00371     std::string::const_iterator end = input.end();
00372     logo_ast result;
00373         bool r;
00374     try {
00375                 r = phrase_parse(iter, end, syntax, space, result);
00376         }
00377         catch (spirit::qi::expectation_failure<iterator_type> const& x) {
00378         
00379                 try {
00380                         BOOST_THROW_EXCEPTION(
00381                                 dont_know_how_exception() << 
00382                                         errinfo_tag(x.what_.tag) << 
00383                                         errinfo_value(std::string(x.first, x.last)) );
00384                 }
00385                 catch (bad_get const &x) {
00386                         r = false;
00387                 }
00388         }
00389         if (!r || iter != end) {
00390                 BOOST_THROW_EXCEPTION(
00391                           dont_know_how_exception() << 
00392                           errinfo_tag("expr") << 
00393                           errinfo_value(input) );
00394         }
00395         std::stringstream s;
00396         ast_to_string printer(&s);
00397         printer(result);
00398         
00399         return s.str();
00400 
00401 }
00402 
00403 }
 All Classes Functions