|
PH Logo
Logo interpreter modeled after UCB Logo.
|
00001 /* 00002 * Tokenizer.cpp 00003 * 00004 * Author: Paul Hamilton 00005 * Date: 4 Jul 2011 00006 * 00007 */ 00008 #include "Tokenizer.h" 00009 00010 #include "Exceptions.h" 00011 #include "Verbs.h" 00012 00013 #include <boost/config/warning_disable.hpp> 00014 #include <boost/spirit/include/qi.hpp> 00015 #include <boost/spirit/include/phoenix_operator.hpp> 00016 #include <boost/spirit/include/phoenix_object.hpp> 00017 #include <boost/lexical_cast.hpp> 00018 00019 using namespace std; 00020 using namespace boost; 00021 00022 //#define DEBUG_SPIRIT 00023 00024 namespace phlogo { 00025 00026 namespace qi = spirit::qi; 00027 namespace phoenix = phoenix; 00028 namespace ascii = spirit::ascii; 00029 00030 struct verbs_ : qi::symbols<char, string> 00031 { 00032 verbs_() { 00033 } 00034 00035 } verbs; 00036 00037 class binary_op; 00038 class unary_op; 00039 class sequence_op; 00040 class nil {}; 00041 00042 class logo_ast 00043 { 00044 public: 00045 00046 typedef 00047 boost::variant< 00048 nil // can't happen! 
00049 , std::string 00050 , std::vector<logo_ast> 00051 , boost::recursive_wrapper<logo_ast> 00052 , boost::recursive_wrapper<binary_op> 00053 , boost::recursive_wrapper<unary_op> 00054 > 00055 type; 00056 00057 logo_ast() 00058 : expr(nil()) {} 00059 00060 template <typename Expr> 00061 logo_ast(Expr const& expr) 00062 : expr(expr) {} 00063 00064 logo_ast& operator+=(logo_ast const& rhs); 00065 logo_ast& operator-=(logo_ast const& rhs); 00066 logo_ast& operator*=(logo_ast const& rhs); 00067 logo_ast& operator/=(logo_ast const& rhs); 00068 00069 logo_ast& operator%=(logo_ast const& rhs); 00070 00071 private: 00072 friend class ast_to_string; 00073 friend class ast_print; 00074 00075 type expr; 00076 }; 00077 00078 class binary_op 00079 { 00080 public: 00081 binary_op( 00082 char op 00083 , logo_ast const& left 00084 , logo_ast const& right) 00085 : op(op), left(left), right(right) {} 00086 00087 private: 00088 friend class ast_to_string; 00089 friend class ast_print; 00090 00091 char op; 00092 logo_ast left; 00093 logo_ast right; 00094 }; 00095 00096 class unary_op 00097 { 00098 public: 00099 unary_op( 00100 char op 00101 , logo_ast const& subject) 00102 : op(op), subject(subject) {} 00103 00104 private: 00105 friend class ast_to_string; 00106 friend class ast_print; 00107 00108 char op; 00109 logo_ast subject; 00110 }; 00111 00112 logo_ast& logo_ast::operator+=(logo_ast const& rhs) 00113 { 00114 expr = binary_op('+', expr, rhs); 00115 return *this; 00116 } 00117 00118 logo_ast& logo_ast::operator-=(logo_ast const& rhs) 00119 { 00120 expr = binary_op('-', expr, rhs); 00121 return *this; 00122 } 00123 00124 logo_ast& logo_ast::operator*=(logo_ast const& rhs) 00125 { 00126 expr = binary_op('*', expr, rhs); 00127 return *this; 00128 } 00129 00130 logo_ast& logo_ast::operator/=(logo_ast const& rhs) 00131 { 00132 expr = binary_op('/', expr, rhs); 00133 return *this; 00134 } 00135 00136 logo_ast& logo_ast::operator%=(logo_ast const& rhs) 00137 { 00138 try { 00139 
std::vector<logo_ast> v = boost::get<std::vector<logo_ast> >(expr); 00140 v.push_back(rhs); 00141 expr = v; 00142 } catch (boost::bad_get x) { 00143 try { 00144 boost::get<nil>(expr); 00145 std::vector<logo_ast> v; 00146 v.push_back(rhs); 00147 expr = v; 00148 } 00149 catch (boost::bad_get x) { 00150 // must be something else, how did we get to add a sequence to it! 00151 throw; 00152 } 00153 } 00154 return *this; 00155 } 00156 00157 class ast_to_string 00158 { 00159 public: 00160 ast_to_string(std::stringstream *s) : _s(s) {} 00161 00162 typedef void result_type; 00163 00164 void operator()(nil) const {} 00165 void operator()(std::string s) const 00166 { 00167 *_s << s; 00168 } 00169 void operator()(std::vector<logo_ast> v) const 00170 { 00171 bool empty = _s->tellp() == 0; 00172 if (!empty) 00173 *_s << "( "; 00174 for (std::vector<logo_ast>::iterator i=v.begin(); i != v.end(); i++) { 00175 if (i != v.begin()) 00176 *_s << " "; 00177 boost::apply_visitor(*this, i->expr); 00178 } 00179 if (!empty) 00180 *_s << " )"; 00181 } 00182 00183 void operator()(logo_ast const& ast) const 00184 { 00185 boost::apply_visitor(*this, ast.expr); 00186 } 00187 00188 void operator()(binary_op const& expr) const 00189 { 00190 *_s << "$" << expr.op << " "; 00191 boost::apply_visitor(*this, expr.left.expr); 00192 *_s << " "; 00193 boost::apply_visitor(*this, expr.right.expr); 00194 } 00195 00196 void operator()(unary_op const& expr) const 00197 { 00198 *_s << "$" << expr.op << " "; 00199 boost::apply_visitor(*this, expr.subject.expr); 00200 } 00201 00202 private: 00203 std::stringstream *_s; 00204 }; 00205 00206 std::ostream& operator << (std::ostream& out, logo_ast const& ast) { 00207 std::stringstream s; 00208 ast_to_string printer(&s); 00209 printer(ast); 00210 out << s.str(); 00211 return out; 00212 } 00213 00214 template <typename Iterator> 00215 struct logosyntax : qi::grammar<Iterator, logo_ast(), qi::locals<string>, ascii::space_type> 00216 { 00217 logosyntax() : 
logosyntax::base_type(sequence) 00218 { 00219 using namespace qi::labels; 00220 using qi::uint_; 00221 using qi::char_; 00222 using qi::on_error; 00223 using qi::rethrow; 00224 using qi::debug; 00225 using qi::digit; 00226 using qi::alpha; 00227 using qi::alnum; 00228 using qi::string; 00229 using qi::eol; 00230 using qi::skip; 00231 using qi::as_string; 00232 00233 using spirit::lexeme; 00234 00235 using phoenix::construct; 00236 using phoenix::val; 00237 00238 sequence = 00239 expression [_val %= _1] 00240 >> *( 00241 (expression [_val %= _1]) 00242 ) 00243 >> -(';' >> *( alnum )) 00244 ; 00245 00246 expression = 00247 term [_val = _1] 00248 >> *( ('+' > term [_val += _1]) 00249 | ('-' > term [_val -= _1]) 00250 ) 00251 ; 00252 00253 term = 00254 factor [_val = _1] 00255 >> *( ('*' > factor [_val *= _1]) 00256 | ('/' > factor [_val /= _1]) 00257 ) 00258 ; 00259 00260 factor = 00261 tokenorsubexr [_val = _1] 00262 | ('-' > factor [_val -= _1]) 00263 | ('+' > factor [_val += _1]) 00264 ; 00265 00266 tokenorsubexr = 00267 token [_val = _1] 00268 | '(' 00269 >> expression [_val %= _1] 00270 >> *( 00271 (expression [_val %= _1]) 00272 ) 00273 >> ')' 00274 ; 00275 00276 token = 00277 as_string[lexeme[(+(digit | string(".")))]] [_val = _1] 00278 | '\"' > as_string[lexeme[ 00279 +(alpha | digit | string(".")) 00280 ]] [_val = _1] 00281 | as_string[lexeme[ 00282 string(":") 00283 > +(alpha | digit | string(".")) 00284 ]] [_val = _1] 00285 | verbs [_val = _1] 00286 | list [_val = _1] 00287 | array [_val = _1] 00288 ; 00289 00290 element = 00291 verbs [_val = _1] 00292 | as_string[lexeme[(+alpha)]] [_val = _1] 00293 | as_string[lexeme[(+digit)]] [_val = _1] 00294 | '\"' > as_string[lexeme[ 00295 +(alpha | digit | string(".")) 00296 ]] [_val = _1] 00297 | list [_val = _1] 00298 | array [_val = _1] 00299 ; 00300 00301 list = 00302 ( 00303 string("[") [_a = _1] 00304 >> -( 00305 element [_a += _1] 00306 >> *( element [_a += "," + _1]) 00307 ) 00308 >> string("]") [_a += _1] 
00309 ) [_val = _a]; 00310 00311 array = 00312 ( 00313 string("{") [_a = _1] 00314 >> -( 00315 element [_a += _1] 00316 >> *( element [_a += "," + _1]) 00317 ) 00318 >> string("}") [_a += _1] 00319 ) [_val = _a]; 00320 00321 expression.name("expression"); 00322 term.name("term"); 00323 factor.name("factor"); 00324 sequence.name("sequence"); 00325 token.name("token"); 00326 tokenorsubexr.name("tokenorsubexr"); 00327 list.name("list"); 00328 array.name("array"); 00329 element.name("element"); 00330 00331 on_error<rethrow> 00332 ( 00333 sequence, 00334 std::cout << val("") 00335 ); 00336 00337 00338 #ifdef DEBUG_SPIRIT 00339 debug(sequence); 00340 debug(token); 00341 debug(tokenorsubexr); 00342 debug(list); 00343 debug(array); 00344 debug(element); 00345 debug(expression); 00346 debug(term); 00347 debug(factor); 00348 #endif 00349 } 00350 00351 qi::rule<Iterator, logo_ast(), qi::locals<string>, ascii::space_type> sequence, expression, term, factor, tokenorsubexr; 00352 qi::rule<Iterator, string(), qi::locals<string>, ascii::space_type> list, element, token, array; 00353 }; 00354 00355 string Tokenizer::tokenizeToString(const string &input) { 00356 00357 using spirit::ascii::space; 00358 typedef std::string::const_iterator iterator_type; 00359 typedef logosyntax<iterator_type> logosyntax; 00360 00361 // add to the symbol table... 
00362 std::map<std::string, std::string>::const_iterator i = _verbs->getVerbTokenMap().begin(); 00363 for (; i != _verbs->getVerbTokenMap().end(); i++) 00364 verbs.add(i->first, i->second); 00365 i = _verbs->getFuncTokenMap().begin(); 00366 for (; i != _verbs->getFuncTokenMap().end(); i++) 00367 verbs.add(i->first, i->second); 00368 00369 logosyntax syntax; 00370 std::string::const_iterator iter = input.begin(); 00371 std::string::const_iterator end = input.end(); 00372 logo_ast result; 00373 bool r; 00374 try { 00375 r = phrase_parse(iter, end, syntax, space, result); 00376 } 00377 catch (spirit::qi::expectation_failure<iterator_type> const& x) { 00378 00379 try { 00380 BOOST_THROW_EXCEPTION( 00381 dont_know_how_exception() << 00382 errinfo_tag(x.what_.tag) << 00383 errinfo_value(std::string(x.first, x.last)) ); 00384 } 00385 catch (bad_get const &x) { 00386 r = false; 00387 } 00388 } 00389 if (!r || iter != end) { 00390 BOOST_THROW_EXCEPTION( 00391 dont_know_how_exception() << 00392 errinfo_tag("expr") << 00393 errinfo_value(input) ); 00394 } 00395 std::stringstream s; 00396 ast_to_string printer(&s); 00397 printer(result); 00398 00399 return s.str(); 00400 00401 } 00402 00403 }
1.7.4