mirror of
https://github.com/PurpleI2P/i2pd.git
synced 2025-06-08 15:16:52 +02:00
start work on webconsole with templates
Signed-off-by: R4SAS <r4sas@i2pmail.org>
This commit is contained in:
parent
47460d86b2
commit
a843be75f3
60 changed files with 24925 additions and 38 deletions
435
libi2pd_webconsole/inja/lexer.hpp
Normal file
435
libi2pd_webconsole/inja/lexer.hpp
Normal file
|
@ -0,0 +1,435 @@
|
|||
#ifndef INCLUDE_INJA_LEXER_HPP_
|
||||
#define INCLUDE_INJA_LEXER_HPP_
|
||||
|
||||
#include <cctype>
|
||||
#include <locale>
|
||||
|
||||
#include "config.hpp"
|
||||
#include "token.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace inja {
|
||||
|
||||
/*!
|
||||
* \brief Class for lexing an inja Template.
|
||||
*/
|
||||
class Lexer {
|
||||
/*!
 * \brief Position of the lexer relative to the template markup.
 *
 * A "...Start" state means scan() has located an opening sequence; the next
 * call to scan() emits the matching open token and switches to the
 * corresponding "...Body" state.
 */
enum class State {
  Text,                       // plain text, looking for the next opening sequence
  ExpressionStart,            // positioned at config.expression_open
  ExpressionStartForceLstrip, // positioned at config.expression_open_force_lstrip
  ExpressionBody,             // inside an expression, until its close sequence
  LineStart,                  // positioned at config.line_statement
  LineBody,                   // inside a line statement, until "\n"
  StatementStart,             // positioned at config.statement_open
  StatementStartNoLstrip,     // positioned at config.statement_open_no_lstrip
  StatementStartForceLstrip,  // positioned at config.statement_open_force_lstrip
  StatementBody,              // inside a statement, until its close sequence
  CommentStart,               // positioned at config.comment_open
  CommentStartForceLstrip,    // positioned at config.comment_open_force_lstrip
  CommentBody,                // inside a comment, until config.comment_close
};
|
||||
|
||||
/*!
 * \brief How the next '-' inside a body has to be interpreted.
 */
enum class MinusState {
  Operator, // '-' is emitted as a Minus token
  Number,   // '-' begins a number literal
};
|
||||
|
||||
const LexerConfig& config;
|
||||
|
||||
State state;
|
||||
MinusState minus_state;
|
||||
std::string_view m_in;
|
||||
size_t tok_start;
|
||||
size_t pos;
|
||||
|
||||
/*!
 * \brief Scans the next token inside an expression, statement or line statement body.
 *
 * Spaces, tabs and carriage returns in front of the token are skipped. A
 * newline is skipped only after the close sequences have been tested, because
 * "\n" is itself the close sequence of a line statement.
 *
 * \param close      sequence that terminates the body
 * \param closeKind  kind of the token emitted when a close sequence matches
 * \param close_trim optional alternative close sequence; when it matches, all
 *                   whitespace and newlines following it are skipped as well
 * \param trim       if true, whitespace up to and including the first newline
 *                   after \p close is skipped
 * \return the close token, an Eof token at the end of the input, or the next
 *         token of the body
 */
Token scan_body(std::string_view close, Token::Kind closeKind, std::string_view close_trim = std::string_view(), bool trim = false) {
  for (;;) {
    if (tok_start >= m_in.size()) {
      return make_token(Token::Kind::Eof);
    }

    const char lead = m_in[tok_start];
    if (lead == ' ' || lead == '\t' || lead == '\r') {
      tok_start += 1;
      continue;
    }

    // The trimming close sequence is tested first.
    const std::string_view rest = m_in.substr(tok_start);
    if (!close_trim.empty() && inja::string_view::starts_with(rest, close_trim)) {
      state = State::Text;
      pos = tok_start + close_trim.size();
      const Token tok = make_token(closeKind);
      skip_whitespaces_and_newlines();
      return tok;
    }

    if (inja::string_view::starts_with(rest, close)) {
      state = State::Text;
      pos = tok_start + close.size();
      const Token tok = make_token(closeKind);
      if (trim) {
        skip_whitespaces_and_first_newline();
      }
      return tok;
    }

    if (lead != '\n') {
      break;
    }
    tok_start += 1; // a newline that is not a close sequence is plain whitespace
  }

  const char ch = m_in[tok_start];
  pos = tok_start + 1;

  // <cctype> functions require a value representable as unsigned char; a plain
  // (possibly negative) char from a non-ASCII byte would be undefined behaviour.
  if (std::isalpha(static_cast<unsigned char>(ch))) {
    minus_state = MinusState::Operator;
    return scan_id();
  }

  // By default the token scanned here makes a following '-' the sign of a
  // number; the cases below that end an operand switch back to Operator.
  const MinusState current_minus_state = minus_state;
  if (minus_state == MinusState::Operator) {
    minus_state = MinusState::Number;
  }

  // Emits `paired` when the next character is '=', `single` otherwise.
  const auto with_equals = [this](Token::Kind paired, Token::Kind single) {
    if (pos < m_in.size() && m_in[pos] == '=') {
      pos += 1;
      return make_token(paired);
    }
    return make_token(single);
  };

  switch (ch) {
  case '+':
    return make_token(Token::Kind::Plus);
  case '-':
    if (current_minus_state == MinusState::Operator) {
      return make_token(Token::Kind::Minus);
    }
    return scan_number();
  case '*':
    return make_token(Token::Kind::Times);
  case '/':
    return make_token(Token::Kind::Slash);
  case '^':
    return make_token(Token::Kind::Power);
  case '%':
    return make_token(Token::Kind::Percent);
  case '.':
    return make_token(Token::Kind::Dot);
  case ',':
    return make_token(Token::Kind::Comma);
  case ':':
    return make_token(Token::Kind::Colon);
  case '(':
    return make_token(Token::Kind::LeftParen);
  case ')':
    minus_state = MinusState::Operator;
    return make_token(Token::Kind::RightParen);
  case '[':
    return make_token(Token::Kind::LeftBracket);
  case ']':
    minus_state = MinusState::Operator;
    return make_token(Token::Kind::RightBracket);
  case '{':
    return make_token(Token::Kind::LeftBrace);
  case '}':
    minus_state = MinusState::Operator;
    return make_token(Token::Kind::RightBrace);
  case '>':
    return with_equals(Token::Kind::GreaterEqual, Token::Kind::GreaterThan);
  case '<':
    return with_equals(Token::Kind::LessEqual, Token::Kind::LessThan);
  case '=':
    return with_equals(Token::Kind::Equal, Token::Kind::Unknown);
  case '!':
    return with_equals(Token::Kind::NotEqual, Token::Kind::Unknown);
  case '\"':
    return scan_string();
  case '0':
  case '1':
  case '2':
  case '3':
  case '4':
  case '5':
  case '6':
  case '7':
  case '8':
  case '9':
    minus_state = MinusState::Operator;
    return scan_number();
  case '_':
  case '@':
  case '$':
    minus_state = MinusState::Operator;
    return scan_id();
  default:
    return make_token(Token::Kind::Unknown);
  }
}
|
||||
|
||||
/*!
 * \brief Extends the current token over an identifier and returns it.
 *
 * Starting at pos, consumes alphanumeric characters as well as '.', '/', '_'
 * and '-' until another character or the end of the input is reached.
 *
 * \return an Id token spanning tok_start up to the first non-identifier character
 */
Token scan_id() {
  while (pos < m_in.size()) {
    const char ch = m_in[pos];
    // Cast first: passing a negative char to std::isalnum is undefined behaviour.
    const bool is_id_char = std::isalnum(static_cast<unsigned char>(ch)) || ch == '.' || ch == '/' || ch == '_' || ch == '-';
    if (!is_id_char) {
      break;
    }
    pos += 1;
  }
  return make_token(Token::Kind::Id);
}
|
||||
|
||||
/*!
 * \brief Extends the current token over a number literal and returns it.
 *
 * The scan is deliberately permissive: digits, '.', 'e' and 'E' are accepted
 * anywhere, and a '+' or '-' is accepted directly after an exponent marker.
 * Malformed numbers are expected to be rejected when the text is converted.
 *
 * \return a Number token spanning tok_start up to the first character that
 *         cannot belong to a number
 */
Token scan_number() {
  while (pos < m_in.size()) {
    const char ch = m_in[pos];

    // A sign only belongs to the number at the very start of the input or
    // immediately behind an exponent marker.
    const bool sign_allowed = pos == 0 || m_in[pos - 1] == 'e' || m_in[pos - 1] == 'E';
    const bool is_sign = (ch == '+' || ch == '-') && sign_allowed;

    // Cast first: passing a negative char to std::isdigit is undefined behaviour.
    const bool is_number_char = std::isdigit(static_cast<unsigned char>(ch)) || ch == '.' || ch == 'e' || ch == 'E' || is_sign;
    if (!is_number_char) {
      break;
    }
    pos += 1;
  }
  return make_token(Token::Kind::Number);
}
|
||||
|
||||
/*!
 * \brief Extends the current token over a quoted string and returns it.
 *
 * The delimiter is the character at tok_start. Scanning stops behind the first
 * unescaped occurrence of that delimiter, or at the end of the input when the
 * string is not terminated.
 *
 * \return a String token that includes the delimiters that were consumed
 */
Token scan_string() {
  bool escape {false};
  while (pos < m_in.size()) {
    const char ch = m_in[pos++];
    if (ch == '\\') {
      // Toggle instead of set: the second backslash of an escaped backslash
      // must not escape the character that follows it (e.g. "a\\").
      escape = !escape;
    } else if (!escape && ch == m_in[tok_start]) {
      break;
    } else {
      escape = false;
    }
  }
  return make_token(Token::Kind::String);
}
|
||||
|
||||
/*!
 * \brief Builds a token of the given kind from the current scan window.
 *
 * \param kind kind of the token to create
 * \return a token whose text is the slice of the input from tok_start to pos
 */
Token make_token(Token::Kind kind) const {
  const std::string_view lexeme = string_view::slice(m_in, tok_start, pos);
  return Token(kind, lexeme);
}
|
||||
|
||||
void skip_whitespaces_and_newlines() {
|
||||
if (pos < m_in.size()) {
|
||||
while (pos < m_in.size() && (m_in[pos] == ' ' || m_in[pos] == '\t' || m_in[pos] == '\n' || m_in[pos] == '\r')) {
|
||||
pos += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void skip_whitespaces_and_first_newline() {
|
||||
if (pos < m_in.size()) {
|
||||
while (pos < m_in.size() && (m_in[pos] == ' ' || m_in[pos] == '\t')) {
|
||||
pos += 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (pos < m_in.size()) {
|
||||
const char ch = m_in[pos];
|
||||
if (ch == '\n') {
|
||||
pos += 1;
|
||||
} else if (ch == '\r') {
|
||||
pos += 1;
|
||||
if (pos < m_in.size() && m_in[pos] == '\n') {
|
||||
pos += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*!
 * \brief Removes the last line of \p text if it consists only of spaces and tabs.
 *
 * \param text text in front of an opening sequence
 * \return \p text without its trailing spaces and tabs when they are preceded
 *         by a line break or make up the whole text; otherwise \p text unchanged
 */
static std::string_view clear_final_line_if_whitespace(std::string_view text) {
  const size_t last_solid = text.find_last_not_of(" \t");
  if (last_solid == std::string_view::npos) {
    // nothing but spaces and tabs (or empty): everything is stripped
    return text.substr(0, 0);
  }

  const char boundary = text[last_solid];
  if (boundary == '\n' || boundary == '\r') {
    return text.substr(0, last_solid + 1);
  }

  // the final line carries real content, keep the text untouched
  return text;
}
|
||||
|
||||
public:
|
||||
/*!
 * \brief Creates a lexer bound to \p config; call start() to supply the input.
 *
 * tok_start and pos are zero-initialized so that scan() and
 * current_position() do not read indeterminate values before start() is called.
 *
 * \param config lexer configuration; it is stored by reference
 */
explicit Lexer(const LexerConfig& config): config(config), state(State::Text), minus_state(MinusState::Number), tok_start(0), pos(0) {}
|
||||
|
||||
/*!
 * \brief Source location of the start of the current token.
 *
 * \return the result of get_source_location() for offset tok_start of the input
 */
SourceLocation current_position() const {
  const size_t offset = tok_start;
  return get_source_location(m_in, offset);
}
|
||||
|
||||
void start(std::string_view input) {
|
||||
m_in = input;
|
||||
tok_start = 0;
|
||||
pos = 0;
|
||||
state = State::Text;
|
||||
minus_state = MinusState::Number;
|
||||
|
||||
// Consume byte order mark (BOM) for UTF-8
|
||||
if (inja::string_view::starts_with(m_in, "\xEF\xBB\xBF")) {
|
||||
m_in = m_in.substr(3);
|
||||
}
|
||||
}
|
||||
|
||||
/*!
 * \brief Returns the next token of the input.
 *
 * In the Text state the input is searched for the next opening sequence. The
 * text in front of it is returned as a Text token; if there is none, the open
 * token is emitted right away. Inside expressions, statements and line
 * statements the work is delegated to scan_body(). A comment is returned as a
 * single CommentClose token that ends behind the comment close sequence.
 *
 * \return the next token, or an Eof token once the input is exhausted
 */
Token scan() {
  tok_start = pos;

  // Common tail of all "...Start" states: enter the body state, step over the
  // opening sequence and emit the open token.
  const auto emit_open = [this](State body, size_t width, Token::Kind kind) {
    state = body;
    pos += width;
    return make_token(kind);
  };

  for (;;) {
    if (tok_start >= m_in.size()) {
      return make_token(Token::Kind::Eof);
    }

    switch (state) {
    default:
    case State::Text: {
      // jump to the next character that could begin an opening sequence
      const size_t offset = m_in.substr(pos).find_first_of(config.open_chars);
      if (offset == std::string_view::npos) {
        // no opening sequence left, the rest of the input is text
        pos = m_in.size();
        return make_token(Token::Kind::Text);
      }
      pos += offset;

      const std::string_view open_str = m_in.substr(pos);
      const auto opens_with = [open_str](const auto& prefix) {
        return inja::string_view::starts_with(open_str, prefix);
      };

      // identify the opening sequence; the more specific variants are tested
      // inside the branch of the sequence they extend
      bool must_lstrip = false;
      if (opens_with(config.expression_open)) {
        if (opens_with(config.expression_open_force_lstrip)) {
          state = State::ExpressionStartForceLstrip;
          must_lstrip = true;
        } else {
          state = State::ExpressionStart;
        }
      } else if (opens_with(config.statement_open)) {
        if (opens_with(config.statement_open_no_lstrip)) {
          state = State::StatementStartNoLstrip;
        } else if (opens_with(config.statement_open_force_lstrip)) {
          state = State::StatementStartForceLstrip;
          must_lstrip = true;
        } else {
          state = State::StatementStart;
          must_lstrip = config.lstrip_blocks;
        }
      } else if (opens_with(config.comment_open)) {
        if (opens_with(config.comment_open_force_lstrip)) {
          state = State::CommentStartForceLstrip;
          must_lstrip = true;
        } else {
          state = State::CommentStart;
          must_lstrip = config.lstrip_blocks;
        }
      } else if ((pos == 0 || m_in[pos - 1] == '\n') && opens_with(config.line_statement)) {
        // a line statement is only recognised at the beginning of a line
        state = State::LineStart;
      } else {
        pos += 1; // false alarm, keep searching behind this character
        continue;
      }

      std::string_view text = string_view::slice(m_in, tok_start, pos);
      if (must_lstrip) {
        text = clear_final_line_if_whitespace(text);
      }

      if (text.empty()) {
        continue; // no empty Text token; the new state emits the open token
      }
      return Token(Token::Kind::Text, text);
    }

    case State::ExpressionStart:
      return emit_open(State::ExpressionBody, config.expression_open.size(), Token::Kind::ExpressionOpen);
    case State::ExpressionStartForceLstrip:
      return emit_open(State::ExpressionBody, config.expression_open_force_lstrip.size(), Token::Kind::ExpressionOpen);
    case State::LineStart:
      return emit_open(State::LineBody, config.line_statement.size(), Token::Kind::LineStatementOpen);
    case State::StatementStart:
      return emit_open(State::StatementBody, config.statement_open.size(), Token::Kind::StatementOpen);
    case State::StatementStartNoLstrip:
      return emit_open(State::StatementBody, config.statement_open_no_lstrip.size(), Token::Kind::StatementOpen);
    case State::StatementStartForceLstrip:
      return emit_open(State::StatementBody, config.statement_open_force_lstrip.size(), Token::Kind::StatementOpen);
    case State::CommentStart:
      return emit_open(State::CommentBody, config.comment_open.size(), Token::Kind::CommentOpen);
    case State::CommentStartForceLstrip:
      return emit_open(State::CommentBody, config.comment_open_force_lstrip.size(), Token::Kind::CommentOpen);

    case State::ExpressionBody:
      return scan_body(config.expression_close, Token::Kind::ExpressionClose, config.expression_close_force_rstrip);
    case State::LineBody:
      return scan_body("\n", Token::Kind::LineStatementClose);
    case State::StatementBody:
      return scan_body(config.statement_close, Token::Kind::StatementClose, config.statement_close_force_rstrip, config.trim_blocks);

    case State::CommentBody: {
      // jump straight to the comment close sequence
      const size_t end = m_in.substr(pos).find(config.comment_close);
      if (end == std::string_view::npos) {
        pos = m_in.size();
        return make_token(Token::Kind::Eof);
      }
      const size_t close_at = pos + end;

      // the force-rstrip variant is tested one character in front of the close
      const bool must_rstrip = inja::string_view::starts_with(m_in.substr(close_at - 1), config.comment_close_force_rstrip);

      // the whole comment is carried by the close token
      state = State::Text;
      pos = close_at + config.comment_close.size();
      Token tok = make_token(Token::Kind::CommentClose);

      if (must_rstrip || config.trim_blocks) {
        skip_whitespaces_and_first_newline();
      }
      return tok;
    }
    }
  }
}
|
||||
|
||||
/*!
 * \brief Gives read access to the configuration this lexer was constructed with.
 *
 * \return reference to the stored LexerConfig
 */
const LexerConfig& get_config() const {
  return this->config;
}
|
||||
};
|
||||
|
||||
} // namespace inja
|
||||
|
||||
#endif // INCLUDE_INJA_LEXER_HPP_
|
Loading…
Add table
Add a link
Reference in a new issue