mirror of
https://github.com/MeowLynxSea/ceditor.git
synced 2025-07-09 10:54:37 +00:00
LexicalAnalysis类现在支持注释、空格和换行了
This commit is contained in:
parent
598041d6ff
commit
fafb31ac7f
@ -1,4 +1,4 @@
|
||||
#include "Tokenizer.h"
|
||||
#include "LexicalAnalysis.h"
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <iomanip>
|
||||
@ -18,10 +18,10 @@ else j=b-a;\n\
|
||||
return j;\n\
|
||||
}";
|
||||
std::string ruleName = "cpp";
|
||||
Tokenizer tokenizer = Tokenizer(ruleName);
|
||||
tokenizer.setRawText(input);
|
||||
tokenizer.printProcessedText();
|
||||
MyVector<Token> tokens = tokenizer.tokenize();
|
||||
LexicalAnalysis lexicalAnalysis = LexicalAnalysis(ruleName);
|
||||
lexicalAnalysis.setRawText(input);
|
||||
lexicalAnalysis.printProcessedText();
|
||||
MyVector<Token> tokens = lexicalAnalysis.tokenize();
|
||||
std::cout << "Tokenized text: " << std::endl;
|
||||
for (int i = 0; i < tokens.size(); i++) {
|
||||
if(tokens[i].type == TokenType::TOKEN_TYPE_IDENTIFIER) {
|
||||
@ -38,11 +38,16 @@ else j=b-a;\n\
|
||||
std::cout << std::fixed << std::setw(25) << std::setfill(' ') << std::right << "End of file.";
|
||||
} else if(tokens[i].type == TokenType::TOKEN_TYPE_COMMENT) {
|
||||
std::cout << std::fixed << std::setw(25) << std::setfill(' ') << std::right << "Comment: ";
|
||||
} else if(tokens[i].type == TokenType::TOKEN_TYPE_NEWLINE) {
|
||||
std::cout << std::fixed << std::setw(25) << std::setfill(' ') << std::right << "Newline ";
|
||||
} else {
|
||||
std::cout << std::fixed << std::setw(25) << std::setfill(' ') << std::right << "Unknown token: ";
|
||||
}
|
||||
std::cout << " " << tokens[i].value << std::endl;
|
||||
}
|
||||
for (int i = 0; i < tokens.size(); i++) {
|
||||
std::cout << tokens[i].value;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
@ -1,5 +1,5 @@
|
||||
#ifndef TOKENIZER_H
|
||||
#define TOKENIZER_H
|
||||
#ifndef LEXICALANALYSIS_H
|
||||
#define LEXICALANALYSIS_H
|
||||
|
||||
#include <string>
|
||||
#include <stdio.h>
|
||||
@ -14,7 +14,8 @@ enum class TokenType {
|
||||
TOKEN_TYPE_OPERATOR_OR_DELIMITER,
|
||||
TOKEN_TYPE_EOF,
|
||||
TOKEN_TYPE_UNDEFINED,
|
||||
TOKEN_TYPE_COMMENT
|
||||
TOKEN_TYPE_COMMENT,
|
||||
TOKEN_TYPE_NEWLINE
|
||||
};
|
||||
|
||||
struct Token {
|
||||
@ -31,7 +32,7 @@ struct Token {
|
||||
static const std::string operatorOrDelimiter[] = {"+", "-", "*", "/", "<", "=", ">", ";", "(", ")", "^", ",", "\'", "#", "&", "|", "%", "~", "[", "]", "{", "}", "\\", ".", "\?", ":", "!", "\"",
|
||||
"<=", ">=", "==", "!=", "&&", "<<", ">>", "||", "++", "--", "+=", "-=", "*=", "/=", "%=", "|=", "&=", "^=", "<<=", ">>=", "::", "->"};
|
||||
|
||||
class Tokenizer {
|
||||
class LexicalAnalysis {
|
||||
private:
|
||||
std::string ruleName;
|
||||
MyVector<std::string> reserveWord;
|
||||
@ -83,6 +84,8 @@ private:
|
||||
}
|
||||
|
||||
void preprocess() {
|
||||
preprocessedText = rawText;
|
||||
return;
|
||||
preprocessedText = "";
|
||||
bool inString = false;
|
||||
for(int i = 0; i < rawText.size(); i++) {
|
||||
@ -117,7 +120,9 @@ private:
|
||||
void Scan(int & currentIndex) {
|
||||
currentToken = "";
|
||||
|
||||
while(preprocessedText[currentIndex] == ' ') currentIndex++;
|
||||
while(preprocessedText[currentIndex] == ' ') {
|
||||
currentToken += preprocessedText[currentIndex++];
|
||||
}
|
||||
|
||||
// printf("current letter: [%d]%c\n", currentIndex, preprocessedText[currentIndex]);
|
||||
|
||||
@ -190,6 +195,7 @@ private:
|
||||
while(currentIndex < rawText.size() - 1 && !(preprocessedText[currentIndex] == '*' && preprocessedText[currentIndex + 1] == '/')) {
|
||||
currentToken += preprocessedText[currentIndex++];
|
||||
}
|
||||
currentToken += "/*";
|
||||
currentIndex += 2;
|
||||
return;
|
||||
}
|
||||
@ -210,7 +216,12 @@ private:
|
||||
syn = static_cast<int>(TokenType::TOKEN_TYPE_EOF);
|
||||
currentIndex++;
|
||||
return;
|
||||
} else if(preprocessedText[currentIndex] != '\n') {
|
||||
} else if(preprocessedText[currentIndex] == '\n') {
|
||||
syn = static_cast<int>(TokenType::TOKEN_TYPE_NEWLINE);
|
||||
currentToken = "\n";
|
||||
currentIndex++;
|
||||
return;
|
||||
} else {
|
||||
syn = static_cast<int>(TokenType::TOKEN_TYPE_UNDEFINED);
|
||||
currentIndex++;
|
||||
return;
|
||||
@ -218,7 +229,7 @@ private:
|
||||
}
|
||||
|
||||
public:
|
||||
Tokenizer(std::string ruleName) {
|
||||
LexicalAnalysis(std::string ruleName) {
|
||||
this->ruleName = ruleName;
|
||||
std::ifstream file("../data/highlight/" + ruleName + "/keywords.txt"); // Just for unit test
|
||||
// std::ifstream file("./data/highlight/" + ruleName + "/keywords.txt");
|
||||
@ -254,8 +265,7 @@ public:
|
||||
tokens.clear();
|
||||
while(syn != static_cast<int>(TokenType::TOKEN_TYPE_EOF) && syn != static_cast<int>(TokenType::TOKEN_TYPE_UNDEFINED)) {
|
||||
Scan(currentIndex);
|
||||
// printf("currentToken: [%s]\n", currentToken.c_str());
|
||||
if(currentToken == "") continue;
|
||||
printf("currentToken: [%s]\n", currentToken.c_str());
|
||||
if(syn == static_cast<int>(TokenType::TOKEN_TYPE_STRING)) {
|
||||
tokens.push_back(Token(currentToken, TokenType::TOKEN_TYPE_STRING));
|
||||
// printf("string: %s\n", currentToken.c_str());
|
||||
@ -274,6 +284,12 @@ public:
|
||||
} else if(syn == static_cast<int>(TokenType::TOKEN_TYPE_COMMENT)) {
|
||||
tokens.push_back(Token(currentToken, TokenType::TOKEN_TYPE_COMMENT));
|
||||
// printf("comment: %s\n", currentToken.c_str());
|
||||
} else if(syn == static_cast<int>(TokenType::TOKEN_TYPE_EOF)) {
|
||||
tokens.push_back(Token(currentToken, TokenType::TOKEN_TYPE_EOF));
|
||||
// printf("EOF: %s\n", currentToken.c_str());
|
||||
} else if(syn == static_cast<int>(TokenType::TOKEN_TYPE_NEWLINE)) {
|
||||
tokens.push_back(Token(currentToken, TokenType::TOKEN_TYPE_NEWLINE));
|
||||
// printf("newline: %s\n", currentToken.c_str());
|
||||
} else {
|
||||
tokens.push_back(Token(currentToken, TokenType::TOKEN_TYPE_UNDEFINED));
|
||||
// printf("undefined: %s\n", currentToken.c_str());
|
BIN
utils/Tokenizer
BIN
utils/Tokenizer
Binary file not shown.
Loading…
Reference in New Issue
Block a user