13. Lexer
/// Excerpt of the lexer class: lex() followed by the declarations of its
/// lexing routines. Bodies and remaining members are elided.
class Lexer {
/// Lex a token. If \c TriviaRetentionMode is \c WithTrivia, the trivia
/// objects passed by reference are populated.
void lex(Token &Result, ParsedTrivia &LeadingTriviaResult,
ParsedTrivia &TrailingTriviaResult);
// The routines below are named after what they lex; their definitions are
// not part of this excerpt.
void lexImpl();
void lexHash();
void lexIdentifier();
void lexOperatorIdentifier();
void lexNumber();
/// Lex trivia into \p T. \p IsForTrailingTrivia distinguishes trailing from
/// leading trivia (the exact difference is not visible in this excerpt).
void lexTrivia(ParsedTrivia &T, bool IsForTrailingTrivia);
/// \p CustomDelimiterLen defaults to 0.
void lexStringLiteral(unsigned CustomDelimiterLen = 0);
...
}
14. SyntaxParsingContext
/// Excerpt of the context object to which tokens and syntax nodes are added
/// during parsing. The class is aligned to (1 << SyntaxAlignInBits) bytes.
class alignas(1 << SyntaxAlignInBits) SyntaxParsingContext {
// NOTE(review): presumably data shared with the root context; RootContextData
// is declared elsewhere -- confirm.
RootContextData *RootData;
// If false, context does nothing.
bool Enabled;
// Add a raw syntax node to this context.
void addRawSyntax(ParsedRawSyntaxNode Raw);
// Add a token together with its leading and trailing trivia.
void addToken(Token &Tok, const ParsedTrivia &LeadingTrivia,
const ParsedTrivia &TrailingTrivia);
// Add a parsed syntax node to this context.
void addSyntax(ParsedSyntax Node);
// NOTE(review): presumably selects the SyntaxKind this context creates from
// its collected parts -- confirm against the implementation.
void setCreateSyntax(SyntaxKind kind);
/// Explicitly finalize syntax tree creation.
/// This function is called during the destruction of a root syntax
/// parsing context. However, it can also be called explicitly to get
/// the syntax tree before closing the root context.
ParsedRawSyntaxNode finalizeRoot();
...
}
15. SyntaxParseActions
/// Abstract interface for recording parsed tokens and raw syntax nodes.
/// Every member shown is pure virtual and returns an OpaqueSyntaxNode.
class SyntaxParseActions {
/// Record a token of kind \c tokenKind together with its leading and
/// trailing trivia pieces and its source range.
virtual OpaqueSyntaxNode recordToken(tok tokenKind,
ArrayRef<ParsedTriviaPiece> leadingTrivia,
ArrayRef<ParsedTriviaPiece> trailingTrivia,
CharSourceRange range) = 0;
/// Record a missing token. \c loc can be invalid or an approximate location
/// of where the token would be if not missing.
virtual OpaqueSyntaxNode recordMissingToken(tok tokenKind, SourceLoc loc) = 0;
/// The provided \c elements are an exact layout appropriate for the syntax
/// \c kind. Missing optional elements are represented with a null
/// OpaqueSyntaxNode object.
virtual OpaqueSyntaxNode recordRawSyntax(syntax::SyntaxKind kind,
ArrayRef<OpaqueSyntaxNode> elements,
CharSourceRange range) = 0;
}
16. Parser
/// Excerpt of the parser class: the state it holds and the entry points
/// relevant to syntax-tree creation. Other members are elided.
class Parser {
// The source file; parseTopLevel() appends parsed decls to SF.Decls.
SourceFile &SF;
// The lexer (held by pointer).
Lexer *L;
ASTContext &Context;
// The syntax parsing context; finalizeSyntaxTree() calls finalizeRoot() on it.
SyntaxParsingContext *SyntaxContext;
// NOTE(review): presumably the current token; finalizeSyntaxTree() asserts
// that it is tok::eof.
Token Tok;
ConsumeTokenReceiver *TokReceiver;
...
/// Call this function to finalize libSyntax tree creation without destroying
/// the parser instance.
///
/// Asserts that parsing has reached the end of file (Tok is tok::eof), then
/// returns the result of SyntaxContext->finalizeRoot().
ParsedRawSyntaxNode finalizeSyntaxTree() {
assert(Tok.is(tok::eof) && "not done parsing yet");
return SyntaxContext->finalizeRoot();
}
bool parseTopLevel();
}
17. SourceFile
/// Excerpt of the source-file unit: it owns the list of top-level
/// declarations and the members related to the syntax tree.
class SourceFile final : public FileUnit {
/// The list of top-level declarations in the source file.
std::vector<Decl*> Decls;
// Defaults to nullptr.
SyntaxParsingCache *SyntaxParsingCache = nullptr;
// The initial stage is Parsing.
ASTStage_t ASTStage = Parsing;
virtual bool walk(ASTWalker &walker) override;
// Dump this source file to the stream \p os.
void dump(raw_ostream &os) const;
bool shouldBuildSyntaxTree() const;
// Accessors for the root of the syntax tree; the setter takes the root by
// rvalue reference.
syntax::SourceFileSyntax getSyntaxRoot() const;
void setSyntaxRoot(syntax::SourceFileSyntax &&Root);
// Uniquely owned SourceFileSyntaxInfo (declared elsewhere).
std::unique_ptr<SourceFileSyntaxInfo> SyntaxInfo;
}
25. Syntax
/// The main handle for syntax nodes - subclasses contain all public
/// structured editing APIs.
///
/// This opaque structure holds two pieces of data: a strong reference to a
/// root node and a weak reference to the node itself. The node of interest can
/// be weakly held because the data nodes contain strong references to
/// their children.
///
/// In the members below, the strong reference is \c Root (an RC<SyntaxData>)
/// and the weak reference is the raw, non-owning pointer \c Data.
class Syntax {
/// A strong reference to the root node of the tree in which this piece of
/// syntax resides.
const RC<SyntaxData> Root;
/// A raw pointer to the data representing this syntax node.
/// This is mutable for being able to set cached child members, which are
/// lazily created.
mutable const SyntaxData *Data;
}
26. SyntaxData
/// The class for holding parented syntax.
///
/// This structure should not contain significant public
/// API or internal modification API.
///
/// This is only for holding a strong reference to the RawSyntax, a weak
/// reference to the parent, and, in subclasses, lazily created strong
/// references to non-terminal child nodes.
///
/// In the members below, the strong reference is \c Raw and the weak
/// reference to the parent is the raw pointer \c Parent.
class SyntaxData final {
// A pair of two SyntaxData references.
// NOTE(review): presumably (root, data) -- confirm at the use sites.
using RootDataPair = std::pair<RC<SyntaxData>, RC<SyntaxData>>;
/// The shared raw syntax representing this syntax data node.
const RC<RawSyntax> Raw;
/// The parent of this syntax.
const SyntaxData *Parent;
/// The index into the parent's child layout.
const CursorIndex IndexInParent;
}
27. RawSyntax
/// RawSyntax - the strictly immutable, shared backing nodes for all syntax.
///
/// This is an implementation detail - do not expose it in public API.
class RawSyntax final {
/// An ID of this node that is stable across incremental parses.
SyntaxNodeId NodeId;
// Union overlaying a raw 64-bit view (OpaqueBits) with the Common, Layout
// and Token structs; the fields of those structs are elided in this excerpt.
union {
uint64_t OpaqueBits;
struct {...} Common;
struct {...} Layout;
struct {...} Token;
} Bits;
}
34. ASTContext
/// ASTContext - This object creates and owns the AST objects.
/// However, this class does more than just maintain context within an AST.
/// It is the closest thing to thread-local or compile-local storage in this
/// code base. Why? SourceKit uses this code with multiple threads per Unix
/// process. Each thread processes a different source file. Each thread has its
/// own instance of ASTContext, and that instance persists for the duration of
/// the thread, throughout all phases of the compilation. (The name "ASTContext"
/// is a bit of a misnomer here.) Why not use thread-local storage? This code
/// may use DispatchQueues and pthread-style TLS won't work with code that uses
/// DispatchQueues. Summary: if you think you need a global or static variable,
/// you probably need to put it here instead.
///
/// (The class body is elided in this excerpt.)
class ASTContext final {...}
35. ASTDumper
// Printer classes used by the AST dumper, one per AST node family. Each one
// derives from the matching visitor template and passes itself as the first
// template argument. Class bodies are elided in this excerpt.
class PrintPattern : public PatternVisitor<PrintPattern> {...}
class PrintDecl : public DeclVisitor<PrintDecl> {...}
class PrintStmt : public StmtVisitor<PrintStmt> {...}
class PrintExpr : public ExprVisitor<PrintExpr> {...}
// NOTE(review): the extra template arguments (void, StringRef) are presumably
// the visit return type and an additional visit argument type -- confirm.
class PrintType : public TypeVisitor<PrintType, void, StringRef> {...}
36. Decl/Stmt/Expr
/// TopLevelCodeDecl - This decl is used as a container for top-level
/// expressions and statements in the main module. It is always a direct
/// child of a SourceFile.
///
/// It derives from both DeclContext and Decl; the body is elided here.
class TopLevelCodeDecl : public DeclContext, public Decl {...}
/// BraceStmt - A brace-enclosed sequence of expressions, stmts, or decls, like
/// { var x = 10; print(10) }.
///
/// NOTE(review): the private llvm::TrailingObjects<BraceStmt, ASTNode> base
/// suggests the ASTNode elements are stored as trailing objects of the
/// statement -- confirm against the elided body.
class BraceStmt final : public Stmt,
private llvm::TrailingObjects<BraceStmt, ASTNode> {...}
/// Integer literal with a '+' or '-' sign, like '+4' or '- 2'.
///
/// Derives from NumberLiteralExpr; the body is elided in this excerpt.
/// ASTGen::generate() constructs it from a text StringRef and a SourceLoc.
class IntegerLiteralExpr : public NumberLiteralExpr {...}
61. Syntax Tree
// Pseudocode: how a parenthesized expression is assembled into a ParenExpr
// syntax node through a local SyntaxParsingContext.
parseExprParen() {
SyntaxParsingContext LocalCtxt(SyntaxKind::ParenExpr, SyntaxContext);
consumeToken(tok::l_paren) // In consumeToken(), a RawTokenSyntax is
// added to the context.
parseExpr(); // On returning from parseExpr(), an Expr syntax node is
// created and added to the context.
consumeToken(tok::r_paren)
// Now the context holds { '(' Expr ')' }.
// From these parts, it creates a ParenExpr node and adds it to the parent.
}
62. Recap
- Syntax Tree is created by SyntaxParsingContext
- AST is produced by SF.Decls
63. AST
/// Excerpt: parses the top level of the source file and appends the parsed
/// declarations to SF.Decls. The parsing step itself is elided and no return
/// statement is shown.
bool Parser::parseTopLevel() {
// Parse the body of the file.
SmallVector<ASTNode, 128> Items;
// Parse body (elided in this excerpt; presumably fills Items).
// Add newly parsed decls to the module.
// Only items that hold a Decl* are appended; any other ASTNode is skipped.
for (auto Item : Items)
if (auto *D = Item.dyn_cast<Decl*>())
SF.Decls.push_back(D);
}
74. How do we optionally
generate a libSyntax tree
while also producing
a semantic AST?
75. Option
// LangOptions.h
/// A collection of options that affect the language dialect and
/// provide compiler debugging facilities.
class LangOptions {
/// Whether to build the syntax tree while parsing. If the syntax tree is
/// built, the generated AST may not be correct when syntax nodes are reused
/// as part of incremental parsing. Defaults to false.
bool BuildSyntaxTree = false;
}
// SyntaxParsingContext.h
// Excerpt showing only the on/off switch of the syntax parsing context.
class alignas(1 << SyntaxAlignInBits) SyntaxParsingContext {
// If false, context does nothing: addToken() and finalizeRoot() both check
// this flag first.
bool Enabled;
}
76. SyntaxParsingContext
// SyntaxParsingContext.cpp
/// Add Token with Trivia to the parts.
///
/// Returns immediately, doing nothing, when the context is not enabled. The
/// rest of the body is elided in this excerpt.
void SyntaxParsingContext::addToken(Token &Tok,
const ParsedTrivia &LeadingTrivia,
const ParsedTrivia &TrailingTrivia) {
if (!Enabled)
return;
...
}
/// Finalize the root of the syntax tree and return it as a raw syntax node.
///
/// The function checks Enabled first; what it returns when the context is
/// disabled, and the rest of the body, are elided in this excerpt.
ParsedRawSyntaxNode SyntaxParsingContext::finalizeRoot() {
if (!Enabled)
...
}
86. Benefits
- Robust architecture
- SwiftSyntax parsing performance improvements
- Enable future work for making the compiler
pipeline more suitable for interactive contexts
92. HiddenLibSyntaxAction
// HiddenLibSyntaxAction.h
/// Holds an explicitly provided action and uses it to handle all function
/// calls. Also hides an implicit SyntaxTreeCreator and ensures libSyntax nodes
/// are always created. Provides an interface to map results of the explicitly
/// provided action to the hidden libSyntax action.
class HiddenLibSyntaxAction : public SyntaxParseActions {
// Pairs the node produced by the explicit action with the corresponding
// libSyntax node. Further members are elided.
struct Node {
OpaqueSyntaxNode ExplicitActionNode;
OpaqueSyntaxNode LibSyntaxNode;
...
};
// I assume this can be `CLibParseActions` for SwiftSyntax?
// The explicitly provided action (shared ownership).
std::shared_ptr<SyntaxParseActions> ExplicitAction;
// The hidden SyntaxTreeCreator that creates the libSyntax nodes.
std::shared_ptr<SyntaxTreeCreator> LibSyntaxAction;
...
}
93. LibSyntaxGenerator
/// Generates libSyntax nodes either by looking them up using
/// HiddenLibSyntaxAction (based on provided OpaqueSyntaxNode) or by recording
/// them with ParsedRawSyntaxRecorder.
class LibSyntaxGenerator {
// The action used for the lookup path described above (shared ownership).
std::shared_ptr<HiddenLibSyntaxAction> Actions;
// The recorder used for the recording path described above.
ParsedRawSyntaxRecorder Recorder;
...
}
94. ASTGen
// ASTGen.h
/// Generates AST nodes from libSyntax nodes. Only the integer-literal
/// overload of generate() is shown in this excerpt.
class ASTGen {
// The AST context; generate() passes it to placement new when creating nodes.
ASTContext &Context;
/// Generate an IntegerLiteralExpr from the syntax node \p Expr.
IntegerLiteralExpr *generate(syntax::IntegerLiteralExprSyntax &Expr);
}
/// Build an IntegerLiteralExpr from the digits token of \p Expr.
///
/// The literal text is the token text passed through
/// copyAndStripUnderscores(); the location comes from topLoc(). The node is
/// created with placement new on Context.
IntegerLiteralExpr *ASTGen::generate(IntegerLiteralExprSyntax &Expr) {
// NOTE(review): Digits is kept as a named local; whether Text can refer to
// storage tied to it depends on copyAndStripUnderscores() -- confirm before
// inlining this call chain.
TokenSyntax Digits = Expr.getDigits();
StringRef Text = copyAndStripUnderscores(Digits.getText());
SourceLoc Loc = topLoc();
return new (Context) IntegerLiteralExpr(Text, Loc);
}
95. parseExprAST
/// Parse an expression as a syntax node of type \c SyntaxNode, then generate
/// the AST expression from it.
///
/// Steps shown: parse to a syntax node, add it to the syntax context, fetch
/// the top node of that type from the context, run the generator on it, and
/// wrap the result in a ParserResult. The steps between adding the node and
/// fetching it are elided in this excerpt.
template <typename SyntaxNode>
ParserResult<Expr> Parser::parseExprAST() {
auto ParsedExpr = parseExprSyntax<SyntaxNode>();
SyntaxContext->addSyntax(ParsedExpr);
...
auto Expr = SyntaxContext->topNode<SyntaxNode>();
auto ExprAST = Generator.generate(Expr);
return makeParserResult(ExprAST);
}