Skip to main content

conjure_cp_essence_parser/parser/
util.rs

1use std::collections::BTreeMap;
2
3use tree_sitter::{Node, Parser, Tree};
4use tree_sitter_essence::LANGUAGE;
5
6use super::traversal::WalkDFS;
7use crate::diagnostics::diagnostics_api::SymbolKind;
8use crate::diagnostics::source_map::{HoverInfo, SourceMap, SpanId, span_with_hover};
9use crate::errors::RecoverableParseError;
10use conjure_cp_core::ast::{Name, SymbolTablePtr};
11
12/// Context for parsing, containing shared state passed through parser functions.
13pub struct ParseContext<'a> {
14    pub source_code: &'a str,
15    pub root: &'a Node<'a>,
16    pub symbols: Option<SymbolTablePtr>,
17    pub errors: &'a mut Vec<RecoverableParseError>,
18    pub source_map: &'a mut SourceMap,
19    pub decl_spans: &'a mut BTreeMap<Name, SpanId>,
20    pub typechecking_context: TypecheckingContext,
21}
22
23impl<'a> ParseContext<'a> {
24    pub fn new(
25        source_code: &'a str,
26        root: &'a Node<'a>,
27        symbols: Option<SymbolTablePtr>,
28        errors: &'a mut Vec<RecoverableParseError>,
29        source_map: &'a mut SourceMap,
30        decl_spans: &'a mut BTreeMap<Name, SpanId>,
31    ) -> Self {
32        Self {
33            source_code,
34            root,
35            symbols,
36            errors,
37            source_map,
38            decl_spans,
39            typechecking_context: TypecheckingContext::Unknown,
40        }
41    }
42
43    pub fn record_error(&mut self, error: RecoverableParseError) {
44        self.errors.push(error);
45    }
46
47    /// Create a new ParseContext with different symbols but sharing source_code, root, errors, and source_map.
48    pub fn with_new_symbols(&mut self, symbols: Option<SymbolTablePtr>) -> ParseContext<'_> {
49        ParseContext {
50            source_code: self.source_code,
51            root: self.root,
52            symbols,
53            errors: self.errors,
54            source_map: self.source_map,
55            decl_spans: self.decl_spans,
56            typechecking_context: self.typechecking_context,
57        }
58    }
59
60    pub fn save_decl_span(&mut self, name: Name, span_id: SpanId) {
61        self.decl_spans.insert(name, span_id);
62    }
63
64    pub fn lookup_decl_span(&self, name: &Name) -> Option<SpanId> {
65        self.decl_spans.get(name).copied()
66    }
67
68    pub fn lookup_decl_line(&self, name: &Name) -> Option<u32> {
69        let span_id = self.lookup_decl_span(name)?;
70        let span = self.source_map.spans.get(span_id as usize)?;
71        Some(span.start_point.line + 1)
72    }
73
74    /// Helper to add to span and documentation hover info into the source map
75    pub fn add_span_and_doc_hover(
76        &mut self,
77        node: &tree_sitter::Node,
78        doc_key: &str, // name of the documentation file in Bits
79        kind: SymbolKind,
80        ty: Option<String>,
81        decl_span: Option<u32>,
82    ) {
83        if let Some(description) = get_documentation(doc_key) {
84            let hover = HoverInfo {
85                description,
86                kind: Some(kind),
87                ty,
88                decl_span,
89            };
90            span_with_hover(node, self.source_code, self.source_map, hover);
91        }
92        // If documentation is not found, do nothing (no fallback, no addition to source map)
93    }
94}
95
/// Context in which an expression is being parsed; used to detect type
/// mismatches during parsing.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TypecheckingContext {
    /// Boolean context.
    Boolean,
    /// Arithmetic context.
    Arithmetic,
    /// Set context.
    Set,
    /// The context is not known, or any kind of expression is acceptable.
    Unknown,
}
105
106/// Parse the given source code into a syntax tree using tree-sitter.
107///
108/// If successful, returns a tuple containing the syntax tree and the raw source code.
109/// If the source code is not valid Essence, returns None.
110pub fn get_tree(src: &str) -> Option<(Tree, String)> {
111    let mut parser = Parser::new();
112    parser.set_language(&LANGUAGE.into()).unwrap();
113
114    parser.parse(src, None).and_then(|tree| {
115        let root = tree.root_node();
116        if root.is_error() {
117            return None;
118        }
119        Some((tree, src.to_string()))
120    })
121}
122
123/// Parse an expression fragment, allowing a dummy prefix for error recovery.
124///
125/// NOTE: The new source code may be different from the original source code.
126///       See implementation for details.
127pub fn get_expr_tree(src: &str) -> Option<(Tree, String)> {
128    let mut parser = Parser::new();
129    parser.set_language(&LANGUAGE.into()).unwrap();
130
131    parser.parse(src, None).and_then(|tree| {
132        let root = tree.root_node();
133        if root.is_error() {
134            return None;
135        }
136
137        let children: Vec<_> = named_children(&root).collect();
138        let first_child = children.first()?;
139
140        // HACK: Tree-sitter can only parse a complete program from top to bottom, not an individual bit of syntax.
141        // See: https://github.com/tree-sitter/tree-sitter/issues/711 and linked issues.
142        // However, we can use a dummy _FRAGMENT_EXPRESSION prefix (which we insert as necessary)
143        // to trick the parser into accepting an isolated expression.
144        // This way we can parse an isolated expression and it is only slightly cursed :)
145        if first_child.is_error() {
146            if src.starts_with("_FRAGMENT_EXPRESSION") {
147                None
148            } else {
149                get_expr_tree(&format!("_FRAGMENT_EXPRESSION {src}"))
150            }
151        } else {
152            Some((tree, src.to_string()))
153        }
154    })
155}
156
157/// Get the named children of a node
158pub fn named_children<'a>(node: &'a Node<'a>) -> impl Iterator<Item = Node<'a>> + 'a {
159    (0..node.named_child_count())
160        .filter_map(|i| u32::try_from(i).ok().and_then(|i| node.named_child(i)))
161}
162
163pub fn node_is_expression(node: &Node) -> bool {
164    matches!(
165        node.kind(),
166        "bool_expr" | "arithmetic_expr" | "comparison_expr" | "atom"
167    )
168}
169
170/// Get all top-level nodes that match the given predicate
171pub fn query_toplevel<'a>(
172    node: &'a Node<'a>,
173    predicate: &'a dyn Fn(&Node<'a>) -> bool,
174) -> impl Iterator<Item = Node<'a>> + 'a {
175    WalkDFS::with_retract(node, predicate).filter(|n| n.is_named() && predicate(n))
176}
177
178/// Get all meta-variable names in a node
179pub fn get_metavars<'a>(node: &'a Node<'a>, src: &'a str) -> impl Iterator<Item = String> + 'a {
180    query_toplevel(node, &|n| n.kind() == "metavar").filter_map(|child| {
181        child
182            .named_child(0)
183            .map(|name| src[name.start_byte()..name.end_byte()].to_string())
184    })
185}
186
/// Fetch Essence syntax documentation from Conjure's `docs/bits/` folder on GitHub.
///
/// `name` is the name of the documentation file; a trailing `.md` is optional.
/// Returns the raw Markdown text, or `None` if the file is not found or any
/// error occurs (no `curl` binary, network failure, timeout, non-UTF-8 body).
///
/// Names that are empty or that could leave the `docs/bits/` folder (they
/// contain `/`, `\` or `..`) are rejected without touching the network.
///
/// Pages that were fetched successfully are cached for the lifetime of the
/// process, so repeated lookups of the same name do not spawn `curl` again.
/// Failures are not cached and are retried on the next call.
pub fn get_documentation(name: &str) -> Option<String> {
    // Process-wide cache: documentation name (without `.md`) -> page text.
    static CACHE: std::sync::Mutex<std::collections::BTreeMap<String, String>> =
        std::sync::Mutex::new(std::collections::BTreeMap::new());

    let base = name.strip_suffix(".md").unwrap_or(name);
    let escapes_folder = base.contains(|c: char| matches!(c, '/' | '\\')) || base.contains("..");
    if base.is_empty() || escapes_folder {
        return None;
    }

    // A poisoned lock only means another thread panicked while holding it;
    // the map itself is still usable. The guard is released at the end of
    // this statement, so the lock is never held during the download.
    let cached = CACHE
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner)
        .get(base)
        .cloned();
    if cached.is_some() {
        return cached;
    }

    let text = fetch_documentation(base)?;
    CACHE
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner)
        .insert(base.to_owned(), text.clone());
    Some(text)
}

/// Downloads one documentation page with `curl`; `None` on any failure.
fn fetch_documentation(base: &str) -> Option<String> {
    // Seconds allowed for connecting and for the whole transfer. Without
    // these a stalled connection would block the caller indefinitely.
    const CONNECT_TIMEOUT_SECS: &str = "5";
    const TOTAL_TIMEOUT_SECS: &str = "10";

    // This url is for raw Markdown bytes
    let url =
        format!("https://raw.githubusercontent.com/conjure-cp/conjure/main/docs/bits/{base}.md");

    let output = std::process::Command::new("curl")
        .args([
            "-fsSL",
            "--connect-timeout",
            CONNECT_TIMEOUT_SECS,
            "--max-time",
            TOTAL_TIMEOUT_SECS,
            url.as_str(),
        ])
        .output()
        .ok()?;

    if !output.status.success() {
        return None;
    }

    String::from_utf8(output.stdout).ok()
}
211
// Unit tests for the helpers in this module; compiled only for test builds,
// so the glob import needs no `#[allow(unused)]`.
#[cfg(test)]
mod test {
    use super::*;

    /// A `&x` meta-variable inside a constraint is reported by its bare name.
    #[test]
    fn test_get_metavars() {
        let src = "such that &x = y";
        let (tree, _) = get_tree(src).unwrap();
        let root = tree.root_node();
        let metavars = get_metavars(&root, src).collect::<Vec<_>>();
        assert_eq!(metavars, vec!["x"]);
    }
}