//! conjure_cp_essence_parser/diagnostics/semantic_tokens.rs
//!
//! Encoding of source-map symbol spans into the LSP semantic-token format.

1use crate::diagnostics::diagnostics_api::SymbolKind;
2use crate::diagnostics::source_map::SourceMap;
3use crate::parser::syntax_errors::line_start_byte;
4
// Indices into the semantic-token *type* legend. The values are positional:
// they must match, entry-for-entry, the `tokenTypes` legend the editor client
// registers — presumably declared on the LSP/extension side; TODO confirm
// against that legend before renumbering.
pub const TOKEN_TYPE_NUMBER: u32 = 0;
pub const TOKEN_TYPE_FUNCTION: u32 = 1;
pub const TOKEN_TYPE_VARIABLE: u32 = 2;
pub const TOKEN_TYPE_LETTING: u32 = 3;
pub const TOKEN_TYPE_FIND: u32 = 4;
pub const TOKEN_TYPE_DOMAIN: u32 = 5;
pub const TOKEN_TYPE_LETTINGVAR: u32 = 6;
pub const TOKEN_TYPE_FINDVAR: u32 = 7;
pub const TOKEN_TYPE_GIVEN: u32 = 8;
pub const TOKEN_TYPE_GIVENVAR: u32 = 9;

// Bit positions for the token-modifier bitset: `1 << MODIFIER_*` is OR-ed
// into `TokenEncoding::modifiers`. Must likewise match the client's
// `tokenModifiers` legend order.
pub const MODIFIER_DECLARATION: u32 = 0;
pub const MODIFIER_READONLY: u32 = 1;

/// Wire encoding of one semantic token: a token-type index plus a bitset of
/// modifier flags (see the `TOKEN_TYPE_*` and `MODIFIER_*` constants).
pub struct TokenEncoding {
    /// Index into the semantic-token type legend.
    pub ty: u32,
    /// Modifier bitset; bit `MODIFIER_*` set means that modifier applies.
    pub modifiers: u32,
}

/// Number of UTF-16 code units needed to represent `bytes` as text.
///
/// Invalid UTF-8 sequences are mapped to U+FFFD by the lossy conversion
/// (one UTF-16 unit each), so the count is defined for arbitrary bytes.
fn utf16_units(bytes: &[u8]) -> u32 {
    let text = String::from_utf8_lossy(bytes);
    let mut units = 0u32;
    for ch in text.chars() {
        units += ch.len_utf16() as u32;
    }
    units
}

/// Byte offsets at which each line of `source` begins.
///
/// The first line always starts at offset 0; every `\n` opens a new line at
/// the following byte. A trailing `\n` therefore yields a final empty line.
fn line_start_offsets(source: &[u8]) -> Vec<usize> {
    let after_newlines = source
        .iter()
        .enumerate()
        .filter(|&(_, &byte)| byte == b'\n')
        .map(|(idx, _)| idx + 1);
    std::iter::once(0).chain(after_newlines).collect()
}

/// Index of the line containing `byte`: the position of the greatest entry
/// in `line_starts` that is `<= byte`.
///
/// `line_starts` must be sorted ascending with no duplicates (as produced by
/// `line_start_offsets`); a `byte` before the first entry clamps to line 0.
fn line_index_at_byte(line_starts: &[usize], byte: usize) -> usize {
    match line_starts.binary_search(&byte) {
        // `byte` is exactly a line start: that line contains it.
        Ok(line) => line,
        // Otherwise the previous entry is the last start <= byte.
        Err(insert_at) => insert_at.saturating_sub(1),
    }
}

45// maps kind in SourceMap into a TokenEncoding
46pub fn token_encoding(kind: &SymbolKind) -> Option<TokenEncoding> {
47    match kind {
48        SymbolKind::Integer => Some(TokenEncoding {
49            ty: TOKEN_TYPE_NUMBER,
50            modifiers: 0,
51        }),
52        SymbolKind::Decimal => Some(TokenEncoding {
53            ty: TOKEN_TYPE_NUMBER,
54            modifiers: 0,
55        }),
56        SymbolKind::Function => Some(TokenEncoding {
57            ty: TOKEN_TYPE_FUNCTION,
58            modifiers: 0,
59        }),
60        SymbolKind::Variable => Some(TokenEncoding {
61            ty: TOKEN_TYPE_VARIABLE,
62            modifiers: 0,
63        }),
64        SymbolKind::Constant => Some(TokenEncoding {
65            ty: TOKEN_TYPE_VARIABLE,
66            modifiers: (1 << MODIFIER_READONLY),
67        }),
68        SymbolKind::Letting => Some(TokenEncoding {
69            ty: TOKEN_TYPE_LETTING,
70            modifiers: 0,
71        }),
72        SymbolKind::Find => Some(TokenEncoding {
73            ty: TOKEN_TYPE_FIND,
74            modifiers: 0,
75        }),
76        SymbolKind::Domain => Some(TokenEncoding {
77            ty: TOKEN_TYPE_DOMAIN,
78            modifiers: 0,
79        }),
80        SymbolKind::FindVar => Some(TokenEncoding {
81            ty: TOKEN_TYPE_FINDVAR,
82            modifiers: (1 << MODIFIER_DECLARATION),
83        }),
84        SymbolKind::LettingVar => Some(TokenEncoding {
85            ty: TOKEN_TYPE_LETTINGVAR,
86            modifiers: (1 << MODIFIER_DECLARATION),
87        }),
88        SymbolKind::Given => Some(TokenEncoding {
89            ty: TOKEN_TYPE_GIVEN,
90            modifiers: 0,
91        }),
92        SymbolKind::GivenVar => Some(TokenEncoding {
93            ty: TOKEN_TYPE_GIVENVAR,
94            modifiers: (1 << MODIFIER_DECLARATION),
95        }),
96    }
97}
98
99// translate span in SourceMap into the VSCode semantic token format
100// NOTE: LSP semantic token positions and lengths are UTF-16 code units.
101pub fn encode_semantic_tokens(source_map: &SourceMap, source: &str) -> Vec<u32> {
102    let source_bytes = source.as_bytes();
103    let line_starts = line_start_offsets(source_bytes);
104    let mut entries: Vec<(u32, u32, u32, u32, u32)> = source_map
105        .spans
106        .iter()
107        .filter_map(|span| {
108            let kind = span.hover_info.as_ref()?.kind.as_ref()?;
109            let enc = token_encoding(kind)?;
110
111            let start_byte = span.start_byte;
112            let end_byte = span.end_byte;
113            if end_byte <= start_byte
114                || end_byte > source_bytes.len()
115                || start_byte > source_bytes.len()
116            {
117                return None;
118            }
119
120            let start_line = line_index_at_byte(&line_starts, start_byte);
121            let end_line = line_index_at_byte(&line_starts, end_byte.saturating_sub(1));
122            if start_line != end_line {
123                // LSP semantic token entries should not span lines.
124                return None;
125            }
126
127            let line_start = line_start_byte(source_bytes, start_line);
128            if start_byte < line_start {
129                return None;
130            }
131
132            let col = utf16_units(source_bytes.get(line_start..start_byte)?);
133            let len = utf16_units(source_bytes.get(start_byte..end_byte)?);
134            if len == 0 {
135                return None;
136            }
137            Some((start_line as u32, col, len, enc.ty, enc.modifiers))
138        })
139        .collect();
140
141    entries.sort_by_key(|&(line, col, _, _, _)| (line, col));
142
143    let mut data = Vec::with_capacity(entries.len() * 5);
144    let mut prev_line = 0u32;
145    let mut prev_col = 0u32;
146
147    for (line, col, len, ty, modifiers) in entries {
148        let delta_line = line - prev_line;
149        let delta_col = if delta_line == 0 { col - prev_col } else { col };
150        data.extend_from_slice(&[delta_line, delta_col, len, ty, modifiers]);
151        prev_line = line;
152        prev_col = col;
153    }
154
155    data
156}