// conjure_cp_essence_parser/diagnostics/semantic_tokens.rs
use crate::diagnostics::diagnostics_api::SymbolKind;
use crate::diagnostics::source_map::SourceMap;
use crate::parser::syntax_errors::line_start_byte;
4
// Token-type indices stored in `TokenEncoding::ty`. Each value is a position
// in the semantic-token legend; the ordering here must stay in sync with the
// legend registered with the client — do not renumber. (NOTE(review): the
// legend registration itself is not visible in this file — confirm location.)
pub const TOKEN_TYPE_NUMBER: u32 = 0;
pub const TOKEN_TYPE_FUNCTION: u32 = 1;
pub const TOKEN_TYPE_VARIABLE: u32 = 2;
pub const TOKEN_TYPE_LETTING: u32 = 3;
pub const TOKEN_TYPE_FIND: u32 = 4;
pub const TOKEN_TYPE_DOMAIN: u32 = 5;
pub const TOKEN_TYPE_LETTINGVAR: u32 = 6;
pub const TOKEN_TYPE_FINDVAR: u32 = 7;
pub const TOKEN_TYPE_GIVEN: u32 = 8;
pub const TOKEN_TYPE_GIVENVAR: u32 = 9;

// Modifier *bit positions* (not masks): they are used as `1 << MODIFIER_*`
// when building `TokenEncoding::modifiers` in `token_encoding`.
pub const MODIFIER_DECLARATION: u32 = 0;
pub const MODIFIER_READONLY: u32 = 1;
18
/// A semantic-token classification: a token-type legend index plus a bit set
/// of modifier flags.
pub struct TokenEncoding {
    // One of the `TOKEN_TYPE_*` indices.
    pub ty: u32,
    // Bitmask built from `MODIFIER_*` bit positions, e.g. `1 << MODIFIER_READONLY`.
    pub modifiers: u32,
}
23
/// Number of UTF-16 code units needed to represent `bytes`.
///
/// Decoding is lossy: each invalid UTF-8 sequence becomes U+FFFD, which
/// contributes one UTF-16 unit.
fn utf16_units(bytes: &[u8]) -> u32 {
    let text = String::from_utf8_lossy(bytes);
    let unit_count = text.encode_utf16().count();
    unit_count as u32
}
27
/// Byte offsets at which each line begins: offset 0 for the first line, then
/// the byte immediately after every `\n` (so a trailing newline yields a final
/// empty-line start equal to `source.len()`).
fn line_start_offsets(source: &[u8]) -> Vec<usize> {
    let after_each_newline = source
        .iter()
        .enumerate()
        .filter(|&(_, &byte)| byte == b'\n')
        .map(|(pos, _)| pos + 1);
    std::iter::once(0usize).chain(after_each_newline).collect()
}
37
/// Index of the line containing `byte`, i.e. the last entry of the sorted
/// `line_starts` that is <= `byte`.
fn line_index_at_byte(line_starts: &[usize], byte: usize) -> usize {
    // partition_point counts the starts <= byte; the line holding `byte`
    // is the one just before that boundary.
    let first_start_past_byte = line_starts.partition_point(|&start| start <= byte);
    first_start_past_byte.saturating_sub(1)
}
44
45pub fn token_encoding(kind: &SymbolKind) -> Option<TokenEncoding> {
47 match kind {
48 SymbolKind::Integer => Some(TokenEncoding {
49 ty: TOKEN_TYPE_NUMBER,
50 modifiers: 0,
51 }),
52 SymbolKind::Decimal => Some(TokenEncoding {
53 ty: TOKEN_TYPE_NUMBER,
54 modifiers: 0,
55 }),
56 SymbolKind::Function => Some(TokenEncoding {
57 ty: TOKEN_TYPE_FUNCTION,
58 modifiers: 0,
59 }),
60 SymbolKind::Variable => Some(TokenEncoding {
61 ty: TOKEN_TYPE_VARIABLE,
62 modifiers: 0,
63 }),
64 SymbolKind::Constant => Some(TokenEncoding {
65 ty: TOKEN_TYPE_VARIABLE,
66 modifiers: (1 << MODIFIER_READONLY),
67 }),
68 SymbolKind::Letting => Some(TokenEncoding {
69 ty: TOKEN_TYPE_LETTING,
70 modifiers: 0,
71 }),
72 SymbolKind::Find => Some(TokenEncoding {
73 ty: TOKEN_TYPE_FIND,
74 modifiers: 0,
75 }),
76 SymbolKind::Domain => Some(TokenEncoding {
77 ty: TOKEN_TYPE_DOMAIN,
78 modifiers: 0,
79 }),
80 SymbolKind::FindVar => Some(TokenEncoding {
81 ty: TOKEN_TYPE_FINDVAR,
82 modifiers: (1 << MODIFIER_DECLARATION),
83 }),
84 SymbolKind::LettingVar => Some(TokenEncoding {
85 ty: TOKEN_TYPE_LETTINGVAR,
86 modifiers: (1 << MODIFIER_DECLARATION),
87 }),
88 SymbolKind::Given => Some(TokenEncoding {
89 ty: TOKEN_TYPE_GIVEN,
90 modifiers: 0,
91 }),
92 SymbolKind::GivenVar => Some(TokenEncoding {
93 ty: TOKEN_TYPE_GIVENVAR,
94 modifiers: (1 << MODIFIER_DECLARATION),
95 }),
96 }
97}
98
99pub fn encode_semantic_tokens(source_map: &SourceMap, source: &str) -> Vec<u32> {
102 let source_bytes = source.as_bytes();
103 let line_starts = line_start_offsets(source_bytes);
104 let mut entries: Vec<(u32, u32, u32, u32, u32)> = source_map
105 .spans
106 .iter()
107 .filter_map(|span| {
108 let kind = span.hover_info.as_ref()?.kind.as_ref()?;
109 let enc = token_encoding(kind)?;
110
111 let start_byte = span.start_byte;
112 let end_byte = span.end_byte;
113 if end_byte <= start_byte
114 || end_byte > source_bytes.len()
115 || start_byte > source_bytes.len()
116 {
117 return None;
118 }
119
120 let start_line = line_index_at_byte(&line_starts, start_byte);
121 let end_line = line_index_at_byte(&line_starts, end_byte.saturating_sub(1));
122 if start_line != end_line {
123 return None;
125 }
126
127 let line_start = line_start_byte(source_bytes, start_line);
128 if start_byte < line_start {
129 return None;
130 }
131
132 let col = utf16_units(source_bytes.get(line_start..start_byte)?);
133 let len = utf16_units(source_bytes.get(start_byte..end_byte)?);
134 if len == 0 {
135 return None;
136 }
137 Some((start_line as u32, col, len, enc.ty, enc.modifiers))
138 })
139 .collect();
140
141 entries.sort_by_key(|&(line, col, _, _, _)| (line, col));
142
143 let mut data = Vec::with_capacity(entries.len() * 5);
144 let mut prev_line = 0u32;
145 let mut prev_col = 0u32;
146
147 for (line, col, len, ty, modifiers) in entries {
148 let delta_line = line - prev_line;
149 let delta_col = if delta_line == 0 { col - prev_col } else { col };
150 data.extend_from_slice(&[delta_line, delta_col, len, ty, modifiers]);
151 prev_line = line;
152 prev_col = col;
153 }
154
155 data
156}