1use std::{
2 io::{self, Cursor, Write},
3 ops::Range,
4};
5
6use lsp_core::prelude::*;
7use lsp_core::lsp_types::FormattingOptions;
8use ropey::Rope;
9use tracing::info;
10
11use crate::lang::model::{Base, BlankNode, Term, Triple, Turtle, TurtlePrefix, PO};
12
13#[allow(unused)]
14pub fn format(tokens: &[&Token], options: FormattingOptions) -> String {
15 let mut indent_str = String::new();
16 for _ in 0..options.tab_size {
17 indent_str += " ";
18 }
19
20 let mut indent = 0;
21 let mut document = String::new();
22 let mut line = String::new();
23 let mut wants_newline = 0;
24 let mut needs_new_line = false;
25
26 let mut listings = vec![false];
27 let mut first = true;
28
29 let mut last_open_bnode = false;
30
31 for token in tokens {
32 if last_open_bnode && token.is_sq_close() {
33 wants_newline = 0;
34 }
35
36 let space = match token {
37 Token::Stop | Token::Comma | Token::PredicateSplit => false,
38 Token::DataTypeDelim | Token::LangTag(_) => false,
39 _ => true,
40 };
41
42 if wants_newline == 0 && space && !first {
43 line += " ";
44 }
45
46 if needs_new_line || (wants_newline > 0 && !token.is_comment()) {
47 document += "\n";
48 document += &line;
49 line = String::new();
50
51 for _ in 1..wants_newline {
52 line += "\n";
53 }
54
55 for _ in 0..indent {
56 line += &indent_str;
57 }
58
59 if listings.last().copied().unwrap_or_default() {
60 line += &indent_str;
61 }
62
63 wants_newline = 0;
64 needs_new_line = false;
65 }
66
67 last_open_bnode = token.is_sq_open();
68
69 match token {
70 Token::PrefixTag => line += "@prefix",
71 Token::BaseTag => line += "@base",
72 Token::SparqlPrefix => line += "PREFIX",
73 Token::SparqlBase => line += "BASE",
74 Token::PredType => line += "a",
75 Token::SqOpen => {
76 line += "[";
77 indent += 1;
78 wants_newline = 1;
79 listings.push(false);
80 }
81 Token::SqClose => {
82 line += "]";
83 indent -= 1;
84 listings.pop();
85 }
86 Token::BracketOpen => line += "(",
87 Token::BracketClose => line += ")",
88 Token::DataTypeDelim => {
89 line += "^^";
90 }
91 Token::Stop => {
92 line += ".";
93 wants_newline = 1;
94 if let Some(l) = listings.last_mut() {
95 if *l {
96 wants_newline = 2;
97 }
98 *l = false;
99 }
100 }
101 Token::PredicateSplit => {
102 line += ";";
103 wants_newline = 1;
104 if let Some(l) = listings.last_mut() {
105 *l = true;
106 }
107 }
108 Token::Comma => line += ",",
109 Token::True => line += "true",
110 Token::False => line += "false",
111 Token::IRIRef(x) => {
112 line += "<";
113 line += x.as_str();
114 line += ">";
115 }
116 Token::PNameLN(x, y) => {
117 if let Some(x) = x {
118 line += x.as_str();
119 }
120 line += ":";
121 line += y.as_str();
122 }
123 Token::BlankNodeLabel(x) => {
124 line += "_:";
125 line += x.as_str();
126 }
127 Token::LangTag(x) => {
128 line += "@";
129 line += x.as_str();
130 }
131 Token::Number(x) => line += x,
132 Token::Str(x, y) => {
133 line += y.quote();
134 line += x;
135 line += y.quote();
136 }
137 Token::ANON => line += "[]",
138 Token::Comment(x) => {
139 line += x;
140 needs_new_line = true;
141 }
142 Token::Invalid(x) => line += x,
143 _ => todo!(),
144 }
145
146 first = false;
147 }
148
149 document += "\n";
150 document += &line;
151 document + "\n"
152}
153
154type Buf = Cursor<Vec<u8>>;
155struct FormatState<'a> {
156 indent_level: usize,
157 indent: String,
158 buf: Buf,
159 line_start: u64,
160 comments: &'a [Spanned<String>],
161 comments_idx: usize,
162 tail: Spanned<String>,
163 line_count: usize,
164}
165
166impl<'a> FormatState<'a> {
167 fn new(
168 options: FormattingOptions,
169 buf: Buf,
170 comments: &'a [Spanned<String>],
171 source: &'a Rope,
172 ) -> Self {
173 let mut indent = String::new();
174 for _ in 0..options.tab_size {
175 indent.push(' ');
176 }
177
178 let tail = spanned(
179 String::new(),
180 source.len_chars() + 1..source.len_chars() + 1,
181 );
182 Self {
183 tail,
184 line_start: 0,
185 indent_level: 0,
186 indent,
187 buf,
188 comments,
189 comments_idx: 0,
190 line_count: 0,
191 }
192 }
193
194 fn check_comments(&mut self, span: &Range<usize>) -> io::Result<bool> {
195 println!("Checking comments with span {:?}", span);
196 let mut first = true;
197 loop {
198 let current = self.comments.get(self.comments_idx).unwrap_or(&self.tail);
199
200 if current.1.start > span.start {
201 break;
202 }
203
204 first = false;
205 write!(self.buf, "{}", current.0)?;
206 self.new_line()?;
207 self.comments_idx += 1;
208 }
209 Ok(!first)
210 }
211 fn current_line_length(&self) -> u64 {
212 self.buf.position() - self.line_start
213 }
214 fn new_line(&mut self) -> io::Result<()> {
215 self.line_count += 1;
216 write!(self.buf, "\n")?;
217 self.line_start = self.buf.position();
218 for _ in 0..self.indent_level {
219 write!(self.buf, "{}", &self.indent)?;
220 }
221 Ok(())
222 }
223 fn inc(&mut self) {
224 self.indent_level += 1;
225 }
226 fn decr(&mut self) {
227 self.indent_level -= 1;
228 }
229}
230
231impl FormatState<'_> {
232 fn write_turtle(&mut self, turtle: &Turtle) -> io::Result<()> {
233 if let Some(ref b) = turtle.base {
234 self.check_comments(&b.1)?;
235 self.write_base(b)?;
236 self.new_line()?;
237 }
238 for p in &turtle.prefixes {
239 self.check_comments(&p.1)?;
240 self.write_prefix(p)?;
241 self.new_line()?;
242 }
243
244 let mut prev_line = 0;
245
246 for t in &turtle.triples {
247 if prev_line + 1 < self.line_count {
248 self.new_line()?;
249 }
250 prev_line = self.line_count;
251 self.check_comments(&t.1)?;
252 self.write_triple(&t)?;
253 self.new_line()?;
254 }
256 self.new_line()?;
257
258 for i in self.comments_idx..self.comments.len() {
259 write!(self.buf, "{}", self.comments[i].0)?;
260 self.new_line()?;
261 }
262
263 Ok(())
264 }
265
266 fn write_prefix(&mut self, prefix: &TurtlePrefix) -> io::Result<()> {
267 write!(self.buf, "@prefix {}: {}.", prefix.prefix.0, prefix.value.0)
268 }
269
270 fn write_base(&mut self, base: &Base) -> io::Result<()> {
271 write!(self.buf, "@base {}.", base.1 .0)
272 }
273
274 fn write_bnode(&mut self, bnode: &BlankNode) -> io::Result<()> {
275 match bnode {
276 BlankNode::Named(x, _) => write!(self.buf, "_:{}", x)?,
277 BlankNode::Unnamed(pos, _, _) => {
278 if pos.len() == 0 {
279 return write!(self.buf, "[ ]");
280 }
281 if pos.len() == 1 {
282 write!(self.buf, "[ ")?;
283 self.write_po(&pos[0])?;
284 return write!(self.buf, " ]");
285 }
286 let is_first_of_line = self.current_line_length() == 0;
287 self.inc();
288 write!(self.buf, "[")?;
289 let should_skip = if is_first_of_line {
290 write!(self.buf, " ")?;
291 self.write_po(&pos[0])?;
292 write!(self.buf, ";")?;
293 1
294 } else {
295 0
296 };
297 for po in pos.iter().skip(should_skip) {
298 self.new_line()?;
299 self.check_comments(&po.1)?;
300 self.write_po(&po)?;
301 write!(self.buf, ";")?;
302 }
303 self.decr();
304 self.new_line()?;
305 write!(self.buf, "]")?;
306 }
307 BlankNode::Invalid => return Err(io::Error::new(io::ErrorKind::Other, "")),
308 }
309 Ok(())
310 }
311
312 fn write_collection(&mut self, coll: &Vec<Spanned<Term>>) -> io::Result<()> {
313 if coll.is_empty() {
314 return write!(self.buf, "( )");
315 }
316
317 let mut should_indent = false;
318 let start = self.buf.position();
319 let current_line = self.line_count;
320
321 write!(self.buf, "( ")?;
322
323 self.check_comments(&coll[0].1)?;
324 self.write_term(&coll[0])?;
325
326 for po in coll.iter().skip(1) {
327 self.check_comments(&po.1)?;
328 write!(self.buf, " ")?;
329 self.write_term(&po)?;
330 if self.current_line_length() > 80 {
331 should_indent = true;
332 break;
333 }
334 }
335 write!(self.buf, " )")?;
336
337 if should_indent {
338 self.buf.set_position(start);
339 self.line_count = current_line;
340 write!(self.buf, "(")?;
341 self.inc();
342 for po in coll.iter() {
343 self.new_line()?;
344 self.check_comments(&po.1)?;
345 self.write_term(&po)?;
346 }
347 self.decr();
348 self.new_line()?;
349 write!(self.buf, ")")?;
350 }
351
352 Ok(())
353 }
354
355 fn write_term(&mut self, term: &Term) -> io::Result<()> {
356 match term {
357 Term::Literal(s) => write!(self.buf, "{}", s)?,
358 Term::BlankNode(b) => self.write_bnode(b)?,
359 Term::NamedNode(n) => write!(self.buf, "{}", n)?,
360 Term::Collection(ts) => self.write_collection(ts)?,
361 Term::Invalid => {
362 return Err(io::Error::new(
363 io::ErrorKind::Other,
364 "cannot format turtle with invalid terms",
365 ))
366 }
367 Term::Variable(_) => {
368 return Err(io::Error::new(
369 io::ErrorKind::Other,
370 "cannot format turtle with variables",
371 ))
372 }
373 }
374 Ok(())
375 }
376
377 fn write_po(&mut self, po: &PO) -> io::Result<()> {
378 write!(self.buf, "{} ", po.predicate.0)?;
379 self.write_term(&po.object[0])?;
380 let mut should_indent = false;
381
382 let start = self.buf.position();
383 let current_line = self.line_count;
384 for i in 1..po.object.len() {
385 write!(self.buf, ", ")?;
386 self.write_term(&po.object[i])?;
387
388 if self.current_line_length() > 80 {
389 should_indent = true;
390 break;
391 }
392 }
393
394 if should_indent {
395 self.buf.set_position(start);
396 self.line_count = current_line;
397 self.inc();
398 for i in 1..po.object.len() {
399 write!(self.buf, ",")?;
400 self.new_line()?;
401 self.check_comments(&po.object[i].1)?;
402 self.write_term(&po.object[i])?;
403 }
404 self.decr();
405 }
406
407 Ok(())
408 }
409
410 fn write_triple(&mut self, triple: &Triple) -> io::Result<()> {
411 match &triple.subject.0 {
412 Term::BlankNode(bn) => self.write_bnode(bn)?,
413 Term::NamedNode(n) => write!(self.buf, "{}", n)?,
414 _ => write!(self.buf, "invalid")?,
415 }
416 write!(self.buf, " ")?;
417 self.write_po(&triple.po[0])?;
418 if triple.po.len() == 1 {
419 write!(self.buf, ".")?;
420 return Ok(());
421 }
422 write!(self.buf, ";")?;
423 self.inc();
424
425 self.new_line()?;
426 self.check_comments(&triple.po[1].1)?;
427 self.write_po(&triple.po[1])?;
428
429 if triple.po.len() == 2 {
430 self.decr();
431 write!(self.buf, ".")?;
432 return Ok(());
433 }
434
435 for i in 2..triple.po.len() {
436 write!(self.buf, ";")?;
437 self.new_line()?;
438 self.check_comments(&triple.po[i].1)?;
439 self.write_po(&triple.po[i])?;
440 }
441
442 write!(self.buf, ".")?;
443 self.decr();
444 Ok(())
445 }
446}
447
448pub fn format_turtle(
449 turtle: &Turtle,
450 config: FormattingOptions,
451 comments: &[Spanned<String>],
452 source: &Rope,
453) -> Option<String> {
454 let buf: Buf = Cursor::new(Vec::new());
455 let mut state = FormatState::new(config, buf, comments, source);
456 match state.write_turtle(turtle) {
457 Ok(_) => info!("Format succesful"),
458 Err(e) => {
459 info!("Format unsuccesful {:?}", e);
460 return None;
461 }
462 }
463 String::from_utf8(state.buf.into_inner()).ok()
464}
465
#[cfg(test)]
mod tests {

    use std::str::FromStr;

    use lsp_core::prelude::{Spanned, Token};
    use ropey::Rope;

    use crate::lang::{
        context::Context, formatter::format_turtle, model::Turtle, parser as parser2,
        tokenizer::parse_tokens_str_safe,
    };

    /// Failure stage reported by `parse_turtle`.
    #[derive(Debug)]
    pub enum Err {
        Tokenizing,
        Parsing,
    }

    /// Tokenizes and parses `inp`, returning the model together with all
    /// comment tokens sorted by start offset. Parser errors are only printed.
    fn parse_turtle(
        inp: &str,
        url: &lsp_core::lsp_types::Url,
    ) -> Result<(Turtle, Vec<Spanned<String>>), Err> {
        let context = Context::new();
        let ctx = context.ctx();
        let tokens = parse_tokens_str_safe(inp).map_err(|e| {
            println!("Error {:?}", e);
            Err::Tokenizing
        })?;

        let mut comments: Vec<_> = tokens
            .iter()
            .flat_map(|x| {
                x.try_map_ref(|t| match t {
                    Token::Comment(x) => Some(x.clone()),
                    _ => None,
                })
            })
            .collect();
        comments.sort_by_key(|x| x.1.start);

        let (turtle, errs) = parser2::parse_turtle(&url, tokens, inp.len(), ctx);
        for e in errs {
            println!("Error {:?}", e);
        }

        Ok((turtle.into_value(), comments))
    }

    /// Parses `txt`, formats it with a two-space indent and compares the
    /// result with `expected`.
    fn assert_formats_to(txt: &str, expected: &str) {
        let url = lsp_core::lsp_types::Url::from_str("http://example.com/ns#").unwrap();
        let (output, comments) = parse_turtle(txt, &url).expect("Simple");
        let formatted = format_turtle(
            &output,
            lsp_core::lsp_types::FormattingOptions {
                tab_size: 2,
                ..Default::default()
            },
            &comments,
            &Rope::from_str(txt),
        )
        .expect("formatting");
        assert_eq!(formatted, expected);
    }

    #[test]
    fn easy_format() {
        let txt = r#"
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@base <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .

[] a foaf:Name;
   foaf:knows <abc>;.
"#;

        let expected = r#"@base <http://www.w3.org/1999/02/22-rdf-syntax-ns#>.
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>.
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>.

[ ] a foaf:Name;
  foaf:knows <abc>.

"#;

        assert_formats_to(txt, expected);
    }

    #[test]
    fn harder_format_pos() {
        let txt = r#"
[] a foaf:Name;
   foaf:knows <abc>; foaf:knows2 <abc>.

"#;

        let expected = r#"[ ] a foaf:Name;
  foaf:knows <abc>;
  foaf:knows2 <abc>.

"#;

        assert_formats_to(txt, expected);
    }

    #[test]
    fn format_blanknodes() {
        let txt = r#"
  [ <a> foaf:Person; foaf:knows <abc>; foaf:knows <def> ] foaf:knows [
  a foaf:Person;
  foaf:knows <abc>;
  foaf:knows <def>;
  ] .

"#;

        let expected = r#"[ <a> foaf:Person;
  foaf:knows <abc>;
  foaf:knows <def>;
] foaf:knows [
  a foaf:Person;
  foaf:knows <abc>;
  foaf:knows <def>;
].

"#;

        assert_formats_to(txt, expected);
    }

    #[test]
    fn long_objectlist() {
        let txt = r#"
  <abc> a <something-long>, <something-longer-still>, <something-longer>, <something-tes>, <soemthing-eeeellssee>.
"#;

        let expected = r#"<abc> a <something-long>,
  <something-longer-still>,
  <something-longer>,
  <something-tes>,
  <soemthing-eeeellssee>.

"#;

        assert_formats_to(txt, expected);
    }

    #[test]
    fn short_collection() {
        let txt = r#"
  <abc> a (), (<abc> <def>).
"#;

        let expected = r#"<abc> a ( ), ( <abc> <def> ).

"#;

        assert_formats_to(txt, expected);
    }

    #[test]
    fn long_collection() {
        let txt = r#"
  <abc> a (), (<somevery-very-very-long-item> <and-othersss> <and-ottteeehs> <wheeeeeeeeeeeee>).
"#;

        let expected = r#"<abc> a ( ), (
  <somevery-very-very-long-item>
  <and-othersss>
  <and-ottteeehs>
  <wheeeeeeeeeeeee>
).

"#;

        assert_formats_to(txt, expected);
    }

    #[test]
    fn easy_comments() {
        let txt = r#"
# Test this is a cool test or something!
  # Another comment!

[] a foaf:Name;
   foaf:knows <abc>; foaf:knows2 <abc>.

"#;

        let expected = r#"# Test this is a cool test or something!
# Another comment!
[ ] a foaf:Name;
  foaf:knows <abc>;
  foaf:knows2 <abc>.

"#;

        assert_formats_to(txt, expected);
    }

    #[test]
    fn hard_comments() {
        let txt = r#"

[] a foaf:Name; # Nested comment
   foaf:knows <abc>; # Another comment!
   foaf:knows2 <abc>.

  #trailing comments
"#;

        let expected = r#"[ ] a foaf:Name;
  # Nested comment
  foaf:knows <abc>;
  # Another comment!
  foaf:knows2 <abc>.

#trailing comments
"#;

        assert_formats_to(txt, expected);
    }

    #[test]
    fn bug_1() {
        let txt = r#"
[] a sh:NodeShape;
  sh:targetClass js:Echo;
  sh:property [
    sh:class :ReaderChannel;
    sh:path js:input;
    sh:name "Input Channel"
  ], [
    sh:class :WriterChannel;
    sh:path js:output;
    sh:name "Output Channel"
  ].

"#;

        let expected = r#"[ ] a sh:NodeShape;
  sh:targetClass js:Echo;
  sh:property [
    sh:class :ReaderChannel;
    sh:path js:input;
    sh:name "Input Channel";
  ], [
    sh:class :WriterChannel;
    sh:path js:output;
    sh:name "Output Channel";
  ].

"#;

        assert_formats_to(txt, expected);
    }
}