lang_turtle/lang/
formatter.rs

1use std::{
2    io::{self, Cursor, Write},
3    ops::Range,
4};
5
6use lsp_core::prelude::*;
7use lsp_core::lsp_types::FormattingOptions;
8use ropey::Rope;
9use tracing::info;
10
11use crate::lang::model::{Base, BlankNode, Term, Triple, Turtle, TurtlePrefix, PO};
12
13#[allow(unused)]
14pub fn format(tokens: &[&Token], options: FormattingOptions) -> String {
15    let mut indent_str = String::new();
16    for _ in 0..options.tab_size {
17        indent_str += " ";
18    }
19
20    let mut indent = 0;
21    let mut document = String::new();
22    let mut line = String::new();
23    let mut wants_newline = 0;
24    let mut needs_new_line = false;
25
26    let mut listings = vec![false];
27    let mut first = true;
28
29    let mut last_open_bnode = false;
30
31    for token in tokens {
32        if last_open_bnode && token.is_sq_close() {
33            wants_newline = 0;
34        }
35
36        let space = match token {
37            Token::Stop | Token::Comma | Token::PredicateSplit => false,
38            Token::DataTypeDelim | Token::LangTag(_) => false,
39            _ => true,
40        };
41
42        if wants_newline == 0 && space && !first {
43            line += " ";
44        }
45
46        if needs_new_line || (wants_newline > 0 && !token.is_comment()) {
47            document += "\n";
48            document += &line;
49            line = String::new();
50
51            for _ in 1..wants_newline {
52                line += "\n";
53            }
54
55            for _ in 0..indent {
56                line += &indent_str;
57            }
58
59            if listings.last().copied().unwrap_or_default() {
60                line += &indent_str;
61            }
62
63            wants_newline = 0;
64            needs_new_line = false;
65        }
66
67        last_open_bnode = token.is_sq_open();
68
69        match token {
70            Token::PrefixTag => line += "@prefix",
71            Token::BaseTag => line += "@base",
72            Token::SparqlPrefix => line += "PREFIX",
73            Token::SparqlBase => line += "BASE",
74            Token::PredType => line += "a",
75            Token::SqOpen => {
76                line += "[";
77                indent += 1;
78                wants_newline = 1;
79                listings.push(false);
80            }
81            Token::SqClose => {
82                line += "]";
83                indent -= 1;
84                listings.pop();
85            }
86            Token::BracketOpen => line += "(",
87            Token::BracketClose => line += ")",
88            Token::DataTypeDelim => {
89                line += "^^";
90            }
91            Token::Stop => {
92                line += ".";
93                wants_newline = 1;
94                if let Some(l) = listings.last_mut() {
95                    if *l {
96                        wants_newline = 2;
97                    }
98                    *l = false;
99                }
100            }
101            Token::PredicateSplit => {
102                line += ";";
103                wants_newline = 1;
104                if let Some(l) = listings.last_mut() {
105                    *l = true;
106                }
107            }
108            Token::Comma => line += ",",
109            Token::True => line += "true",
110            Token::False => line += "false",
111            Token::IRIRef(x) => {
112                line += "<";
113                line += x.as_str();
114                line += ">";
115            }
116            Token::PNameLN(x, y) => {
117                if let Some(x) = x {
118                    line += x.as_str();
119                }
120                line += ":";
121                line += y.as_str();
122            }
123            Token::BlankNodeLabel(x) => {
124                line += "_:";
125                line += x.as_str();
126            }
127            Token::LangTag(x) => {
128                line += "@";
129                line += x.as_str();
130            }
131            Token::Number(x) => line += x,
132            Token::Str(x, y) => {
133                line += y.quote();
134                line += x;
135                line += y.quote();
136            }
137            Token::ANON => line += "[]",
138            Token::Comment(x) => {
139                line += x;
140                needs_new_line = true;
141            }
142            Token::Invalid(x) => line += x,
143            _ => todo!(),
144        }
145
146        first = false;
147    }
148
149    document += "\n";
150    document += &line;
151    document + "\n"
152}
153
154type Buf = Cursor<Vec<u8>>;
155struct FormatState<'a> {
156    indent_level: usize,
157    indent: String,
158    buf: Buf,
159    line_start: u64,
160    comments: &'a [Spanned<String>],
161    comments_idx: usize,
162    tail: Spanned<String>,
163    line_count: usize,
164}
165
166impl<'a> FormatState<'a> {
167    fn new(
168        options: FormattingOptions,
169        buf: Buf,
170        comments: &'a [Spanned<String>],
171        source: &'a Rope,
172    ) -> Self {
173        let mut indent = String::new();
174        for _ in 0..options.tab_size {
175            indent.push(' ');
176        }
177
178        let tail = spanned(
179            String::new(),
180            source.len_chars() + 1..source.len_chars() + 1,
181        );
182        Self {
183            tail,
184            line_start: 0,
185            indent_level: 0,
186            indent,
187            buf,
188            comments,
189            comments_idx: 0,
190            line_count: 0,
191        }
192    }
193
194    fn check_comments(&mut self, span: &Range<usize>) -> io::Result<bool> {
195        println!("Checking comments with span {:?}", span);
196        let mut first = true;
197        loop {
198            let current = self.comments.get(self.comments_idx).unwrap_or(&self.tail);
199
200            if current.1.start > span.start {
201                break;
202            }
203
204            first = false;
205            write!(self.buf, "{}", current.0)?;
206            self.new_line()?;
207            self.comments_idx += 1;
208        }
209        Ok(!first)
210    }
211    fn current_line_length(&self) -> u64 {
212        self.buf.position() - self.line_start
213    }
214    fn new_line(&mut self) -> io::Result<()> {
215        self.line_count += 1;
216        write!(self.buf, "\n")?;
217        self.line_start = self.buf.position();
218        for _ in 0..self.indent_level {
219            write!(self.buf, "{}", &self.indent)?;
220        }
221        Ok(())
222    }
223    fn inc(&mut self) {
224        self.indent_level += 1;
225    }
226    fn decr(&mut self) {
227        self.indent_level -= 1;
228    }
229}
230
231impl FormatState<'_> {
232    fn write_turtle(&mut self, turtle: &Turtle) -> io::Result<()> {
233        if let Some(ref b) = turtle.base {
234            self.check_comments(&b.1)?;
235            self.write_base(b)?;
236            self.new_line()?;
237        }
238        for p in &turtle.prefixes {
239            self.check_comments(&p.1)?;
240            self.write_prefix(p)?;
241            self.new_line()?;
242        }
243
244        let mut prev_line = 0;
245
246        for t in &turtle.triples {
247            if prev_line + 1 < self.line_count {
248                self.new_line()?;
249            }
250            prev_line = self.line_count;
251            self.check_comments(&t.1)?;
252            self.write_triple(&t)?;
253            self.new_line()?;
254            // request_newline = t.0.po.len() > 1 || t.0.po[0].0.object.len() > 1;
255        }
256        self.new_line()?;
257
258        for i in self.comments_idx..self.comments.len() {
259            write!(self.buf, "{}", self.comments[i].0)?;
260            self.new_line()?;
261        }
262
263        Ok(())
264    }
265
266    fn write_prefix(&mut self, prefix: &TurtlePrefix) -> io::Result<()> {
267        write!(self.buf, "@prefix {}: {}.", prefix.prefix.0, prefix.value.0)
268    }
269
270    fn write_base(&mut self, base: &Base) -> io::Result<()> {
271        write!(self.buf, "@base {}.", base.1 .0)
272    }
273
274    fn write_bnode(&mut self, bnode: &BlankNode) -> io::Result<()> {
275        match bnode {
276            BlankNode::Named(x, _) => write!(self.buf, "_:{}", x)?,
277            BlankNode::Unnamed(pos, _, _) => {
278                if pos.len() == 0 {
279                    return write!(self.buf, "[ ]");
280                }
281                if pos.len() == 1 {
282                    write!(self.buf, "[ ")?;
283                    self.write_po(&pos[0])?;
284                    return write!(self.buf, " ]");
285                }
286                let is_first_of_line = self.current_line_length() == 0;
287                self.inc();
288                write!(self.buf, "[")?;
289                let should_skip = if is_first_of_line {
290                    write!(self.buf, " ")?;
291                    self.write_po(&pos[0])?;
292                    write!(self.buf, ";")?;
293                    1
294                } else {
295                    0
296                };
297                for po in pos.iter().skip(should_skip) {
298                    self.new_line()?;
299                    self.check_comments(&po.1)?;
300                    self.write_po(&po)?;
301                    write!(self.buf, ";")?;
302                }
303                self.decr();
304                self.new_line()?;
305                write!(self.buf, "]")?;
306            }
307            BlankNode::Invalid => return Err(io::Error::new(io::ErrorKind::Other, "")),
308        }
309        Ok(())
310    }
311
312    fn write_collection(&mut self, coll: &Vec<Spanned<Term>>) -> io::Result<()> {
313        if coll.is_empty() {
314            return write!(self.buf, "( )");
315        }
316
317        let mut should_indent = false;
318        let start = self.buf.position();
319        let current_line = self.line_count;
320
321        write!(self.buf, "( ")?;
322
323        self.check_comments(&coll[0].1)?;
324        self.write_term(&coll[0])?;
325
326        for po in coll.iter().skip(1) {
327            self.check_comments(&po.1)?;
328            write!(self.buf, " ")?;
329            self.write_term(&po)?;
330            if self.current_line_length() > 80 {
331                should_indent = true;
332                break;
333            }
334        }
335        write!(self.buf, " )")?;
336
337        if should_indent {
338            self.buf.set_position(start);
339            self.line_count = current_line;
340            write!(self.buf, "(")?;
341            self.inc();
342            for po in coll.iter() {
343                self.new_line()?;
344                self.check_comments(&po.1)?;
345                self.write_term(&po)?;
346            }
347            self.decr();
348            self.new_line()?;
349            write!(self.buf, ")")?;
350        }
351
352        Ok(())
353    }
354
355    fn write_term(&mut self, term: &Term) -> io::Result<()> {
356        match term {
357            Term::Literal(s) => write!(self.buf, "{}", s)?,
358            Term::BlankNode(b) => self.write_bnode(b)?,
359            Term::NamedNode(n) => write!(self.buf, "{}", n)?,
360            Term::Collection(ts) => self.write_collection(ts)?,
361            Term::Invalid => {
362                return Err(io::Error::new(
363                    io::ErrorKind::Other,
364                    "cannot format turtle with invalid terms",
365                ))
366            }
367            Term::Variable(_) => {
368                return Err(io::Error::new(
369                    io::ErrorKind::Other,
370                    "cannot format turtle with variables",
371                ))
372            }
373        }
374        Ok(())
375    }
376
377    fn write_po(&mut self, po: &PO) -> io::Result<()> {
378        write!(self.buf, "{} ", po.predicate.0)?;
379        self.write_term(&po.object[0])?;
380        let mut should_indent = false;
381
382        let start = self.buf.position();
383        let current_line = self.line_count;
384        for i in 1..po.object.len() {
385            write!(self.buf, ", ")?;
386            self.write_term(&po.object[i])?;
387
388            if self.current_line_length() > 80 {
389                should_indent = true;
390                break;
391            }
392        }
393
394        if should_indent {
395            self.buf.set_position(start);
396            self.line_count = current_line;
397            self.inc();
398            for i in 1..po.object.len() {
399                write!(self.buf, ",")?;
400                self.new_line()?;
401                self.check_comments(&po.object[i].1)?;
402                self.write_term(&po.object[i])?;
403            }
404            self.decr();
405        }
406
407        Ok(())
408    }
409
410    fn write_triple(&mut self, triple: &Triple) -> io::Result<()> {
411        match &triple.subject.0 {
412            Term::BlankNode(bn) => self.write_bnode(bn)?,
413            Term::NamedNode(n) => write!(self.buf, "{}", n)?,
414            _ => write!(self.buf, "invalid")?,
415        }
416        write!(self.buf, " ")?;
417        self.write_po(&triple.po[0])?;
418        if triple.po.len() == 1 {
419            write!(self.buf, ".")?;
420            return Ok(());
421        }
422        write!(self.buf, ";")?;
423        self.inc();
424
425        self.new_line()?;
426        self.check_comments(&triple.po[1].1)?;
427        self.write_po(&triple.po[1])?;
428
429        if triple.po.len() == 2 {
430            self.decr();
431            write!(self.buf, ".")?;
432            return Ok(());
433        }
434
435        for i in 2..triple.po.len() {
436            write!(self.buf, ";")?;
437            self.new_line()?;
438            self.check_comments(&triple.po[i].1)?;
439            self.write_po(&triple.po[i])?;
440        }
441
442        write!(self.buf, ".")?;
443        self.decr();
444        Ok(())
445    }
446}
447
448pub fn format_turtle(
449    turtle: &Turtle,
450    config: FormattingOptions,
451    comments: &[Spanned<String>],
452    source: &Rope,
453) -> Option<String> {
454    let buf: Buf = Cursor::new(Vec::new());
455    let mut state = FormatState::new(config, buf, comments, source);
456    match state.write_turtle(turtle) {
457        Ok(_) => info!("Format succesful"),
458        Err(e) => {
459            info!("Format unsuccesful {:?}", e);
460            return None;
461        }
462    }
463    String::from_utf8(state.buf.into_inner()).ok()
464}
465
#[cfg(test)]
mod tests {

    use std::str::FromStr;

    use lsp_core::lsp_types::{FormattingOptions, Url};
    use lsp_core::prelude::{Spanned, Token};
    use ropey::Rope;

    use crate::lang::{
        context::Context, formatter::format_turtle, model::Turtle, parser as parser2,
        tokenizer::parse_tokens_str_safe,
    };

    /// Stage at which preparing a test document failed.
    #[derive(Debug)]
    pub enum Err {
        Tokenizing,
        Parsing,
    }

    /// Tokenizes and parses `inp`, returning the document together with its
    /// comments sorted by start offset. Parser errors are printed, not
    /// returned.
    fn parse_turtle(inp: &str, url: &Url) -> Result<(Turtle, Vec<Spanned<String>>), Err> {
        let context = Context::new();
        let ctx = context.ctx();
        let tokens = parse_tokens_str_safe(inp).map_err(|e| {
            println!("Error {:?}", e);
            Err::Tokenizing
        })?;

        let mut comments: Vec<_> = tokens
            .iter()
            .flat_map(|x| {
                x.try_map_ref(|t| match t {
                    Token::Comment(x) => Some(x.clone()),
                    _ => None,
                })
            })
            .collect();
        comments.sort_by_key(|x| x.1.start);

        let (turtle, errs) = parser2::parse_turtle(&url, tokens, inp.len(), ctx);
        for e in errs {
            println!("Error {:?}", e);
        }

        Ok((turtle.into_value(), comments))
    }

    /// Parses `txt`, formats it with a tab size of 2 and asserts that the
    /// result equals `expected`.
    fn assert_formats_to(txt: &str, expected: &str) {
        let url = Url::from_str("http://example.com/ns#").unwrap();
        let (output, comments) = parse_turtle(txt, &url).expect("Simple");
        let formatted = format_turtle(
            &output,
            FormattingOptions {
                tab_size: 2,
                ..Default::default()
            },
            &comments,
            &Rope::from_str(txt),
        )
        .expect("formatting");
        assert_eq!(formatted, expected);
    }

    #[test]
    fn easy_format() {
        let txt = r#"
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@base <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .

[] a foaf:Name;
   foaf:knows <abc>;.
"#;

        let expected = r#"@base <http://www.w3.org/1999/02/22-rdf-syntax-ns#>.
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>.
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>.

[ ] a foaf:Name;
  foaf:knows <abc>.

"#;

        assert_formats_to(txt, expected);
    }

    #[test]
    fn harder_format_pos() {
        let txt = r#"
[] a foaf:Name;
   foaf:knows <abc>; foaf:knows2 <abc>.

"#;

        let expected = r#"[ ] a foaf:Name;
  foaf:knows <abc>;
  foaf:knows2 <abc>.

"#;

        assert_formats_to(txt, expected);
    }

    #[test]
    fn format_blanknodes() {
        let txt = r#"
        [ <a> foaf:Person; foaf:knows <abc>; foaf:knows <def> ] foaf:knows [
        a foaf:Person;
        foaf:knows <abc>;
        foaf:knows <def>;
        ] .

"#;

        let expected = r#"[ <a> foaf:Person;
  foaf:knows <abc>;
  foaf:knows <def>;
] foaf:knows [
  a foaf:Person;
  foaf:knows <abc>;
  foaf:knows <def>;
].

"#;

        assert_formats_to(txt, expected);
    }

    #[test]
    fn long_objectlist() {
        let txt = r#"
        <abc> a <something-long>, <something-longer-still>, <something-longer>, <something-tes>, <soemthing-eeeellssee>.
"#;

        let expected = r#"<abc> a <something-long>,
  <something-longer-still>,
  <something-longer>,
  <something-tes>,
  <soemthing-eeeellssee>.

"#;

        assert_formats_to(txt, expected);
    }

    #[test]
    fn short_collection() {
        let txt = r#"
        <abc> a (), (<abc> <def>).
"#;

        let expected = r#"<abc> a ( ), ( <abc> <def> ).

"#;

        assert_formats_to(txt, expected);
    }

    #[test]
    fn long_collection() {
        let txt = r#"
        <abc> a (), (<somevery-very-very-long-item> <and-othersss> <and-ottteeehs> <wheeeeeeeeeeeee>).
"#;

        let expected = r#"<abc> a ( ), (
  <somevery-very-very-long-item>
  <and-othersss>
  <and-ottteeehs>
  <wheeeeeeeeeeeee>
).

"#;

        assert_formats_to(txt, expected);
    }

    #[test]
    fn easy_comments() {
        let txt = r#"
# Test this is a cool test or something!
            # Another comment!

[] a foaf:Name;
   foaf:knows <abc>; foaf:knows2 <abc>.

"#;

        let expected = r#"# Test this is a cool test or something!
# Another comment!
[ ] a foaf:Name;
  foaf:knows <abc>;
  foaf:knows2 <abc>.

"#;

        assert_formats_to(txt, expected);
    }

    #[test]
    fn hard_comments() {
        let txt = r#"

[] a foaf:Name; # Nested comment
   foaf:knows <abc>;     # Another comment!
   foaf:knows2 <abc>.

   #trailing comments
"#;

        let expected = r#"[ ] a foaf:Name;
  # Nested comment
  foaf:knows <abc>;
  # Another comment!
  foaf:knows2 <abc>.

#trailing comments
"#;

        assert_formats_to(txt, expected);
    }

    #[test]
    fn bug_1() {
        let txt = r#"
[] a sh:NodeShape;
  sh:targetClass js:Echo;
  sh:property [
    sh:class :ReaderChannel;
    sh:path js:input;
    sh:name "Input Channel"
  ], [
    sh:class :WriterChannel;
    sh:path js:output;
    sh:name "Output Channel"
  ].

"#;

        let expected = r#"[ ] a sh:NodeShape;
  sh:targetClass js:Echo;
  sh:property [
    sh:class :ReaderChannel;
    sh:path js:input;
    sh:name "Input Channel";
  ], [
    sh:class :WriterChannel;
    sh:path js:output;
    sh:name "Output Channel";
  ].

"#;

        assert_formats_to(txt, expected);
    }
}
814}