blob: 99133c7893d4abe5c64dba7a6267c018ef3aa2ec (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
|
WEBVTT
00:00:01.520 --> 00:00:04.400
hello everyone my name is Tuấn-Anh
00:00:04.400 --> 00:00:07.200
I've been using Emacs for about 10 years
00:00:07.200 --> 00:00:09.280
today I'm going to talk about tree-sitter
00:00:09.280 --> 00:00:11.519
a new Emacs package that allows Emacs to
00:00:11.519 --> 00:00:13.759
parse multiple programming languages
00:00:13.759 --> 00:00:17.840
in real time
00:00:17.840 --> 00:00:21.840
so what is the problem statement
00:00:21.840 --> 00:00:23.359
in order to support programming
00:00:23.359 --> 00:00:24.960
functionalities for a particular
00:00:24.960 --> 00:00:25.760
language
00:00:25.760 --> 00:00:27.680
a text editor needs to have some degree
00:00:27.680 --> 00:00:29.679
of language understanding
00:00:29.679 --> 00:00:31.840
traditionally text editors have relied
00:00:31.840 --> 00:00:33.840
very heavily on regular expressions for
00:00:33.840 --> 00:00:34.960
this
00:00:34.960 --> 00:00:38.320
Emacs is no different most language
00:00:38.320 --> 00:00:39.280
major modes use
00:00:39.280 --> 00:00:40.879
regular expressions for syntax
00:00:40.879 --> 00:00:42.960
highlighting code navigation
00:00:42.960 --> 00:00:46.239
folding indexing and so on regular
00:00:46.239 --> 00:00:47.440
expressions are
00:00:47.440 --> 00:00:50.559
problematic for a couple of reasons
00:00:50.559 --> 00:00:53.600
they're slow and inaccurate they also
00:00:53.600 --> 00:00:54.000
make
00:00:54.000 --> 00:00:56.800
the code hard to read and write
00:00:56.800 --> 00:00:57.440
sometimes
00:00:57.440 --> 00:00:59.199
it's because the regular expressions
00:00:59.199 --> 00:01:01.199
themselves are very hairy
00:01:01.199 --> 00:01:04.000
and sometimes because they are just not
00:01:04.000 --> 00:01:05.199
powerful enough
00:01:05.199 --> 00:01:07.840
some helper code is usually needed to
00:01:07.840 --> 00:01:11.200
parse more intricate language features
00:01:11.200 --> 00:01:13.280
that also illustrates the core problem
00:01:13.280 --> 00:01:16.159
with regular expressions
00:01:16.159 --> 00:01:18.400
in that they are not powerful enough to
00:01:18.400 --> 00:01:21.119
parse programming languages
00:01:21.119 --> 00:01:22.640
an example feature that regular
00:01:22.640 --> 00:01:25.040
expressions cannot handle very well
00:01:25.040 --> 00:01:27.520
is string interpolation which is a very
00:01:27.520 --> 00:01:28.320
common feature
00:01:28.320 --> 00:01:31.680
in many modern programming languages
00:01:31.680 --> 00:01:34.079
it would be much nicer if Emacs somehow
00:01:34.079 --> 00:01:35.840
had structural understanding of source
00:01:35.840 --> 00:01:36.479
code
00:01:36.479 --> 00:01:39.520
like IDEs do
00:01:39.520 --> 00:01:41.119
there have been multiple efforts to
00:01:41.119 --> 00:01:42.960
bring this kind of programming language
00:01:42.960 --> 00:01:45.280
understanding into Emacs
00:01:45.280 --> 00:01:47.119
there are language specific parsers
00:01:47.119 --> 00:01:48.640
written in Elisp
00:01:48.640 --> 00:01:50.240
they can be thought of as the next
00:01:50.240 --> 00:01:52.320
logical step of the glue code on top
00:01:52.320 --> 00:01:54.960
of regular expressions moving from
00:01:54.960 --> 00:01:56.000
partial local
00:01:56.000 --> 00:01:58.079
pattern recognition into a full-fledged
00:01:58.079 --> 00:01:59.840
parser
00:01:59.840 --> 00:02:01.439
the most prominent example of this
00:02:01.439 --> 00:02:03.040
approach is probably the famous
00:02:03.040 --> 00:02:06.479
js2 mode
00:02:06.479 --> 00:02:10.080
however this approach has several issues
00:02:10.080 --> 00:02:12.959
parsing is computationally expensive and
00:02:12.959 --> 00:02:13.680
Emacs Lisp
00:02:13.680 --> 00:02:16.800
is not good at that kind of stuff
00:02:16.800 --> 00:02:18.400
furthermore maintenance is very
00:02:18.400 --> 00:02:20.840
troublesome in order to work on these
00:02:20.840 --> 00:02:22.160
parsers
00:02:22.160 --> 00:02:23.599
first you have to know Elisp well
00:02:23.599 --> 00:02:25.599
enough and then you have to be
00:02:25.599 --> 00:02:27.760
comfortable with writing a
00:02:27.760 --> 00:02:30.319
recursive descent parser while
00:02:30.319 --> 00:02:32.080
constantly keeping up with changes to
00:02:32.080 --> 00:02:34.000
the language itself
00:02:34.000 --> 00:02:36.879
which can be evolving very quickly like
00:02:36.879 --> 00:02:39.360
javascript for example
00:02:39.360 --> 00:02:41.599
together these constraints significantly
00:02:41.599 --> 00:02:45.680
reduce the pool of potential maintainers
00:02:45.680 --> 00:02:47.760
the biggest issue though in my opinion
00:02:47.760 --> 00:02:49.680
is the lack of a set of generic
00:02:49.680 --> 00:02:52.879
and reusable apis this makes them very
00:02:52.879 --> 00:02:54.319
hard to use
00:02:54.319 --> 00:02:55.920
for minor modes that want to deal with
00:02:55.920 --> 00:02:57.920
cross-cutting concerns across multiple
00:02:57.920 --> 00:02:59.920
languages
00:02:59.920 --> 00:03:01.760
the other approach which has been
00:03:01.760 --> 00:03:03.599
gaining a lot of momentum in recent
00:03:03.599 --> 00:03:04.319
years
00:03:04.319 --> 00:03:06.560
is externalizing language understanding
00:03:06.560 --> 00:03:08.159
to another process
00:03:08.159 --> 00:03:12.239
also known as language server protocol
00:03:12.239 --> 00:03:14.480
this second approach is actually a very
00:03:14.480 --> 00:03:16.560
interesting one
00:03:16.560 --> 00:03:18.400
by decoupling language understanding
00:03:18.400 --> 00:03:21.280
from the editing facility itself
00:03:21.280 --> 00:03:23.760
the LSP servers can attract a lot more
00:03:23.760 --> 00:03:25.120
contributors
00:03:25.120 --> 00:03:28.959
which makes maintenance easier however
00:03:28.959 --> 00:03:32.400
they also have several issues available
00:03:32.400 --> 00:03:34.720
being a separate process they are
00:03:34.720 --> 00:03:36.000
usually more resource
00:03:36.000 --> 00:03:39.920
intensive and depending on the language
00:03:39.920 --> 00:03:42.159
the LSP server itself can bring with it
00:03:42.159 --> 00:03:44.640
a host of additional dependencies
00:03:44.640 --> 00:03:47.680
external to Emacs which may be messy to
00:03:47.680 --> 00:03:50.640
install and manage
00:03:50.640 --> 00:03:53.760
furthermore json over rpc has pretty
00:03:53.760 --> 00:03:55.120
high latency
00:03:55.120 --> 00:03:57.840
for one-off tasks like jumping to source
00:03:57.840 --> 00:04:00.879
or on-demand completion it's great
00:04:00.879 --> 00:04:03.040
but for things like code highlighting
00:04:03.040 --> 00:04:06.000
the latency is just too much
00:04:06.000 --> 00:04:08.319
I was using rust and I was following the
00:04:08.319 --> 00:04:10.480
community effort to improve its IDE
00:04:10.480 --> 00:04:11.760
support
00:04:11.760 --> 00:04:13.680
hoping to integrate some of that into
00:04:13.680 --> 00:04:15.760
Emacs itself
00:04:15.760 --> 00:04:17.600
then I heard someone from community
00:04:17.600 --> 00:04:19.759
mention tree sitter
00:04:19.759 --> 00:04:23.360
and I decided to check it out
00:04:23.360 --> 00:04:25.520
basically tree-sitter is an incremental
00:04:25.520 --> 00:04:28.720
parsing library and a parser generator
00:04:28.720 --> 00:04:31.000
it was introduced by the Atom editor in
00:04:31.000 --> 00:04:33.040
2018
00:04:33.040 --> 00:04:35.680
besides Atom it is also being integrated
00:04:35.680 --> 00:04:36.960
into the Neovim
00:04:36.960 --> 00:04:41.040
editor and github is using it to power
00:04:41.040 --> 00:04:42.479
their source code analysis and
00:04:42.479 --> 00:04:45.840
navigation features
00:04:45.840 --> 00:04:48.639
it is written in c and can be compiled
00:04:48.639 --> 00:04:49.199
for all
00:04:49.199 --> 00:04:53.120
major platforms it can even be compiled
00:04:53.120 --> 00:04:56.080
to web assembly to run on the web that's
00:04:56.080 --> 00:04:57.600
how github is using it
00:04:57.600 --> 00:05:00.800
on their website
00:05:00.800 --> 00:05:02.960
so why is tree-sitter an interesting
00:05:02.960 --> 00:05:05.840
solution to this problem
00:05:05.840 --> 00:05:07.360
there are multiple features that make it
00:05:07.360 --> 00:05:10.000
an attractive option
00:05:10.000 --> 00:05:12.400
it is designed to be fast by being
00:05:12.400 --> 00:05:13.680
incremental
00:05:13.680 --> 00:05:15.680
the initial parse of a typical big file
00:05:15.680 --> 00:05:18.160
can take tens of milliseconds
00:05:18.160 --> 00:05:20.240
while subsequent incremental parses
00:05:20.240 --> 00:05:22.560
are sub milliseconds
00:05:22.560 --> 00:05:24.720
it achieves this by using structural
00:05:24.720 --> 00:05:26.240
sharing
00:05:26.240 --> 00:05:29.360
meaning replacing only affected nodes
00:05:29.360 --> 00:05:32.960
in the old tree when it needs to
00:05:32.960 --> 00:05:36.000
also unlike lsp being in the same
00:05:36.000 --> 00:05:37.120
process
00:05:37.120 --> 00:05:40.639
it has much lower latency
00:05:40.639 --> 00:05:42.880
secondly it provides a uniform
00:05:42.880 --> 00:05:44.960
programming interface
00:05:44.960 --> 00:05:47.039
the same data structures and functions
00:05:47.039 --> 00:05:48.720
work on parse trees of different
00:05:48.720 --> 00:05:50.400
languages
00:05:50.400 --> 00:05:52.160
syntax nodes of different languages
00:05:52.160 --> 00:05:54.160
differ only by their types
00:05:54.160 --> 00:05:57.360
and their possible child nodes this
00:05:57.360 --> 00:05:58.960
is a big advantage over language
00:05:58.960 --> 00:06:02.240
specific parsers
00:06:02.240 --> 00:06:04.880
thirdly it's written in self-contained
00:06:04.880 --> 00:06:06.880
embeddable c
00:06:06.880 --> 00:06:09.680
as I mentioned previously it can even be
00:06:09.680 --> 00:06:10.400
compiled
00:06:10.400 --> 00:06:13.759
to webassembly this makes integrating it
00:06:13.759 --> 00:06:15.199
into various editors
00:06:15.199 --> 00:06:18.240
quite easy without having to install
00:06:18.240 --> 00:06:22.880
any external dependencies
00:06:22.880 --> 00:06:24.639
one thing that is not mentioned here is
00:06:24.639 --> 00:06:28.000
that being a parser generator
00:06:28.000 --> 00:06:31.039
its grammars are declarative
00:06:31.039 --> 00:06:34.880
together with being editor independent
00:06:34.880 --> 00:06:36.720
this makes the pool of potential
00:06:36.720 --> 00:06:38.160
contributors
00:06:38.160 --> 00:06:42.400
much larger so I was convinced
00:06:42.400 --> 00:06:45.520
that tree-sitter is a good fit for Emacs
00:06:45.520 --> 00:06:48.000
last year I started writing the bindings
00:06:48.000 --> 00:06:48.720
using
00:06:48.720 --> 00:06:50.960
dynamic module support introduced in Emacs
00:06:50.960 --> 00:06:53.280
25.
00:06:53.280 --> 00:06:55.360
dynamic module means there is platform
00:06:55.360 --> 00:06:58.479
specific native code involved
00:06:58.479 --> 00:07:00.560
but since there are pre-compiled binaries
00:07:00.560 --> 00:07:02.880
for the three major platforms
00:07:02.880 --> 00:07:06.319
it should work in most places currently
00:07:06.319 --> 00:07:08.319
the core functionalities are in a pretty
00:07:08.319 --> 00:07:09.440
good shape
00:07:09.440 --> 00:07:12.560
syntax highlighting is working nicely
00:07:12.560 --> 00:07:14.840
the whole thing is split into three
00:07:14.840 --> 00:07:16.080
packages
00:07:16.080 --> 00:07:17.759
tree sitter is the main package that
00:07:17.759 --> 00:07:20.319
other packages should depend on
00:07:20.319 --> 00:07:22.800
tree-sitter-langs is the language bundle
00:07:22.800 --> 00:07:24.000
that includes support
00:07:24.000 --> 00:07:27.199
for most common languages
00:07:27.199 --> 00:07:30.080
and finally the core apis are in the
00:07:30.080 --> 00:07:32.160
package tsc
00:07:32.160 --> 00:07:36.160
which stands for tree-sitter core
00:07:36.160 --> 00:07:38.800
it is the implicit dependency of the
00:07:38.800 --> 00:07:43.520
tree-sitter package
00:07:43.520 --> 00:07:46.000
the main package includes the minor mode
00:07:46.000 --> 00:07:47.520
tree-sitter-mode
00:07:47.520 --> 00:07:49.840
this provides the base for other major
00:07:49.840 --> 00:07:52.560
or minor modes to build on
00:07:52.560 --> 00:07:55.280
using Emacs's change tracking hooks it
00:07:55.280 --> 00:07:55.840
enables
00:07:55.840 --> 00:07:58.080
incremental parsing and provides a
00:07:58.080 --> 00:08:00.800
syntax tree that is always up to date
00:08:00.800 --> 00:08:04.080
after any edits in a buffer
00:08:04.080 --> 00:08:06.560
there is also a basic debug mode that
00:08:06.560 --> 00:08:10.080
shows the parse tree in another buffer
00:08:10.080 --> 00:08:13.360
here is a quick demo
00:08:13.360 --> 00:08:15.759
here I am in an empty python buffer with
00:08:15.759 --> 00:08:17.520
tree-sitter enabled
00:08:17.520 --> 00:08:19.440
I'm going to turn on the debug mode to
00:08:19.440 --> 00:08:26.560
see the parse tree
00:08:26.560 --> 00:08:28.720
since the buffer is empty there is only
00:08:28.720 --> 00:08:30.639
one node in the syntax tree the top
00:08:30.639 --> 00:08:33.279
level module node
00:08:33.279 --> 00:09:11.040
let's try typing some code
00:09:11.040 --> 00:09:13.600
as you can see as I type into the python
00:09:13.600 --> 00:09:14.640
buffer
00:09:14.640 --> 00:09:19.120
the syntax tree updates in real time
00:09:19.120 --> 00:09:21.120
the other minor mode included in the
00:09:21.120 --> 00:09:23.279
main package is tree-sitter
00:09:23.279 --> 00:09:26.640
hl mode it overrides font-lock mode and
00:09:26.640 --> 00:09:28.480
provides its own set of faces
00:09:28.480 --> 00:09:31.839
and customization options it is query
00:09:31.839 --> 00:09:32.800
driven
00:09:32.800 --> 00:09:35.200
that means instead of regular
00:09:35.200 --> 00:09:36.240
expressions
00:09:36.240 --> 00:09:38.720
it uses a Lisp-like query language to
00:09:38.720 --> 00:09:40.320
map syntax nodes
00:09:40.320 --> 00:09:43.760
to highlighting faces I'm going to
00:09:43.760 --> 00:09:45.760
open a python file with small snippets
00:09:45.760 --> 00:09:54.320
that showcase syntax highlighting
00:09:54.320 --> 00:09:55.920
so this is the default highlighting
00:09:55.920 --> 00:10:00.880
provided by python mode
00:10:00.880 --> 00:10:02.839
this is the highlighting enabled by tree
00:10:02.839 --> 00:10:04.640
sitter
00:10:04.640 --> 00:10:07.680
as you can see string interpolation
00:10:07.680 --> 00:10:11.680
and decorators are highlighted correctly
00:10:11.680 --> 00:10:17.440
function calls are also highlighted
00:10:17.440 --> 00:10:20.240
you can also note that property
00:10:20.240 --> 00:10:21.839
accessors
00:10:21.839 --> 00:10:24.640
and property assignments are highlighted
00:10:24.640 --> 00:10:27.440
differently
00:10:27.440 --> 00:10:29.360
what I like the most about this is that
00:10:29.360 --> 00:10:30.880
new bindings are consistently
00:10:30.880 --> 00:10:32.640
highlighted
00:10:32.640 --> 00:10:36.320
this includes local variables
00:10:36.320 --> 00:10:39.760
function parameters and property
00:10:39.760 --> 00:10:45.760
mutations
00:10:45.760 --> 00:10:48.000
before going through the tree queries
00:10:48.000 --> 00:10:49.279
and the syntax highlighting
00:10:49.279 --> 00:10:51.680
customization options
00:10:51.680 --> 00:10:53.760
let's take a brief look at the core data
00:10:53.760 --> 00:10:55.040
structures and functions
00:10:55.040 --> 00:10:58.079
that tree sitter provides
00:10:58.079 --> 00:10:59.839
so parsing is done with the help of a
00:10:59.839 --> 00:11:02.240
generic parser object
00:11:02.240 --> 00:11:04.160
a single parser object can be used to
00:11:04.160 --> 00:11:06.000
parse different languages
00:11:06.000 --> 00:11:08.320
by sending different language objects to
00:11:08.320 --> 00:11:09.279
it
00:11:09.279 --> 00:11:10.880
the language objects themselves are
00:11:10.880 --> 00:11:14.079
loaded from shared libraries
00:11:14.079 --> 00:11:16.079
since tree-sitter-mode already handles
00:11:16.079 --> 00:11:17.360
the parsing part
00:11:17.360 --> 00:11:19.440
we will instead focus on the functions
00:11:19.440 --> 00:11:20.800
that inspect nodes
00:11:20.800 --> 00:11:25.279
in the resulting parse tree
00:11:25.279 --> 00:11:27.200
we can ask tree sitter what is the
00:11:27.200 --> 00:11:44.240
syntax node at point
00:11:44.240 --> 00:11:47.200
uh it is an opaque object so this is not
00:11:47.200 --> 00:11:48.480
very useful
00:11:48.480 --> 00:12:03.760
we can instead ask what is its type
00:12:03.760 --> 00:12:06.560
so its type is the symbol comparison
00:12:06.560 --> 00:12:08.959
operator
00:12:08.959 --> 00:12:11.600
in tree-sitter there are two kinds of nodes
00:12:11.600 --> 00:12:13.680
anonymous nodes and named nodes
00:12:13.680 --> 00:12:15.519
anonymous nodes correspond to simple
00:12:15.519 --> 00:12:17.040
grammar elements
00:12:17.040 --> 00:12:19.839
like keywords operators punctuations and
00:12:19.839 --> 00:12:21.279
so on
00:12:21.279 --> 00:12:24.160
named nodes on the other hand are grammar
00:12:24.160 --> 00:12:25.920
elements that are interesting enough for
00:12:25.920 --> 00:12:26.639
their own
00:12:26.639 --> 00:12:30.320
to have a name like an identifier an
00:12:30.320 --> 00:12:31.839
expression
00:12:31.839 --> 00:12:35.440
or a function definition
00:12:35.440 --> 00:12:37.760
named node types are symbols while
00:12:37.760 --> 00:12:42.639
anonymous node types are strings
00:12:42.639 --> 00:12:46.320
for example if we are on this
00:12:46.320 --> 00:12:49.760
comparison operator
00:12:49.760 --> 00:12:55.920
the node type should be a string
00:12:55.920 --> 00:12:57.920
we can also get other information about
00:12:57.920 --> 00:12:58.959
the node
00:12:58.959 --> 00:13:09.680
for example what is its text
00:13:09.680 --> 00:13:20.800
or where it is in the buffer
00:13:20.800 --> 00:13:43.199
or what is its parent
00:13:43.199 --> 00:13:46.160
there are many other APIs to query
00:13:46.160 --> 00:13:46.839
node
00:13:46.839 --> 00:13:52.639
properties
00:13:52.639 --> 00:13:54.399
tree sitter allows searching for
00:13:54.399 --> 00:13:58.240
structural patterns within a parse tree
00:13:58.240 --> 00:14:01.440
it does so through a Lisp-like language
00:14:01.440 --> 00:14:03.519
this language supports pattern matching
00:14:03.519 --> 00:14:04.639
by node types
00:14:04.639 --> 00:14:07.760
field names and predicates
00:14:07.760 --> 00:14:10.079
it also allows capturing nodes for
00:14:10.079 --> 00:14:12.639
further processing
00:14:12.639 --> 00:14:37.680
let's try to see some examples
00:14:37.680 --> 00:14:41.040
so in this very simple query we just
00:14:41.040 --> 00:14:43.839
try to highlight all the identifiers in
00:14:43.839 --> 00:14:49.040
the buffer
00:14:49.040 --> 00:14:51.920
this @ sign tells tree-sitter to capture a
00:14:51.920 --> 00:14:53.120
node
00:14:53.120 --> 00:14:55.839
in the context of the query builder it's
00:14:55.839 --> 00:14:57.360
not very important
00:14:57.360 --> 00:15:00.320
but in normal highlighting query this
00:15:00.320 --> 00:15:01.760
will determine
00:15:01.760 --> 00:15:06.639
the face used to highlight the node
00:15:06.639 --> 00:15:08.800
suppose we want to capture all the
00:15:08.800 --> 00:15:10.320
function names
00:15:10.320 --> 00:15:13.519
instead of just any identifier
00:15:13.519 --> 00:15:29.440
you can improve the query like this
00:15:29.440 --> 00:15:31.600
uh this will highlight the whole
00:15:31.600 --> 00:15:32.639
definition
00:15:32.639 --> 00:15:35.519
but we only want to capture the function
00:15:35.519 --> 00:15:36.399
name
00:15:36.399 --> 00:15:39.600
which means the identifier
00:15:39.600 --> 00:15:42.800
here so we
00:15:42.800 --> 00:15:46.320
move the capture to after the identifier
00:15:46.320 --> 00:15:49.600
node
00:15:49.600 --> 00:15:51.759
if we want to capture the class names as
00:15:51.759 --> 00:15:52.959
well
00:15:52.959 --> 00:16:10.079
we just add another pattern
00:16:10.079 --> 00:16:20.320
let's look at a more practical example
00:16:20.320 --> 00:16:22.959
here we can see that single quotes
00:16:22.959 --> 00:16:23.759
strings and
00:16:23.759 --> 00:16:25.600
double quotes strings are highlighted
00:16:25.600 --> 00:16:27.279
the same
00:16:27.279 --> 00:16:30.399
but in some places
00:16:30.399 --> 00:16:33.440
because of some coding conventions
00:16:33.440 --> 00:16:35.440
it may be desirable to highlight them
00:16:35.440 --> 00:16:37.279
differently for example if
00:16:37.279 --> 00:16:39.680
the string is single quoted we may want
00:16:39.680 --> 00:16:40.880
to highlight it
00:16:40.880 --> 00:16:44.399
as a constant
00:16:44.399 --> 00:16:46.160
let's try to see whether we can
00:16:46.160 --> 00:16:47.600
distinguish these
00:16:47.600 --> 00:16:56.240
two cases
00:16:56.240 --> 00:17:00.639
so here we get all the strings
00:17:00.639 --> 00:17:04.079
if we want to see if it's single quotes
00:17:04.079 --> 00:17:04.559
or
00:17:04.559 --> 00:17:08.799
double quote strings
00:17:08.799 --> 00:17:11.039
we can try looking at the first
00:17:11.039 --> 00:17:12.480
character
00:17:12.480 --> 00:17:15.280
of the string I mean the first character
00:17:15.280 --> 00:17:16.720
of the node
00:17:16.720 --> 00:17:19.360
to check whether it's a single quote or
00:17:19.360 --> 00:17:33.600
a double quote
00:17:33.600 --> 00:17:36.080
yeah so for that we use
00:17:36.080 --> 00:17:36.799
tree-sitter's
00:17:36.799 --> 00:17:40.160
support for predicates in this case
00:17:40.160 --> 00:17:43.360
we use a match predicate
00:17:43.360 --> 00:17:46.080
to check whether the string where the
00:17:46.080 --> 00:17:46.799
node
00:17:46.799 --> 00:17:50.320
starts with a single quote and with this
00:17:50.320 --> 00:17:51.280
pattern
00:17:51.280 --> 00:17:58.840
we only capture the single quotes
00:17:58.840 --> 00:18:00.400
strings
00:18:00.400 --> 00:18:03.760
let's try to give it a different face
00:18:03.760 --> 00:18:13.039
so we copy the pattern
00:18:13.039 --> 00:18:18.640
and we add this pattern
00:18:18.640 --> 00:18:25.120
pop item only
00:18:25.120 --> 00:18:28.400
but we also want to give the
00:18:28.400 --> 00:18:31.440
capture a different name
00:18:31.440 --> 00:18:40.840
let's say we want to highlight it as a
00:18:40.840 --> 00:18:46.559
keyword
00:18:46.559 --> 00:19:06.320
and now if we refresh the buffer
00:19:06.320 --> 00:19:08.799
we see that single quote strings are
00:19:08.799 --> 00:19:10.320
highlighted as
00:19:10.320 --> 00:19:14.400
keywords
00:19:14.400 --> 00:19:16.400
the highlighting patterns can also be
00:19:16.400 --> 00:19:19.200
set for a single project
00:19:19.200 --> 00:19:23.440
using directory local variable
00:19:23.440 --> 00:19:26.880
for example let's take a look at
00:19:26.880 --> 00:19:35.760
Emacs source code
00:19:35.760 --> 00:19:40.400
so in Emacs's C source there are a lot of
00:19:40.400 --> 00:19:43.760
uses of this DEFUN macro
00:19:43.760 --> 00:19:47.679
to define functions
00:19:47.679 --> 00:19:51.200
and you can see
00:19:51.200 --> 00:19:53.520
this is actually the function name but
00:19:53.520 --> 00:19:55.760
it's highlighted as the
00:19:55.760 --> 00:19:59.120
string so what we want
00:19:59.120 --> 00:20:03.679
is to somehow recognize this pattern
00:20:03.679 --> 00:20:07.600
and highlight it
00:20:07.600 --> 00:20:11.280
as highlight this part
00:20:11.280 --> 00:20:14.559
with the function face instead
00:20:14.559 --> 00:20:17.679
in order to do that
00:20:17.679 --> 00:20:20.240
we put a pattern in this project
00:20:20.240 --> 00:20:21.760
directory local
00:20:21.760 --> 00:20:31.760
settings file
00:20:31.760 --> 00:20:34.799
so we can put this pattern in the c
00:20:34.799 --> 00:20:40.159
mode section
00:20:40.159 --> 00:20:48.000
and now if we enable tree sitter
00:20:48.000 --> 00:20:50.480
you can see that this is highlighted
00:20:50.480 --> 00:20:53.200
uh
00:20:53.200 --> 00:20:55.520
as a normal function definition so this
00:20:55.520 --> 00:20:56.559
is the function
00:20:56.559 --> 00:21:01.200
face like we wanted
00:21:01.200 --> 00:21:03.760
the pattern for this is actually pretty
00:21:03.760 --> 00:21:07.200
simple
00:21:07.200 --> 00:21:10.720
it's only
00:21:10.720 --> 00:21:14.720
only this part so
00:21:14.720 --> 00:21:17.440
if it's a function call where the name
00:21:17.440 --> 00:21:19.679
of the function is DEFUN
00:21:19.679 --> 00:21:21.600
then we highlight the DEFUN as a
00:21:21.600 --> 00:21:24.240
keyword
00:21:24.240 --> 00:21:27.360
and then the first string element we
00:21:27.360 --> 00:21:28.159
highlighted
00:21:28.159 --> 00:21:35.360
as a function name
00:21:35.360 --> 00:21:37.679
since the language objects are actually
00:21:37.679 --> 00:21:39.280
native code
00:21:39.280 --> 00:21:40.799
they have to be compiled for each
00:21:40.799 --> 00:21:43.440
platform that we want to support
00:21:43.440 --> 00:21:45.600
this will become a big obstacle for
00:21:45.600 --> 00:21:48.159
tree-sitter adoption
00:21:48.159 --> 00:21:50.240
therefore I've created a language bundle
00:21:50.240 --> 00:21:52.960
package tree-sitter-langs
00:21:52.960 --> 00:21:54.960
that takes care of pre-compiling the
00:21:54.960 --> 00:21:56.320
grammars the
00:21:56.320 --> 00:21:59.679
most common grammars for all three major
00:21:59.679 --> 00:22:01.600
platforms
00:22:01.600 --> 00:22:04.080
it also takes care of distributing these
00:22:04.080 --> 00:22:05.360
binaries
00:22:05.360 --> 00:22:08.080
and provides some highlighting queries
00:22:08.080 --> 00:22:11.440
for some of the languages
00:22:11.440 --> 00:22:13.760
it should be noted that this package
00:22:13.760 --> 00:22:15.919
should be treated as a temporary
00:22:15.919 --> 00:22:19.919
distribution mechanism only
00:22:19.919 --> 00:22:22.240
to help with bootstrapping tree-sitter's
00:22:22.240 --> 00:22:24.720
adoption
00:22:24.720 --> 00:22:27.760
the plan is that eventually these files
00:22:27.760 --> 00:22:29.760
should be provided by the language major
00:22:29.760 --> 00:22:32.480
modes themselves
00:22:32.480 --> 00:22:35.120
but in order to do that we need better
00:22:35.120 --> 00:22:36.320
tooling
00:22:36.320 --> 00:22:40.240
so we're not there yet
00:22:40.240 --> 00:22:42.559
since the core already works reasonably
00:22:42.559 --> 00:22:43.280
well
00:22:43.280 --> 00:22:44.640
there are several areas that would
00:22:44.640 --> 00:22:46.320
benefit from the community's
00:22:46.320 --> 00:22:49.120
contribution
00:22:49.120 --> 00:22:51.520
so tree-sitter's upstream language
00:22:51.520 --> 00:22:52.640
repositories
00:22:52.640 --> 00:22:54.400
already contain highlighting queries on
00:22:54.400 --> 00:22:55.679
their own
00:22:55.679 --> 00:22:58.480
however they are pretty basic and they
00:22:58.480 --> 00:23:00.480
may not fit well with existing Emacs
00:23:00.480 --> 00:23:02.559
conventions
00:23:02.559 --> 00:23:04.320
therefore the language bundle has its
00:23:04.320 --> 00:23:07.120
own set of highlighting queries
00:23:07.120 --> 00:23:10.559
this requires maintenance until language
00:23:10.559 --> 00:23:11.600
major modes adopt
00:23:11.600 --> 00:23:13.760
tree-sitter and maintain the queries on
00:23:13.760 --> 00:23:16.640
their own
00:23:16.640 --> 00:23:18.480
the queries are actually quite easy to
00:23:18.480 --> 00:23:22.000
write as you've already seen
00:23:22.000 --> 00:23:24.240
you just need to be familiar with the
00:23:24.240 --> 00:23:25.360
language
00:23:25.360 --> 00:23:30.000
familiar enough to come up with sensible
00:23:30.000 --> 00:23:35.200
highlighting patterns
00:23:35.200 --> 00:23:37.600
and if you are a maintainer of a
00:23:37.600 --> 00:23:39.679
language major mode
00:23:39.679 --> 00:23:42.320
you may want to consider integrating
00:23:42.320 --> 00:23:43.360
tree sitter into
00:23:43.360 --> 00:23:46.960
your mode initially maybe as an
00:23:46.960 --> 00:23:50.080
optional feature the integration is
00:23:50.080 --> 00:23:53.279
actually pretty straightforward
00:23:53.279 --> 00:23:56.640
especially for syntax highlighting
00:23:56.640 --> 00:24:01.520
or alternatively
00:24:01.520 --> 00:24:03.760
you can also try writing a new major
00:24:03.760 --> 00:24:04.640
mode
00:24:04.640 --> 00:24:08.000
from scratch that relies on tree sitter
00:24:08.000 --> 00:24:12.559
from the very beginning
00:24:12.559 --> 00:24:16.320
the code for such a major mode is
00:24:16.320 --> 00:24:19.679
quite simple for example
00:24:19.679 --> 00:24:23.200
this is the proposed
00:24:23.200 --> 00:24:26.240
wat-mode for WebAssembly
00:24:26.240 --> 00:24:31.039
the code is just
00:24:31.039 --> 00:24:34.559
like one page of code not
00:24:34.559 --> 00:24:39.520
not a lot
00:24:39.520 --> 00:24:42.720
you can also try writing new minor modes
00:24:42.720 --> 00:24:46.559
or writing integration packages
00:24:46.559 --> 00:24:50.080
for example a lot of package a lot of
00:24:50.080 --> 00:24:50.880
packages
00:24:50.880 --> 00:24:54.559
may benefit from tree sitter integration
00:24:54.559 --> 00:24:58.840
but no one has written the integration
00:24:58.840 --> 00:25:02.960
yet
00:25:02.960 --> 00:25:05.039
if you are interested in tree-sitter you
00:25:05.039 --> 00:25:06.720
can use these links to
00:25:06.720 --> 00:25:10.320
learn more about it I think that's it
00:25:10.320 --> 00:25:11.440
for me today
00:25:11.440 --> 00:25:18.159
I'm happy to answer any questions
|