mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-06-26 18:03:21 +00:00
Compare commits
1746 Commits
coreml-wit
...
ci/env
Author | SHA1 | Date | |
---|---|---|---|
05ce7476ae | |||
f11de0e73c | |||
d5cc27ee4d | |||
5bb1d58c6a | |||
7d14005717 | |||
4ffb8e3e4d | |||
1d8d8ae55e | |||
eebf6bc0bd | |||
dc8f423b40 | |||
548e7052f1 | |||
a34cb73dc2 | |||
82f9496657 | |||
e3c85e75bd | |||
b9eab73fa2 | |||
76385c8311 | |||
442cd1d2e7 | |||
bc8cb97e02 | |||
8dcadf736b | |||
93986b61e0 | |||
bd1a9e34c9 | |||
cc03608e78 | |||
54a54faee4 | |||
96a92ecc4c | |||
edd1d8686a | |||
dc6f4e7c05 | |||
74c85d154e | |||
eb2d8b6ffd | |||
b442dcd598 | |||
c98681e6d5 | |||
3bab804981 | |||
c927830a70 | |||
992b51b3d5 | |||
2c882cbe4c | |||
1fbb119b1e | |||
40dea850fd | |||
8255a830a8 | |||
a0f76b2da7 | |||
394768c48b | |||
846e01b2c0 | |||
6ac8e6b2ce | |||
60d2ddebdf | |||
2e180184a8 | |||
ef40950c4a | |||
c774eec709 | |||
5b481a27a6 | |||
fc7b1ee521 | |||
c42f67e2d2 | |||
339a1cba5d | |||
c64f3e8ada | |||
9f83f67221 | |||
7d3da68f79 | |||
b5d21359c1 | |||
17addf7104 | |||
cdaee8b4bd | |||
4b60ff4f92 | |||
b43b9d928c | |||
e3cb412a59 | |||
ac301a7d9b | |||
82e04e7670 | |||
38ac47cd4d | |||
2d70cd36d7 | |||
98dab49b9a | |||
b1385e9aa9 | |||
48f5e893f5 | |||
dc21871fcb | |||
64a430bc81 | |||
51a3580c79 | |||
37a21dd43d | |||
8a22a8b17f | |||
fcbcad0c90 | |||
4444db7360 | |||
a7fc1038ca | |||
1689aaf854 | |||
4b48fe449a | |||
47cc043e69 | |||
e3d9ffb98b | |||
e22d69839d | |||
defe731263 | |||
4e07957bf9 | |||
d2c5154bb5 | |||
4fac43fe00 | |||
3be9670f17 | |||
86729fcd6d | |||
7fbca6304e | |||
d597f83e1a | |||
e5edcc6259 | |||
556f773d53 | |||
91d02de332 | |||
1b67d72f87 | |||
14d7c0368d | |||
db6e19188a | |||
b4b063a5c9 | |||
930b739e7a | |||
5981352bb5 | |||
7561da244e | |||
be83f342fb | |||
fd369871f7 | |||
bbd8364f5e | |||
e4102440ef | |||
f8242ec483 | |||
ef51b4cba4 | |||
6f08b24146 | |||
7c165d7fa8 | |||
2f0cf44915 | |||
b9c972fd0d | |||
01c9aafbfd | |||
bae6bbf487 | |||
c310272fa0 | |||
bd0b55dbe0 | |||
ba4645db2c | |||
dfc6ca62f3 | |||
47e14c0529 | |||
d682e15090 | |||
46d07b9c85 | |||
33ea03f131 | |||
dbcc669e1a | |||
16245b35e4 | |||
898c0cb9d1 | |||
eb9e5032c4 | |||
cadfc50eab | |||
3f91832352 | |||
cff8868b5f | |||
90e3c5fc40 | |||
e0f4cef867 | |||
234460987e | |||
b8ab126343 | |||
edc5d9267c | |||
344b98a44f | |||
dbeb7916b8 | |||
fad2806352 | |||
9906792ec3 | |||
c49ee07ff4 | |||
f8a831779e | |||
85451e3612 | |||
43c744ce8b | |||
fc2e44490d | |||
f41fdad200 | |||
80fa576254 | |||
75e7d0585e | |||
682a6f5f87 | |||
115716d109 | |||
b2cfef655b | |||
22e3df0afa | |||
028511d349 | |||
70c4038842 | |||
8639c003a9 | |||
d5d831da65 | |||
7230a6e1c8 | |||
a160fa0f3a | |||
0282ad8fd1 | |||
9e467815d4 | |||
727891d9bf | |||
c262dc80e2 | |||
30767b4c4e | |||
16eeb31933 | |||
ba523d5e22 | |||
3736706139 | |||
58640aa456 | |||
5183a05e56 | |||
0dcada42d4 | |||
d507b4cebe | |||
90171055f3 | |||
668306ff2b | |||
fdc21fc87b | |||
7183a1eb72 | |||
09f3c66648 | |||
62e2414620 | |||
de49024e49 | |||
db6383094c | |||
164f13c6a9 | |||
02aa86230a | |||
54a2ee648f | |||
9700cfb0a3 | |||
8e0143e205 | |||
f12559d590 | |||
589b40810a | |||
7ffcd05267 | |||
7a423f1c00 | |||
99b011a9f5 | |||
19d95f9f9a | |||
d5ef1737d8 | |||
1deb41f0e7 | |||
2425caf4fd | |||
a4b00bcaaf | |||
cdb8aa2f2e | |||
06209f6683 | |||
c3235bd81e | |||
262d0abc87 | |||
124eec1664 | |||
b08c3a88c8 | |||
0afce25a69 | |||
acdbe58631 | |||
09fabffdf5 | |||
3988d6396b | |||
c8c63eeec0 | |||
abf7f24410 | |||
341f5c28e6 | |||
5377099524 | |||
dcbb375779 | |||
4334c71aed | |||
e875a82473 | |||
507e230f1e | |||
eb68324c86 | |||
e940fbf283 | |||
35d0e02c72 | |||
45d3faf961 | |||
2ab2eb5110 | |||
b82d305282 | |||
885e31368d | |||
8a9ad7844d | |||
eb874b3a3c | |||
eb78e3a3f1 | |||
ece3ff88f6 | |||
9366544991 | |||
95583942ed | |||
2e93cb6a2f | |||
de5cd60d1c | |||
3fcba3e58b | |||
cea5f1c52f | |||
2112462db4 | |||
fc84ecd445 | |||
8de1e99907 | |||
499af9294a | |||
bcf937c216 | |||
b8d90953d7 | |||
60a422147b | |||
3387415bad | |||
536ca3ec89 | |||
a4bb983190 | |||
39c205f555 | |||
6d502f33dc | |||
5ea27d089d | |||
1462d92588 | |||
7ba1a41f47 | |||
5ea088636f | |||
f32ddb3b1c | |||
79b75ece03 | |||
6348d73e55 | |||
fb36a1538a | |||
c81b8b910b | |||
85b60f31d0 | |||
227b5ffa36 | |||
36a64a253f | |||
c84b83c370 | |||
5136fd92c2 | |||
7d55637f0b | |||
0994506054 | |||
53c9a3a984 | |||
ed09075ca0 | |||
f07a81aa9f | |||
4183517076 | |||
f4668169a0 | |||
944ce49439 | |||
2e59dced12 | |||
e4e05981d6 | |||
3de9deead5 | |||
47f989f9b3 | |||
acc4e13dee | |||
ba6c2a8fd9 | |||
6576af00d7 | |||
8ac5db0169 | |||
61edb117a0 | |||
eb97b257eb | |||
479499dc0e | |||
d420a759c5 | |||
a1ab9b5e91 | |||
e22d38e4f2 | |||
856fbaa92f | |||
2c05efa4b1 | |||
c21fb10b28 | |||
26c9fd0cdc | |||
e6eed605cf | |||
abe3102cb7 | |||
1193e494a9 | |||
e5e951672e | |||
0e24559ad9 | |||
527ac800cf | |||
479bd77169 | |||
d8bf63a41b | |||
b82c8d76dc | |||
86346f811e | |||
c635f40a34 | |||
e0be0de1ee | |||
60dc6d003f | |||
eb27e0d834 | |||
a682fdce0c | |||
9ffbd3d969 | |||
6585a890b4 | |||
d0a050b51f | |||
e990d1b791 | |||
4a6d52efe6 | |||
8b841d430a | |||
b74b68212a | |||
3a27b2b91b | |||
d34445e960 | |||
f897eb7670 | |||
2f2841bfce | |||
09a1b61218 | |||
94e7da1ff2 | |||
c4aed6831e | |||
199579652e | |||
d17e7139d8 | |||
6a52eaea74 | |||
6aa1d7b892 | |||
262e865a70 | |||
ed733e85a1 | |||
5980b1ae77 | |||
0415a66044 | |||
7d134e3737 | |||
9df53b357e | |||
b2115b4d9b | |||
0164427dd5 | |||
627b11c78a | |||
472464453d | |||
11dddfbc9e | |||
384e214cc7 | |||
f2c680f893 | |||
fbe66da0e5 | |||
a815940e0e | |||
904e307bce | |||
491ec076b4 | |||
966433fdf2 | |||
6f1ba9d82d | |||
015ecd0001 | |||
b7c64a4352 | |||
7895d39508 | |||
22616f00f9 | |||
02c6fcbc2c | |||
3daeacad24 | |||
4d73962da4 | |||
068812650e | |||
4b7e059e15 | |||
30e35d7271 | |||
3623bd58f2 | |||
cb847c20a7 | |||
964b154a2a | |||
d7c2a04bce | |||
2bb4ca9cba | |||
a753a82462 | |||
276b08d8f0 | |||
4ca1e72fe0 | |||
16a66f103f | |||
330273901f | |||
42099a9342 | |||
90dd5fca9c | |||
2490f2a7f8 | |||
230e985633 | |||
ae24083f23 | |||
6463e36369 | |||
b3301f7d82 | |||
ab5d4d93ec | |||
2d6e9dd723 | |||
2f16e51553 | |||
0f0994902f | |||
5e1fcc1780 | |||
48f421de23 | |||
e7afb2b991 | |||
9a5ef7b169 | |||
453cc0fcf1 | |||
78dfec6bc5 | |||
f6d518fc4c | |||
ac33379a35 | |||
77e3e4a090 | |||
b840bb09be | |||
8b1c1c30a7 | |||
4b81335f75 | |||
2a4b5c9d7e | |||
04662748aa | |||
a117279e13 | |||
bbb292ed38 | |||
95e8901e71 | |||
4af9626702 | |||
c52d1035de | |||
5773a14980 | |||
6939147c47 | |||
98f9916c9f | |||
021eef1000 | |||
a9d06ce151 | |||
8c6a9b8bb6 | |||
37c88027e1 | |||
9db070a3c5 | |||
7fd8d9c220 | |||
06e059b8f8 | |||
c9f49d5f9d | |||
f4c1d7df39 | |||
339b8e559c | |||
5f6d6919b4 | |||
8ee767732f | |||
45f1f9144f | |||
53589c8f12 | |||
7ac2f17fac | |||
48862c7b27 | |||
44f7d9f4e3 | |||
fd12302587 | |||
f80bef4630 | |||
161b443514 | |||
ef7fbe1c66 | |||
0879d3599e | |||
2a444dc5bd | |||
45cf1634dc | |||
dcb2922d1d | |||
3c5c751174 | |||
24ad19d0e9 | |||
bd574b05af | |||
7e0eafcb1e | |||
75670ae673 | |||
d4fcdf602b | |||
1bebb1a116 | |||
ee437cde59 | |||
c1506d38cf | |||
c9541741e6 | |||
6a55015dc4 | |||
7e86030d4d | |||
401fbea326 | |||
44d1cbdfe9 | |||
3216efef2e | |||
2c0484ebf7 | |||
3298916e5e | |||
746bf2596f | |||
5f7e094ccb | |||
6266a9f9e5 | |||
d24f981fb2 | |||
01d3bd7d5c | |||
bb12cd9b77 | |||
f02b40bcb4 | |||
83ac2842bd | |||
c4e95fb74d | |||
e23721f3fb | |||
c0a9f8ef85 | |||
6477b84eb6 | |||
24d706774d | |||
5089ab2d6a | |||
bdbb906817 | |||
fa2ebd336e | |||
21b01a21b6 | |||
b54ce5edc5 | |||
26a31b78e9 | |||
14d13c5f9f | |||
5e110c2eb5 | |||
4a9926d521 | |||
ae3c5642d0 | |||
e287a3b627 | |||
b890243690 | |||
b7b38f7d68 | |||
9f67aab211 | |||
8f0f785d88 | |||
d0b8335789 | |||
1550be79f1 | |||
807f848c2f | |||
42398f13b0 | |||
31c3482a4e | |||
50257af686 | |||
d111a0987e | |||
915bcd2c63 | |||
f69c8b6f1b | |||
8c9044bef0 | |||
5f8e928194 | |||
25da30bd60 | |||
542734100e | |||
b06b4c0c08 | |||
939d36fb4c | |||
1471e41180 | |||
35949192e9 | |||
9c817edb48 | |||
24a0feb5d9 | |||
2ab8cce7e3 | |||
b40c255e98 | |||
ec3e16445e | |||
0665168ef3 | |||
5f6b992eea | |||
3e231ab9cc | |||
371bfaca8c | |||
91e30a3a23 | |||
1e122d66f9 | |||
63a4e09a0f | |||
75dd198870 | |||
1d48457aa6 | |||
307712a903 | |||
fbc9a05ddf | |||
28496ac55e | |||
b1c06c09b0 | |||
498ac0dc27 | |||
03af461de8 | |||
f19463ece2 | |||
5f8a086e22 | |||
a28d82e373 | |||
5ccca19f0c | |||
300c07b94d | |||
31aea563a8 | |||
0377596b77 | |||
c65d0fd3c8 | |||
d9efb664ac | |||
b5b4b0f5de | |||
ab36d02560 | |||
6e67749c00 | |||
ab0385f43b | |||
10eb603a3c | |||
a3231b2f2e | |||
13db492f83 | |||
741c138aa1 | |||
25f9fee6fb | |||
7c1570bee6 | |||
4078e4c388 | |||
a4a22daa8f | |||
e1936eb2a5 | |||
28b044dad9 | |||
b8f11a0a17 | |||
ff5a838099 | |||
84713613be | |||
ded89c9d08 | |||
042e95d92f | |||
81110c0174 | |||
c313723860 | |||
e69b2371e2 | |||
1531259b2c | |||
44bc2767fd | |||
bd7ace7adc | |||
315364d7de | |||
80753d4da8 | |||
8f9bdca4c4 | |||
4e10afb5a9 | |||
aa037a60f3 | |||
19dca2bb14 | |||
55e422109b | |||
3f020fac9d | |||
1626b73b03 | |||
850f7b19d3 | |||
d4bc413505 | |||
fc49ee4479 | |||
c0ea41f6b2 | |||
0fbaac9c89 | |||
a5abfe6a90 | |||
d3f7137cc9 | |||
f7c99e49b3 | |||
1d5752fa42 | |||
b6049060dd | |||
06a1da9daf | |||
746d173592 | |||
fdbfb460ed | |||
ebca09a3d1 | |||
9f346d0084 | |||
6a94163b91 | |||
8a35b58c4f | |||
1789abca84 | |||
847f94fdeb | |||
6e40108a59 | |||
1ba185f4af | |||
396089f3cf | |||
941912467d | |||
0b1b094a67 | |||
40e52a76b9 | |||
cf977670e6 | |||
df2c364de7 | |||
1acfadb721 | |||
ea642144d2 | |||
282a8654c4 | |||
936cf3beb7 | |||
bc92c2f8f0 | |||
f7d55e0614 | |||
f62a546e03 | |||
2944cb72d9 | |||
ccc2547210 | |||
162a455402 | |||
ff2cb0811f | |||
5e9d6baa48 | |||
845f8d663e | |||
31fdf05fda | |||
0ac6666cd2 | |||
6c91da80b8 | |||
c245168ba3 | |||
280fee8fa0 | |||
78b4c1c25f | |||
1edea2eb4b | |||
96808786b7 | |||
bb57ecb85e | |||
abdb73c7cc | |||
391e548a43 | |||
2a29afd4c6 | |||
5963004ff9 | |||
ede1718f6d | |||
2ef717b293 | |||
8feb375fbd | |||
69339af2d1 | |||
0d2e2aed80 | |||
451e9ee92c | |||
1133ac98a8 | |||
76d27eec9a | |||
fe18c29ab8 | |||
234f9bd320 | |||
3b183cfae7 | |||
02285dff81 | |||
2fc1d20f9e | |||
08e8414f27 | |||
05c6139625 | |||
896c41ef30 | |||
c36ddc43c6 | |||
13f41af43e | |||
3fc5306b82 | |||
adf2474b10 | |||
008816a257 | |||
33e5a6612e | |||
f0a7d65b3d | |||
54e5095765 | |||
34291099fb | |||
d245d7aec7 | |||
d661283e68 | |||
c0761c95f5 | |||
138e20b697 | |||
a8d9abfa22 | |||
195afd6dc1 | |||
1fd78999e8 | |||
374e9e0c5e | |||
a2cb5b4183 | |||
288ae5176e | |||
d868122a5a | |||
2ba25fb122 | |||
4f4687cb74 | |||
66b00fad0d | |||
c6cc8d16c3 | |||
3f8f8a78a2 | |||
3e47686919 | |||
a53b69a003 | |||
d1c9b47360 | |||
32f659861a | |||
a785232bf9 | |||
0677293503 | |||
1fbdb813c0 | |||
67725ac8f3 | |||
dac89af357 | |||
26225f1fb0 | |||
3468983315 | |||
c7515b0995 | |||
253ce30004 | |||
03a6fae484 | |||
d37fd275fd | |||
195877fd72 | |||
9e715e1b96 | |||
6f5514b6e2 | |||
709a22b92d | |||
01e214a1d7 | |||
1cecfe6a02 | |||
3764bc974c | |||
fcffc912a9 | |||
38d40b9972 | |||
09149ee0ae | |||
6b7f37dd5c | |||
791812fb54 | |||
5d6dc19f04 | |||
34972dbe22 | |||
bea43e0c64 | |||
3853d83d73 | |||
5b1ce40fa8 | |||
049b3a0e53 | |||
a551933542 | |||
5caa19240d | |||
5236f02784 | |||
2abaf19e0d | |||
6eb7a0ffbd | |||
e8f0f9b5f0 | |||
d8e24b877d | |||
cc68f31577 | |||
4a4a52bf98 | |||
c96906d84d | |||
9600fc3eb1 | |||
e2e55a6fed | |||
c4e1861d2c | |||
da9809f243 | |||
9d754a56cf | |||
8cc90a0e80 | |||
82b5c56f63 | |||
b2ad484c89 | |||
d96a17848f | |||
0e7798677a | |||
58a36d2e3b | |||
24d8534bd8 | |||
9b16ddd3a5 | |||
32f88af17b | |||
9bf7250bf9 | |||
17e49d3ab2 | |||
58b725282a | |||
7e59afa1e0 | |||
5ac022140e | |||
0eaa67280c | |||
5a62fdb735 | |||
60098d6204 | |||
317293e6a7 | |||
488a966c07 | |||
8954769aa2 | |||
df06468d9e | |||
1fbd828a5d | |||
d2986f8b07 | |||
8bfa8574e2 | |||
376567bf4f | |||
c0fd64a9c0 | |||
6e9596f6de | |||
9e3c5345cd | |||
b6c05ce82f | |||
52c80cac00 | |||
3643120690 | |||
d65786ea54 | |||
7f78675008 | |||
22fcd5fd11 | |||
993f0df419 | |||
9b1788483c | |||
ad37d26983 | |||
81c999fe0a | |||
4b7de08bfd | |||
4b9c4de1ad | |||
be88ee1d75 | |||
3ab19c744e | |||
6eac06759b | |||
2e9a5bd2c4 | |||
58323bf8ed | |||
22058f2dbc | |||
5b7979a1e6 | |||
ee14c02365 | |||
ab39dd34e1 | |||
b1348d3530 | |||
90641b5cf4 | |||
4160b930f1 | |||
7a96e661e4 | |||
a902fb4ab2 | |||
6cb38c3673 | |||
9cf14ebcbc | |||
8e39ee171f | |||
d26250f78c | |||
5218ea21b8 | |||
e60be821ce | |||
19708df884 | |||
3f190addda | |||
b355ee7cfa | |||
49ac8872b4 | |||
8ef98ae7e3 | |||
e471adcfa5 | |||
aa816c922c | |||
b3264eb266 | |||
eb2eb87a58 | |||
83fcb0e486 | |||
f7bb412878 | |||
ef6dcf0d0c | |||
c7ea4fd235 | |||
525f190917 | |||
dd916a2852 | |||
0620fe00ec | |||
31d0a9a14f | |||
c06970dd72 | |||
7598acf525 | |||
43ddfce969 | |||
a7e6d2cd9c | |||
86506b0c5c | |||
11182fae34 | |||
0bc8bffe1d | |||
8c4f30497a | |||
b1ee3a8444 | |||
be9a16fd3f | |||
f4d9a95b0f | |||
a8ab3abe09 | |||
fb6a835938 | |||
8923bb4292 | |||
fcba6aa352 | |||
8807fe608b | |||
3e94c7a81d | |||
77af3254e1 | |||
d4b3cffec4 | |||
b852a4c5ca | |||
2157abaab4 | |||
68d609a12c | |||
5a8ae474f0 | |||
84493d7f3e | |||
15d71189e9 | |||
37e962580f | |||
db0ea7a2f2 | |||
5498b0e6c0 | |||
2af4a52c39 | |||
eee2fe882e | |||
0d1a11e5e2 | |||
b2ead7d6f4 | |||
8da6fd4dff | |||
ab8ec9e940 | |||
701265bf38 | |||
fe36c90971 | |||
6739eb83c3 | |||
f68298ce06 | |||
7ae885c1ef | |||
d207c68822 | |||
16d72504fe | |||
1c31f9d4a8 | |||
8ecb2f1f68 | |||
5226c3d45c | |||
dbf9c15e30 | |||
d3f6c34976 | |||
425e2910a3 | |||
49868aa851 | |||
ff08e30ab5 | |||
95f2a191c0 | |||
00422ec3cf | |||
c5b05321e9 | |||
5dc636a65a | |||
73703a144f | |||
e89fdceec2 | |||
29a2739d27 | |||
ee6d17f6b4 | |||
95e90823d9 | |||
005cc45df3 | |||
c2c60dc9ba | |||
4af3194b7c | |||
4a2ba1a065 | |||
f096cc6807 | |||
e4bc83ab47 | |||
db7e0dbe6e | |||
bf88c94da9 | |||
3eea171cab | |||
64a56ebf13 | |||
bec9836849 | |||
c118733a29 | |||
bb3dd45524 | |||
04e7fa6f4f | |||
9f7f36d4c9 | |||
4a62efbb95 | |||
0a55a70b9b | |||
dc8cc2dd6f | |||
3efedb9511 | |||
e30c679928 | |||
bf4cb4abad | |||
e293f17d34 | |||
5d950c4b8d | |||
820446e230 | |||
54d5823ebe | |||
5181494e9f | |||
4a6e6e8b30 | |||
de29b193f6 | |||
922971041b | |||
63a767a134 | |||
30841fa786 | |||
3b1ac03828 | |||
990de617b5 | |||
6975600b4b | |||
061eeb9f61 | |||
4942b1b428 | |||
3c7cc5c437 | |||
5cd42ee2cc | |||
ee718f3da6 | |||
63eac1f608 | |||
b17ba2815b | |||
7a489af2f3 | |||
4a4ea13d6d | |||
174a461fc6 | |||
d8b7a24bc9 | |||
acf3832c9c | |||
d29ac44303 | |||
12638dfef0 | |||
f100b3b523 | |||
a99e213a82 | |||
7483d2b61c | |||
1fe5948227 | |||
760497e1ab | |||
b172e7714c | |||
dc01aadb18 | |||
e08c62149b | |||
abab4500fa | |||
e666315fa8 | |||
3f869af14c | |||
cbacb7634c | |||
6cc3b022ee | |||
e5e38d4920 | |||
2a6bab5655 | |||
8c01c9b85c | |||
d1123d795e | |||
9b3d784020 | |||
a16137d13d | |||
5582039d0a | |||
9a16c643e2 | |||
10a8a23100 | |||
29cfeef77f | |||
e66e9ea25b | |||
276779a849 | |||
1f35ce61c1 | |||
4b19cc3ed4 | |||
a535d348dd | |||
8f5dc729d9 | |||
02fc147a0b | |||
109148ac84 | |||
3563473d2c | |||
046834198d | |||
0a2ad9de06 | |||
39b0640b09 | |||
8dca71de64 | |||
812787cbc5 | |||
68ef10805e | |||
96fdb90f5f | |||
e98f9ac554 | |||
02d481595b | |||
7091c7ab5a | |||
d70ccb75f5 | |||
5ee048eb67 | |||
37ed71c964 | |||
8cd7a3df37 | |||
04a3279320 | |||
45ddda8e0c | |||
c41317fd66 | |||
96b8419b27 | |||
3c63f4cf35 | |||
5848dfd9c8 | |||
29ab5d0326 | |||
c4d6958b3e | |||
c9dcb75118 | |||
bbdbc3fc62 | |||
28c207a541 | |||
c23f830983 | |||
caeeb32b41 | |||
584cc1177a | |||
cc1ae10989 | |||
eb26f55b40 | |||
eb2b086584 | |||
67919cfe11 | |||
bf5fc81a8a | |||
2b07dc3186 | |||
951c463d39 | |||
7f257b210f | |||
705fe30a02 | |||
45b5b95e29 | |||
f2c47d1e6a | |||
b4bb9b9036 | |||
2bc6483299 | |||
ec52f900e4 | |||
77d708fabb | |||
c00149c861 | |||
574661f2e6 | |||
7bd69349bf | |||
488ad99c13 | |||
7178cceeaa | |||
8d55ccdb8c | |||
37a72cb170 | |||
bf9b69284f | |||
c4de1e19df | |||
5b7073cae1 | |||
b29b3b2924 | |||
420b6abc54 | |||
99804b0f3e | |||
c55964c956 | |||
20c542c713 | |||
c2bdb960cd | |||
87acd6d629 | |||
f842d31171 | |||
ffef323c4c | |||
af5833e298 | |||
b87494bb8f | |||
ad130431aa | |||
e130b66642 | |||
c7b6988678 | |||
05042a782d | |||
a7dc2aab16 | |||
22d46b7ba4 | |||
c10db6ea28 | |||
1b51fdf170 | |||
adee3f9c1f | |||
4798be1f9a | |||
08981d1bac | |||
7094ea5e75 | |||
9d5771ae43 | |||
f56b8305c4 | |||
1056ad762c | |||
c451080c8b | |||
8e7c22fbdb | |||
e57e95eb0d | |||
130f43e4b8 | |||
d8356a1cc2 | |||
4ef8d9f44e | |||
3928dbd206 | |||
2ced6f0742 | |||
30f73109b8 | |||
17fa62d3d3 | |||
1da5edcde0 | |||
0bb05b113d | |||
f141b2b938 | |||
2b434c449e | |||
e93081f83f | |||
b6bbce4ae9 | |||
7705dc52da | |||
e6acaf9d91 | |||
2c81e6fd51 | |||
9506267ce5 | |||
fbeb80b5f0 | |||
3fa7d29876 | |||
fe179ae0cc | |||
40aeeeecc4 | |||
5a863fbe18 | |||
91c646c61d | |||
accada542a | |||
e54329da7b | |||
284fac39fb | |||
fe454b8d9e | |||
c114b75aee | |||
4be936b88b | |||
26c550f772 | |||
24f0aa460b | |||
69efc39d5c | |||
a2ad810118 | |||
1ae1a9cd56 | |||
b5521fea19 | |||
9b84195225 | |||
11c1df0436 | |||
c754494fdd | |||
1bce67999d | |||
6c39ea46b6 | |||
156a33a990 | |||
5167ebdfca | |||
b574646d75 | |||
388c3462a6 | |||
9ad202bee9 | |||
f0d3fb4a7e | |||
9d4c8b8aa5 | |||
ecfac1e240 | |||
6f7140f568 | |||
05b17112cf | |||
a15fb5cd79 | |||
63fd148d8f | |||
6c3971b29b | |||
a6d264f331 | |||
2959686019 | |||
c96b0a938e | |||
c97796aa0f | |||
7a4f7d825e | |||
fdb2c87350 | |||
98c0b77e0c | |||
9d6d50d933 | |||
c1320c1f0c | |||
66aaf03a7a | |||
00a0947c65 | |||
60f3713026 | |||
37e6757453 | |||
8dcefdf4a9 | |||
73d13ad19a | |||
b6680fab50 | |||
f760756078 | |||
58210d6a76 | |||
8fac6455ff | |||
22b6598cc9 | |||
858452d58d | |||
7f85e1d7fd | |||
b0c3cbf2e8 | |||
a750868428 | |||
7395c70a74 | |||
9fab28135c | |||
08d3eef97d | |||
1b5439a6c2 | |||
c7f95b7ca2 | |||
5c554c04ff | |||
c383f091a1 | |||
8f253ef3af | |||
c7dc37f97c | |||
526332873b | |||
1d2721ca72 | |||
219e601dab | |||
3b8aade3c2 | |||
52ccd4a3a8 | |||
5275074d37 | |||
c15b4cda7d | |||
d3cfb6ca2b | |||
956ef860bc | |||
671b4bde6c | |||
c8eeb93a6a | |||
319fe5146e | |||
13c22321d1 | |||
ccbe9d5676 | |||
81a3c41aa0 | |||
a50207c65d | |||
97878e53fd | |||
61b05815e0 | |||
1dce94cf26 | |||
f12e982c0b | |||
fa966b9b40 | |||
b83a9fc9d3 | |||
3adbf2fb03 | |||
700d146127 | |||
a74fde9b4c | |||
1d7657f409 | |||
ac283dbce7 | |||
1e8f28c42a | |||
fc366b807a | |||
9fb308d90f | |||
2948c740a2 | |||
1558ec5a16 | |||
fff24a0148 | |||
48a145207e | |||
79d5765e7e | |||
04e48094e4 | |||
741abb162c | |||
e7794a868f | |||
725350d4ea | |||
906c73b219 | |||
00d80ff965 | |||
1b553b9817 | |||
de4d067f1e | |||
e715f6a601 | |||
f60ccfd83b | |||
3753a2b2a8 | |||
592dd25615 | |||
c8709d4604 | |||
8932c2d6ce | |||
2bddfdd7c8 | |||
46e3c3f112 | |||
ef24ae0c7d | |||
a753926f02 | |||
9dc60fc02d | |||
d73a63629e | |||
f79d0d4f74 | |||
4f88940ff6 | |||
7bdb1de9ec | |||
653d2e8ff9 | |||
2fef660d0a | |||
24eba5a2ff | |||
6e9d3aa32d | |||
9ae0d18856 | |||
a56f435fd4 | |||
ec166499d8 | |||
ccf022f970 | |||
2852e1af55 | |||
ce945b50c3 | |||
2f5a5a66dd | |||
8e409d1113 | |||
05d1b61af4 | |||
647cae178a | |||
bae7c23fbf | |||
18ea187d42 | |||
1daeffca54 | |||
2f6f1d4465 | |||
7ff1894c34 | |||
8edfc54c2b | |||
9c399689ec | |||
9d9a405cfd | |||
edd8b38a75 | |||
ed76818700 | |||
9a0b59d990 | |||
93a84a143b | |||
bd26876267 | |||
21d295180d | |||
c3bfc9bfda | |||
422a6b16fc | |||
11dd0d4482 | |||
26dd2f06ac | |||
8cee7c08b6 | |||
2e2626b167 | |||
c0c0ae2dea | |||
897412b5b6 | |||
f22d27a385 | |||
ccd7c1d2da | |||
c713eb5e2a | |||
25d313b38b | |||
3168dbf23b | |||
1711bb3881 | |||
2533305596 | |||
0eca512ac8 | |||
013e394a4b | |||
d83f371b5f | |||
1c71816eab | |||
7b1d8ea7e0 | |||
b1f7223a0a | |||
8408a4be8e | |||
72849c24ba | |||
c19c28be71 | |||
0d8fd8483a | |||
3170841ed9 | |||
7a6e385c1b | |||
578e47e70c | |||
fac5b43830 | |||
9e7c5212a1 | |||
1cb64f7368 | |||
f18738f247 | |||
a0ddd8392c | |||
a2506909b1 | |||
7b1ff212d9 | |||
e5d06cfc0f | |||
31891db2e3 | |||
5fdb27ff80 | |||
6b16927d18 | |||
ce411498f6 | |||
208de95ac7 | |||
c2ce39c795 | |||
8daa534818 | |||
9fca69b410 | |||
b26c645420 | |||
1879ec556e | |||
c6e53cfc46 | |||
b19f2fb815 | |||
a6b0950916 | |||
d352dbd163 | |||
eb23f4ef16 | |||
c56344b509 | |||
59119f4f20 | |||
276615d708 | |||
b602819b6e | |||
c2c606f05b | |||
83afebe872 | |||
a4d8f9d559 | |||
5ec1e0edfa | |||
30a11b1ab8 | |||
f04e6b87d7 | |||
0c33928b55 | |||
0775374750 | |||
7d90bb035b | |||
2c1ad21ba8 | |||
eca5ff9868 | |||
1b25d2fa0a | |||
74a6acc999 | |||
a4ed8a0821 | |||
9f675e021c | |||
a38efcb9fd | |||
31591649a0 | |||
4f5c46a84f | |||
462ffc58db | |||
65faae0b6a | |||
dda4b0ed06 | |||
07d04280be | |||
917c56ded4 | |||
3d42463845 | |||
3ffc83d90a | |||
e3c5e2cba8 | |||
b742f13e70 | |||
52c529eeb1 | |||
551529290d | |||
25a90ffa38 | |||
866b67ca93 | |||
d7e9f58f7f | |||
04839bae22 | |||
3cc6e04a52 | |||
b7ef178b9c | |||
47dfe9d4db | |||
1d3270cc8f | |||
a6fb6ab597 | |||
163e74b6c3 | |||
f273e66dc6 | |||
02b4c52c12 | |||
518199c09e | |||
8b17a2f776 | |||
b6d2827914 | |||
9711bae0b3 | |||
eec38f63bd | |||
ef5e6b746f | |||
77bf6b5f56 | |||
b562fff9d0 | |||
b5dec374f4 | |||
fa0dc6167c | |||
55bcd62a4b | |||
0ed762d691 | |||
1b5bb7792e | |||
9b735cea77 | |||
12c462d656 | |||
fc7b0e2c28 | |||
f850a067ed | |||
f75e1197f1 | |||
aa8a75e287 | |||
80e8a2ea39 | |||
19f8048139 | |||
0f80e5a80a | |||
b6559333ff | |||
434b8f3b96 | |||
7a74e929c8 | |||
361ecebe90 | |||
807cbc672e | |||
98ae5276b7 | |||
6adb969b09 | |||
8a7d6ff51a | |||
25f650a8e8 | |||
44e517f074 | |||
cb9de61659 | |||
a2ef80d66f | |||
baa190446a | |||
8f5220d81f | |||
8e391fcf3a | |||
593657054e | |||
ae5c4f7340 | |||
baa30bacdb | |||
3e6fad07aa | |||
e72e4158de | |||
bd41733db2 | |||
23c648e98d | |||
75ab2d06f5 | |||
adc099edee | |||
52cce82493 | |||
ef3c9ed9eb | |||
7fe3ed5e00 | |||
6061241292 | |||
0878ab7c15 | |||
c65edd5b64 | |||
3c8d14e9c5 | |||
c3977cb2ce | |||
6da1661bc2 | |||
cc56540661 | |||
94c1ae8668 | |||
55d54359e0 | |||
d33c2ad354 | |||
9afa7ff624 | |||
0649289f02 | |||
aaeaa43878 | |||
078b8e23bf | |||
74da3e1757 | |||
2d2c93a798 | |||
4bbb60efce | |||
1cf679dec4 | |||
41026c1e4b | |||
d6b9be21d7 | |||
c0329acde8 | |||
fb466b3417 | |||
1f50a7d29f | |||
1de21b913d | |||
4aea058e5a | |||
fd10234363 | |||
8fb5c6a409 | |||
2fe5fbfcc2 | |||
01637e1a4c | |||
1b349eb1f9 | |||
138eaebead | |||
61b9192f27 | |||
161b51d91a | |||
f904b31a7d | |||
f6614155e4 | |||
f5f159c320 | |||
6ebba525f1 | |||
2a5874441d | |||
d08445c9ad | |||
4a945696cb | |||
dabc964d83 | |||
654baf693d | |||
f001a3b7b6 | |||
c615f2c335 | |||
d839dd0242 | |||
435847891c | |||
182f290808 | |||
447dfc11fc | |||
9aa9f3b84e | |||
396ebd1e80 | |||
12490f4398 | |||
db078a9ba8 | |||
a13a7da5ad | |||
519f8e8684 | |||
40ae0962f4 | |||
1560288048 | |||
1ad6fafd91 | |||
70840aed5f | |||
b24d18feb9 | |||
3fa98f4395 | |||
d05b7ee90e | |||
6dcee35129 | |||
5cb345f5e9 | |||
fbcb52d3cd | |||
6b01e3fedd | |||
f7908f9bb8 | |||
00b7a4be02 | |||
04b0a768b8 | |||
87670425f2 | |||
32e71a1861 | |||
9c857cf280 | |||
97b12212dd | |||
9fa34d79ec | |||
a0a64a19dd | |||
bbc23611fa | |||
e9783a1fb4 | |||
9e0cc28792 | |||
73072a7c73 | |||
a8ba1262ff | |||
e66a9a7806 | |||
338442d773 | |||
10651bddf6 | |||
53d4d0b30d | |||
2865e4710b | |||
c46a74a19d | |||
46dc49a6a1 | |||
cc7f872131 | |||
bcc1658cd0 | |||
c46886f599 | |||
29f78392c1 | |||
022756a872 | |||
3b8c2dff57 | |||
0b9af32a8b | |||
11b1b63b14 | |||
0e26a6c92e | |||
66d8f0b7f1 | |||
ba5bcde874 | |||
ab0a8593c5 | |||
668ffc9b23 | |||
9962371f71 | |||
993acb5d41 | |||
a3d0aa73d1 | |||
14c57952f7 | |||
6c369d6788 | |||
4cdd9aad9b | |||
f38c057503 | |||
1e5544b39b | |||
d5673af79f | |||
a28dacec65 | |||
dbe29d4e33 | |||
fe3a67c546 | |||
b138ff2be3 | |||
cf6f1e4181 | |||
620a223814 | |||
f39f9690ec | |||
f9ca90256b | |||
2623640cd6 | |||
d87de61ae6 | |||
f5f485f899 | |||
e77b27c331 | |||
a5cc3dc8a2 | |||
37a709f655 | |||
3a5302108d | |||
d2ee117a0a | |||
db8ccdb850 | |||
d2419030b0 | |||
8986690c2a | |||
9286d3f584 | |||
940de9dbe9 | |||
88112c8afb | |||
375585c07c | |||
fd99ece8e3 | |||
8171e621fc | |||
ec03661b20 | |||
6335933a5b | |||
885b5563d0 | |||
9521ba6801 | |||
29511d33c7 | |||
7bc4d22337 | |||
afce6fa113 | |||
3163090d89 | |||
f0efd0202d | |||
3c28d1a571 | |||
e369243ebd | |||
a0ec3fac54 | |||
6559b538e5 | |||
73d5005880 | |||
6b094b6dfe | |||
641f2f4282 | |||
bfacd9f8ce | |||
f52e74d4dc | |||
23c21e92eb | |||
447d49530c | |||
9d6ebd877c | |||
0ba365f958 | |||
010c8ec3ab | |||
ffdb5c4735 | |||
a5881d619c | |||
34f70b3a56 | |||
8328d1900f | |||
d2bd5f0bdc | |||
34209a37a2 | |||
180e062eda | |||
5c7be85fdc | |||
146169ec38 | |||
9befab5ab9 | |||
9ac88f2b57 | |||
46f5b6cb08 | |||
eff3570f78 | |||
fa19bc4195 | |||
a01b2e0971 | |||
8159a9ab99 | |||
7516d9c16d | |||
46cc26d1b9 | |||
f784f9fa12 | |||
ca23f8ee6d | |||
e2f0eba2d4 | |||
d4353e48f7 | |||
bebf0da983 | |||
848e54f3ad | |||
7883d1cae4 | |||
ccc85b4ff8 | |||
c7606b47df | |||
d38af151a1 | |||
94267df08e | |||
8713c67133 | |||
57a60639bb | |||
bfbaa4dce5 | |||
1d79e78402 | |||
b6c5f49b78 | |||
d4231649e6 | |||
3e5c7feeff | |||
c23598e4ca | |||
54a08bde29 | |||
9f8bbd3fee | |||
3172006a24 | |||
684bc8bd70 | |||
b0502836b8 | |||
ec7a6f04f9 | |||
37947203e6 | |||
953419c69a | |||
0de8582f65 | |||
baeb733691 | |||
d03c60dd7f | |||
6a5d195109 | |||
0cbef75422 | |||
2cdfc4e025 | |||
973111088b | |||
11b503055e | |||
b629d2d4fe | |||
3bd7d48f51 | |||
435a6b74e3 | |||
75dc800d21 | |||
0c91aef2d8 | |||
3989b29a9b | |||
0463028bc2 | |||
39cfad0dee | |||
6d4d0b5b4b | |||
f96e1c5b78 | |||
8a2bee6717 | |||
d445098c8f | |||
74de25158e | |||
bce49a260e | |||
45c87b5481 | |||
dfe4bc6e59 | |||
54c978c3a3 | |||
9a7074d4aa | |||
a0040f5d12 | |||
940cdb1396 | |||
1b775cdd68 | |||
80bf931668 | |||
91c0b23384 | |||
2f668c330e | |||
08fa34882f | |||
4037705531 | |||
c76c11e59c | |||
9edbd0a204 | |||
707507ff6d | |||
7e1592d2cd | |||
903c9579b8 | |||
b440ef8c96 | |||
700f63a806 | |||
951a119926 | |||
1ca4041b86 | |||
80c1512fd5 | |||
0ac9cefd03 | |||
b8432f28f4 | |||
93935980f8 | |||
3fec2119e6 | |||
9b14418863 | |||
6ddc727fac | |||
acb5278cc8 | |||
0839209cab | |||
b39809668a | |||
3e9edc6845 | |||
bfc73f1fa2 | |||
f00c9bba33 | |||
b55b505690 | |||
2818de21ff | |||
aed5d40607 | |||
afa5477d1c | |||
01fcd42431 | |||
f990610776 | |||
64cb45fd79 | |||
ace6c12ec6 | |||
cac75be05b | |||
c3f319d7c2 | |||
ba3c333611 | |||
59a3d0cb57 | |||
6780c98e19 | |||
2f52783a08 | |||
7dec9d8cc4 | |||
fb0a24fba2 | |||
8e30bf3c02 | |||
99d3c105f5 | |||
18e9889418 | |||
8e46ba80d3 | |||
b0d35995c4 | |||
25466aa1c3 | |||
601c2d2181 | |||
175ffa64ee | |||
cb5fb0a12d | |||
b5bb5c85d4 | |||
7e54df414e | |||
20a80972f4 | |||
7ef3f3837e | |||
aad2dad38a | |||
66f2078878 | |||
8ce20f0f3d | |||
c84cf87261 | |||
c5f9acf4b7 | |||
7decc85eb7 | |||
21e8c67a4f | |||
a4bb2df36a | |||
b948361956 | |||
a792c4079c | |||
7b374c9ac9 | |||
a32c4aa482 | |||
a195bf899a | |||
ded17dc1cf | |||
a0bb409f51 | |||
a2684cd93a | |||
1450346214 | |||
fe5c1a7341 | |||
1fa360fc6e | |||
41bf19f613 | |||
9ad35bd740 | |||
fabf79fc67 | |||
925915ae37 | |||
97f4a7fee0 | |||
3998465721 | |||
4774d2feb0 | |||
6f0114f4a6 | |||
66616dbd4d | |||
62b81276e0 | |||
176d7e4e7b | |||
70e6fcd78b | |||
c8d0f5fe98 | |||
fdf58a6668 | |||
8ba42095c5 | |||
d6509bf78d | |||
85ed71aaec | |||
49c9472fa0 | |||
72deb41eb2 | |||
3f7a03ebe3 | |||
62642bb61c | |||
f1c9df5806 | |||
6c25fae1c4 | |||
44cb044e66 | |||
6c68218e3c | |||
f11f33f1c0 | |||
8ac23c9f77 | |||
14baf2e7f3 | |||
bc2dcf85fe | |||
1e45911f1a | |||
67564201ec | |||
5feb0dffba | |||
7dfc11843c | |||
6a7f3b8db2 | |||
207a12f5bc | |||
26b70395ff | |||
598f607e28 | |||
3ec7bfffe0 | |||
a7f822ef59 | |||
57543c169e | |||
5b9e59bc07 | |||
3f7436e8a0 | |||
ce6f747064 | |||
d7c936b44a | |||
9b926844e3 | |||
5e2b3407ef | |||
4e16a8fb63 | |||
77eab3fbfe | |||
041be06d58 | |||
429b9785c0 | |||
e410cfc3ce | |||
bc89f285d8 | |||
56a87ba45d | |||
95b02d76b0 | |||
a5defbc1b9 | |||
aaf0d41c7c | |||
0cb820e0f9 | |||
16564f554f | |||
fd01209d09 | |||
e693074aa6 | |||
d652cf12ec | |||
2b6a074305 | |||
5300117471 | |||
70af52a316 | |||
1d17cd5bb3 | |||
bf2449dfae | |||
4e4d00c67a | |||
9931d66400 | |||
1a548c048e | |||
14bee39b29 | |||
d458fcbc15 | |||
919e58b96a | |||
05bef0f0e9 | |||
5974c8facd | |||
0bcb64b184 | |||
0bf680fea2 | |||
b806420873 | |||
be5911a9f3 | |||
d375d73b2e | |||
7765770f89 | |||
872a85ae94 | |||
9c61f5f585 | |||
c94c469592 | |||
feac80dd3f | |||
fa8dbdc888 | |||
4a7d49af95 | |||
794b162a46 | |||
5fd1bdd7fc | |||
0ccd6746c9 | |||
d9b550c0a1 | |||
e9b091c92a | |||
1f30b99208 | |||
05c3ea3bc8 | |||
6108d3cc58 | |||
bab97c83d0 | |||
3eaeb030ff | |||
acec73ab6e | |||
5cc17418c7 | |||
3efb81dec6 | |||
94a7cd2a07 | |||
3e82ff4747 | |||
b5bd2f43c5 | |||
94aa56f19e | |||
4d89ee2e59 | |||
70567eff23 | |||
02ec83c5d5 | |||
2bd4b8d577 | |||
eecf2c3d41 | |||
c23588cc4b | |||
5108b30e6d | |||
f19e23fbd1 | |||
ea1f8a50d4 | |||
3dead611bb | |||
355da83690 | |||
3e5c49e59a | |||
5e47e223bd | |||
794ff3074a | |||
7e2afa4384 | |||
1c5edc3cb3 | |||
34b772727d | |||
2c856fb9e5 | |||
7727a40dc9 | |||
b5639ed313 | |||
2c4ac2627d | |||
674a8e579b | |||
001083a769 | |||
62b51c3070 | |||
61128870b8 | |||
78548dc03f | |||
66110dafcc | |||
b73a4638ac | |||
5f16420333 | |||
ccb47e7e10 | |||
677ad754a0 | |||
514cd04452 | |||
6704a81255 | |||
463e46338c | |||
2f889132c6 | |||
ebef1e8620 | |||
114df388fe | |||
ea36831459 | |||
69b8503935 | |||
0a2d1210bc | |||
859ffc994e | |||
5e6e2187a3 | |||
a7f1f33715 | |||
86ecfc6333 | |||
18e6fb0287 | |||
0f759f125d | |||
eefed45e37 | |||
aac1710afb | |||
21c1e6afc5 | |||
a47e812a54 | |||
42c6855103 | |||
0be9cd3497 | |||
e5c197d8aa | |||
7cd1d3bc34 | |||
82637b8e9f | |||
4a0deb8b1e |
28
.devops/cublas.Dockerfile
Normal file
28
.devops/cublas.Dockerfile
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
ARG UBUNTU_VERSION=22.04
|
||||||
|
|
||||||
|
# This needs to generally match the container host's environment.
|
||||||
|
ARG CUDA_VERSION=11.7.1
|
||||||
|
|
||||||
|
# Target the CUDA build image
|
||||||
|
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
|
||||||
|
|
||||||
|
FROM ${BASE_CUDA_DEV_CONTAINER} as build
|
||||||
|
|
||||||
|
# Unless otherwise specified, we make a fat build.
|
||||||
|
ARG CUDA_DOCKER_ARCH=all
|
||||||
|
|
||||||
|
RUN apt-get update && \
|
||||||
|
apt-get install -y build-essential git cmake libsdl2-dev wget git
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
COPY . .
|
||||||
|
|
||||||
|
# Set nvcc architecture
|
||||||
|
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
|
||||||
|
# Enable cuBLAS
|
||||||
|
ENV GGML_CUDA=1
|
||||||
|
|
||||||
|
RUN make base.en
|
||||||
|
|
||||||
|
ENTRYPOINT ["/app/main"]
|
40
.devops/main-cuda.Dockerfile
Normal file
40
.devops/main-cuda.Dockerfile
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
ARG UBUNTU_VERSION=22.04
|
||||||
|
# This needs to generally match the container host's environment.
|
||||||
|
ARG CUDA_VERSION=12.3.1
|
||||||
|
# Target the CUDA build image
|
||||||
|
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
|
||||||
|
# Target the CUDA runtime image
|
||||||
|
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
|
||||||
|
|
||||||
|
FROM ${BASE_CUDA_DEV_CONTAINER} AS build
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Unless otherwise specified, we make a fat build.
|
||||||
|
ARG CUDA_DOCKER_ARCH=all
|
||||||
|
# Set nvcc architecture
|
||||||
|
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
|
||||||
|
# Enable cuBLAS
|
||||||
|
ENV GGML_CUDA=1
|
||||||
|
|
||||||
|
RUN apt-get update && \
|
||||||
|
apt-get install -y build-essential libsdl2-dev wget cmake git \
|
||||||
|
&& rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
|
||||||
|
|
||||||
|
# Ref: https://stackoverflow.com/a/53464012
|
||||||
|
ENV CUDA_MAIN_VERSION=12.3
|
||||||
|
ENV LD_LIBRARY_PATH /usr/local/cuda-${CUDA_MAIN_VERSION}/compat:$LD_LIBRARY_PATH
|
||||||
|
|
||||||
|
COPY .. .
|
||||||
|
RUN make base.en
|
||||||
|
|
||||||
|
FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
|
||||||
|
ENV CUDA_MAIN_VERSION=12.3
|
||||||
|
ENV LD_LIBRARY_PATH /usr/local/cuda-${CUDA_MAIN_VERSION}/compat:$LD_LIBRARY_PATH
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
RUN apt-get update && \
|
||||||
|
apt-get install -y curl ffmpeg wget cmake git \
|
||||||
|
&& rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
|
||||||
|
|
||||||
|
COPY --from=build /app /app
|
||||||
|
ENTRYPOINT [ "bash", "-c" ]
|
19
.devops/main.Dockerfile
Normal file
19
.devops/main.Dockerfile
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
FROM ubuntu:22.04 AS build
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
RUN apt-get update && \
|
||||||
|
apt-get install -y build-essential wget cmake git \
|
||||||
|
&& rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
|
||||||
|
|
||||||
|
COPY .. .
|
||||||
|
RUN make base.en
|
||||||
|
|
||||||
|
FROM ubuntu:22.04 AS runtime
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
RUN apt-get update && \
|
||||||
|
apt-get install -y curl ffmpeg libsdl2-dev wget cmake git \
|
||||||
|
&& rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
|
||||||
|
|
||||||
|
COPY --from=build /app /app
|
||||||
|
ENTRYPOINT [ "bash", "-c" ]
|
10
.github/workflows/bindings-go.yml
vendored
10
.github/workflows/bindings-go.yml
vendored
@ -10,13 +10,13 @@ on:
|
|||||||
- whisper.h
|
- whisper.h
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
ubuntu-latest:
|
ubuntu-22:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-22.04
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/setup-go@v3
|
- uses: actions/setup-go@v5
|
||||||
with:
|
with:
|
||||||
go-version: '^1.19'
|
go-version: '^1.23'
|
||||||
- uses: actions/checkout@v1
|
- uses: actions/checkout@v4
|
||||||
- run: |
|
- run: |
|
||||||
cd bindings/go
|
cd bindings/go
|
||||||
make test
|
make test
|
||||||
|
61
.github/workflows/bindings-ruby.yml
vendored
61
.github/workflows/bindings-ruby.yml
vendored
@ -3,20 +3,63 @@ on:
|
|||||||
push:
|
push:
|
||||||
paths:
|
paths:
|
||||||
- bindings/ruby/**
|
- bindings/ruby/**
|
||||||
- whisper.h
|
- src/**/*.c
|
||||||
|
- src/**/*.cpp
|
||||||
|
- src/**/*.h
|
||||||
|
- src/**/*.m
|
||||||
|
- src/**/*.metal
|
||||||
|
- include/**/*.c
|
||||||
|
- include/**/*.cpp
|
||||||
|
- include/**/*.h
|
||||||
|
- include/**/*.m
|
||||||
|
- include/**/*.metal
|
||||||
|
- ggml/**/*.c
|
||||||
|
- ggml/**/*.cpp
|
||||||
|
- ggml/**/*.h
|
||||||
|
- ggml/**/*.m
|
||||||
|
- ggml/**/*.metal
|
||||||
|
- scripts/get-flags.mk
|
||||||
|
- examples/common.h
|
||||||
|
- examples/common.cpp
|
||||||
|
- examples/common-whisper.h
|
||||||
|
- examples/common-whisper.cpp
|
||||||
|
- examples/stb_vorbis.c
|
||||||
|
- examples/miniaudio.h
|
||||||
pull_request:
|
pull_request:
|
||||||
paths:
|
paths:
|
||||||
- bindings/ruby/**
|
- bindings/ruby/**
|
||||||
- whisper.h
|
- src/**/*.c
|
||||||
|
- src/**/*.cpp
|
||||||
|
- src/**/*.h
|
||||||
|
- src/**/*.m
|
||||||
|
- src/**/*.metal
|
||||||
|
- include/**/*.c
|
||||||
|
- include/**/*.cpp
|
||||||
|
- include/**/*.h
|
||||||
|
- include/**/*.m
|
||||||
|
- include/**/*.metal
|
||||||
|
- ggml/**/*.c
|
||||||
|
- ggml/**/*.cpp
|
||||||
|
- ggml/**/*.h
|
||||||
|
- ggml/**/*.m
|
||||||
|
- ggml/**/*.metal
|
||||||
|
- scripts/get-flags.mk
|
||||||
|
- examples/common.h
|
||||||
|
- examples/common.cpp
|
||||||
|
- examples/common-whisper.h
|
||||||
|
- examples/common-whisper.cpp
|
||||||
|
- examples/stb_vorbis.c
|
||||||
|
- examples/miniaudio.h
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
ubuntu-latest:
|
ubuntu-22:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-22.04
|
||||||
|
defaults:
|
||||||
|
run:
|
||||||
|
working-directory: bindings/ruby
|
||||||
steps:
|
steps:
|
||||||
- uses: ruby/setup-ruby@v1
|
- uses: ruby/setup-ruby@v1
|
||||||
with:
|
with:
|
||||||
ruby-version: '3.0'
|
ruby-version: '3.1'
|
||||||
- uses: actions/checkout@v1
|
- uses: actions/checkout@v4
|
||||||
- run: |
|
- run: rake test
|
||||||
cd bindings/ruby/ext
|
|
||||||
ruby extconf.rb && make
|
|
||||||
|
744
.github/workflows/build.yml
vendored
744
.github/workflows/build.yml
vendored
@ -1,118 +1,444 @@
|
|||||||
name: CI
|
name: CI
|
||||||
on: [push, pull_request]
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
pull_request:
|
||||||
|
types: [opened, synchronize, reopened]
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
|
env:
|
||||||
|
ubuntu_image: "ubuntu:22.04"
|
||||||
|
VCPKG_BINARY_SOURCES: "clear;x-gha,readwrite"
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
ubuntu-latest:
|
ubuntu-22:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-22.04
|
||||||
|
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
arch: [linux/amd64, linux/ppc64le]
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v1
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Dependencies
|
- name: Set up QEMU
|
||||||
run: |
|
uses: docker/setup-qemu-action@v3
|
||||||
sudo apt-get update
|
|
||||||
sudo apt-get install build-essential
|
|
||||||
sudo apt-get install libsdl2-dev
|
|
||||||
|
|
||||||
- name: Build
|
- name: Build ${{ matrix.arch }}
|
||||||
run: |
|
run: |
|
||||||
make
|
docker run --platform ${{ matrix.arch }} --rm \
|
||||||
make stream
|
-v ${{ github.workspace }}:/workspace \
|
||||||
|
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
|
||||||
|
set -e
|
||||||
|
apt update
|
||||||
|
apt install -y build-essential libsdl2-dev cmake git
|
||||||
|
cmake -B build
|
||||||
|
cmake --build build --config Release -j $(nproc)'
|
||||||
|
|
||||||
|
ubuntu-22-arm64:
|
||||||
|
runs-on: ubuntu-22.04
|
||||||
|
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
arch: [linux/arm64]
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Clone
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Set up QEMU
|
||||||
|
uses: docker/setup-qemu-action@v3
|
||||||
|
|
||||||
|
- name: Build ${{ matrix.arch }}
|
||||||
|
run: |
|
||||||
|
docker run --platform ${{ matrix.arch }} --rm \
|
||||||
|
-v ${{ github.workspace }}:/workspace \
|
||||||
|
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
|
||||||
|
set -e
|
||||||
|
apt update
|
||||||
|
apt install -y build-essential libsdl2-dev cmake git
|
||||||
|
cmake -B build -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8-a
|
||||||
|
cmake --build build --config Release -j $(nproc)'
|
||||||
|
|
||||||
|
ubuntu-22-arm-v7:
|
||||||
|
runs-on: ubuntu-22.04
|
||||||
|
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
arch: [linux/arm/v7]
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Clone
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Set up QEMU
|
||||||
|
uses: docker/setup-qemu-action@v3
|
||||||
|
|
||||||
|
- name: Build ${{ matrix.arch }}
|
||||||
|
run: |
|
||||||
|
docker run --platform ${{ matrix.arch }} --rm \
|
||||||
|
-v ${{ github.workspace }}:/workspace \
|
||||||
|
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
|
||||||
|
set -e
|
||||||
|
apt update
|
||||||
|
apt install -y build-essential libsdl2-dev cmake git
|
||||||
|
cmake -B build -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv7-a+fp
|
||||||
|
cmake --build build --config Release -j $(nproc)'
|
||||||
|
|
||||||
macOS-latest:
|
macOS-latest:
|
||||||
runs-on: macOS-latest
|
runs-on: macOS-latest
|
||||||
|
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS']
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v1
|
id: checkout
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: ccache
|
||||||
|
uses: hendrikmuhs/ccache-action@v1.2.16
|
||||||
|
with:
|
||||||
|
key: macOS-latest-swift
|
||||||
|
evict-old-files: 1d
|
||||||
|
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
run: |
|
run: |
|
||||||
brew update
|
brew update
|
||||||
brew install sdl2
|
brew install sdl2 cmake
|
||||||
|
|
||||||
- name: Build
|
- name: Build
|
||||||
run: |
|
run: |
|
||||||
make
|
sysctl -a
|
||||||
make stream
|
cmake -B build -G Xcode \
|
||||||
|
-DGGML_METAL_USE_BF16=ON \
|
||||||
|
-DGGML_METAL_EMBED_LIBRARY=ON \
|
||||||
|
-DWHISPER_BUILD_EXAMPLES=OFF \
|
||||||
|
-DWHISPER_BUILD_TESTS=OFF \
|
||||||
|
-DWHISPER_BUILD_SERVER=OFF \
|
||||||
|
-DCMAKE_OSX_ARCHITECTURES="arm64;x86_64"
|
||||||
|
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
|
||||||
|
|
||||||
ubuntu-latest-gcc:
|
- name: xcodebuild for swift package
|
||||||
runs-on: ubuntu-latest
|
id: xcodebuild
|
||||||
|
run: |
|
||||||
|
./build-xcframework.sh
|
||||||
|
|
||||||
|
|
||||||
|
# freeBSD-latest:
|
||||||
|
# runs-on: macos-12
|
||||||
|
#
|
||||||
|
# steps:
|
||||||
|
# - name: Clone
|
||||||
|
# uses: actions/checkout@v4
|
||||||
|
#
|
||||||
|
# - name: Build
|
||||||
|
# uses: cross-platform-actions/action@v0.24.0
|
||||||
|
# with:
|
||||||
|
# operating_system: freebsd
|
||||||
|
# version: '13.3'
|
||||||
|
# run: |
|
||||||
|
# sudo pkg update
|
||||||
|
# sudo pkg install -y gmake sdl2 cmake
|
||||||
|
# cmake -B build
|
||||||
|
# cmake --build build --config Release
|
||||||
|
|
||||||
|
ubuntu-22-gcc:
|
||||||
|
runs-on: ubuntu-22.04
|
||||||
|
|
||||||
strategy:
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
build: [Debug, Release]
|
build: [Debug, Release]
|
||||||
|
arch: [linux/amd64, linux/ppc64le]
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v1
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Dependencies
|
- name: Set up QEMU
|
||||||
|
uses: docker/setup-qemu-action@v3
|
||||||
|
|
||||||
|
- name: Build ${{ matrix.arch }}
|
||||||
run: |
|
run: |
|
||||||
sudo apt-get update
|
docker run --platform ${{ matrix.arch }} --rm \
|
||||||
sudo apt-get install build-essential
|
-v ${{ github.workspace }}:/workspace \
|
||||||
sudo apt-get install cmake
|
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
|
||||||
sudo apt-get install libsdl2-dev
|
set -e
|
||||||
|
apt update
|
||||||
|
apt install -y build-essential cmake libsdl2-dev git
|
||||||
|
cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }}
|
||||||
|
make
|
||||||
|
ctest -L gh --output-on-failure'
|
||||||
|
|
||||||
- name: Configure
|
ubuntu-22-gcc-arm64:
|
||||||
run: cmake . -DWHISPER_SUPPORT_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }}
|
runs-on: ubuntu-22.04
|
||||||
|
|
||||||
- name: Build
|
|
||||||
run: |
|
|
||||||
make
|
|
||||||
ctest -L gh --output-on-failure
|
|
||||||
|
|
||||||
ubuntu-latest-clang:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
|
|
||||||
strategy:
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
build: [Debug, Release]
|
build: [Debug, Release]
|
||||||
|
arch: [linux/arm64]
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v1
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Dependencies
|
- name: Set up QEMU
|
||||||
|
uses: docker/setup-qemu-action@v3
|
||||||
|
|
||||||
|
- name: Build ${{ matrix.arch }}
|
||||||
run: |
|
run: |
|
||||||
sudo apt-get update
|
docker run --platform ${{ matrix.arch }} --rm \
|
||||||
sudo apt-get install build-essential
|
-v ${{ github.workspace }}:/workspace \
|
||||||
sudo apt-get install cmake
|
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
|
||||||
sudo apt-get install libsdl2-dev
|
set -e
|
||||||
|
apt update
|
||||||
|
apt install -y build-essential cmake libsdl2-dev git
|
||||||
|
cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8-a
|
||||||
|
make
|
||||||
|
ctest -L gh --output-on-failure'
|
||||||
|
|
||||||
- name: Configure
|
ubuntu-22-gcc-arm-v7:
|
||||||
run: cmake . -DWHISPER_SUPPORT_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
|
runs-on: ubuntu-22.04
|
||||||
|
|
||||||
- name: Build
|
|
||||||
run: |
|
|
||||||
make
|
|
||||||
ctest -L gh --output-on-failure
|
|
||||||
|
|
||||||
ubuntu-latest-gcc-sanitized:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
|
|
||||||
strategy:
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
build: [Debug, Release]
|
||||||
|
arch: [linux/arm/v7]
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Clone
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Set up QEMU
|
||||||
|
uses: docker/setup-qemu-action@v3
|
||||||
|
|
||||||
|
- name: Build ${{ matrix.arch }}
|
||||||
|
run: |
|
||||||
|
docker run --platform ${{ matrix.arch }} --rm \
|
||||||
|
-v ${{ github.workspace }}:/workspace \
|
||||||
|
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
|
||||||
|
set -e
|
||||||
|
apt update
|
||||||
|
apt install -y build-essential cmake libsdl2-dev git
|
||||||
|
cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv7-a+fp
|
||||||
|
make
|
||||||
|
ctest -L gh --output-on-failure'
|
||||||
|
|
||||||
|
ubuntu-22-clang:
|
||||||
|
runs-on: ubuntu-22.04
|
||||||
|
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
build: [Debug, Release]
|
||||||
|
#arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
|
||||||
|
# TODO: arm/v7 disabled due to clang bug
|
||||||
|
# https://github.com/ggerganov/whisper.cpp/actions/runs/9657764109/job/26637633042?pr=2256#step:4:1990
|
||||||
|
arch: [linux/amd64, linux/arm64, linux/ppc64le]
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Clone
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Set up QEMU
|
||||||
|
uses: docker/setup-qemu-action@v3
|
||||||
|
|
||||||
|
- name: Build ${{ matrix.arch }}
|
||||||
|
run: |
|
||||||
|
docker run --platform ${{ matrix.arch }} --rm \
|
||||||
|
-v ${{ github.workspace }}:/workspace \
|
||||||
|
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
|
||||||
|
set -e
|
||||||
|
apt update
|
||||||
|
apt install -y clang build-essential cmake libsdl2-dev git
|
||||||
|
cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
|
||||||
|
make
|
||||||
|
ctest -L gh --output-on-failure'
|
||||||
|
|
||||||
|
ubuntu-22-gcc-sanitized:
|
||||||
|
runs-on: ubuntu-22.04
|
||||||
|
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
sanitizer: [ADDRESS, THREAD, UNDEFINED]
|
sanitizer: [ADDRESS, THREAD, UNDEFINED]
|
||||||
|
arch: [linux/amd64]
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v1
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Dependencies
|
- name: Set up QEMU
|
||||||
|
uses: docker/setup-qemu-action@v3
|
||||||
|
|
||||||
|
- name: Build ${{ matrix.arch }}
|
||||||
run: |
|
run: |
|
||||||
sudo apt-get update
|
docker run --platform ${{ matrix.arch }} --rm \
|
||||||
sudo apt-get install build-essential
|
-v ${{ github.workspace }}:/workspace \
|
||||||
sudo apt-get install cmake
|
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
|
||||||
|
set -e
|
||||||
|
apt update
|
||||||
|
apt install -y build-essential cmake git
|
||||||
|
cmake . -DCMAKE_BUILD_TYPE=Debug -DWHISPER_SANITIZE_${{ matrix.sanitizer }}=ON
|
||||||
|
make
|
||||||
|
ctest -L gh --output-on-failure'
|
||||||
|
|
||||||
- name: Configure
|
ubuntu-22-cmake-sycl:
|
||||||
run: cmake . -DCMAKE_BUILD_TYPE=Debug -DWHISPER_SANITIZE_${{ matrix.sanitizer }}=ON
|
runs-on: ubuntu-22.04
|
||||||
|
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
dwhisper_sycl: [ON]
|
||||||
|
dcmake_c_compiler: [icx]
|
||||||
|
dcmake_cxx_compiler: [icpx]
|
||||||
|
arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
|
||||||
|
|
||||||
|
continue-on-error: true
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Clone
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: add oneAPI to apt
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
cd /tmp
|
||||||
|
wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
|
||||||
|
sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
|
||||||
|
rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
|
||||||
|
sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
|
||||||
|
|
||||||
|
- name: install oneAPI dpcpp compiler
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
sudo apt update
|
||||||
|
sudo apt install intel-oneapi-compiler-dpcpp-cpp git
|
||||||
|
|
||||||
|
- name: install oneAPI MKL library
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
sudo apt install intel-oneapi-mkl-devel git
|
||||||
|
|
||||||
|
- name: Clone
|
||||||
|
id: checkout
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Build
|
- name: Build
|
||||||
|
id: cmake_build
|
||||||
run: |
|
run: |
|
||||||
make
|
source /opt/intel/oneapi/setvars.sh
|
||||||
ctest -L gh --output-on-failure
|
mkdir build
|
||||||
|
cd build
|
||||||
|
cmake -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
|
||||||
|
cmake --build . --config Release -j $(nproc)
|
||||||
|
|
||||||
|
ubuntu-22-cmake-sycl-fp16:
|
||||||
|
runs-on: ubuntu-22.04
|
||||||
|
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
dwhisper_sycl: [ON]
|
||||||
|
dcmake_c_compiler: [icx]
|
||||||
|
dcmake_cxx_compiler: [icpx]
|
||||||
|
arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
|
||||||
|
|
||||||
|
continue-on-error: true
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Clone
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: add oneAPI to apt
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
cd /tmp
|
||||||
|
wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
|
||||||
|
sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
|
||||||
|
rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
|
||||||
|
sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
|
||||||
|
|
||||||
|
- name: install oneAPI dpcpp compiler
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
sudo apt update
|
||||||
|
sudo apt install intel-oneapi-compiler-dpcpp-cpp git
|
||||||
|
|
||||||
|
- name: install oneAPI MKL library
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
sudo apt install intel-oneapi-mkl-devel
|
||||||
|
|
||||||
|
- name: Clone
|
||||||
|
id: checkout
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Build
|
||||||
|
id: cmake_build
|
||||||
|
run: |
|
||||||
|
source /opt/intel/oneapi/setvars.sh
|
||||||
|
mkdir build
|
||||||
|
cd build
|
||||||
|
cmake -DGGML_SYCL_F16=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
|
||||||
|
cmake --build . --config Release -j $(nproc)
|
||||||
|
|
||||||
|
windows-msys2:
|
||||||
|
runs-on: windows-latest
|
||||||
|
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
include:
|
||||||
|
- { sys: UCRT64, env: ucrt-x86_64, build: Release }
|
||||||
|
- { sys: CLANG64, env: clang-x86_64, build: Release }
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Clone
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Setup ${{ matrix.sys }}
|
||||||
|
uses: msys2/setup-msys2@v2
|
||||||
|
with:
|
||||||
|
update: true
|
||||||
|
msystem: ${{matrix.sys}}
|
||||||
|
install: >-
|
||||||
|
base-devel
|
||||||
|
git
|
||||||
|
mingw-w64-${{matrix.env}}-toolchain
|
||||||
|
mingw-w64-${{matrix.env}}-cmake
|
||||||
|
mingw-w64-${{matrix.env}}-SDL2
|
||||||
|
mingw-w64-${{matrix.env}}-openblas
|
||||||
|
|
||||||
|
- name: Build using CMake
|
||||||
|
shell: msys2 {0}
|
||||||
|
run: |
|
||||||
|
cmake -B build -DWHISPER_SDL2=ON
|
||||||
|
cmake --build build --config ${{ matrix.build }} -j $(nproc)
|
||||||
|
|
||||||
|
- name: Clean after building using CMake
|
||||||
|
shell: msys2 {0}
|
||||||
|
run: |
|
||||||
|
rm -rf build
|
||||||
|
|
||||||
|
- name: Build using CMake w/ OpenBLAS
|
||||||
|
shell: msys2 {0}
|
||||||
|
run: |
|
||||||
|
cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
||||||
|
cmake --build build --config ${{ matrix.build }} -j $(nproc)
|
||||||
|
|
||||||
windows:
|
windows:
|
||||||
runs-on: windows-latest
|
runs-on: windows-latest
|
||||||
@ -125,17 +451,19 @@ jobs:
|
|||||||
include:
|
include:
|
||||||
- arch: Win32
|
- arch: Win32
|
||||||
s2arc: x86
|
s2arc: x86
|
||||||
|
jnaPath: win32-x86
|
||||||
- arch: x64
|
- arch: x64
|
||||||
s2arc: x64
|
s2arc: x64
|
||||||
|
jnaPath: win32-x86-64
|
||||||
- sdl2: ON
|
- sdl2: ON
|
||||||
s2ver: 2.26.0
|
s2ver: 2.28.5
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v1
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Add msbuild to PATH
|
- name: Add msbuild to PATH
|
||||||
uses: microsoft/setup-msbuild@v1
|
uses: microsoft/setup-msbuild@v2
|
||||||
|
|
||||||
- name: Fetch SDL2 and set SDL2_DIR
|
- name: Fetch SDL2 and set SDL2_DIR
|
||||||
if: matrix.sdl2 == 'ON'
|
if: matrix.sdl2 == 'ON'
|
||||||
@ -148,7 +476,7 @@ jobs:
|
|||||||
run: >
|
run: >
|
||||||
cmake -S . -B ./build -A ${{ matrix.arch }}
|
cmake -S . -B ./build -A ${{ matrix.arch }}
|
||||||
-DCMAKE_BUILD_TYPE=${{ matrix.build }}
|
-DCMAKE_BUILD_TYPE=${{ matrix.build }}
|
||||||
-DWHISPER_SUPPORT_SDL2=${{ matrix.sdl2 }}
|
-DWHISPER_SDL2=${{ matrix.sdl2 }}
|
||||||
|
|
||||||
- name: Build
|
- name: Build
|
||||||
run: |
|
run: |
|
||||||
@ -159,9 +487,15 @@ jobs:
|
|||||||
if: matrix.sdl2 == 'ON'
|
if: matrix.sdl2 == 'ON'
|
||||||
run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }}
|
run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }}
|
||||||
|
|
||||||
|
- name: Upload dll
|
||||||
|
uses: actions/upload-artifact@v4
|
||||||
|
with:
|
||||||
|
name: ${{ matrix.jnaPath }}_whisper.dll
|
||||||
|
path: build/bin/${{ matrix.build }}/whisper.dll
|
||||||
|
|
||||||
- name: Upload binaries
|
- name: Upload binaries
|
||||||
if: matrix.sdl2 == 'ON'
|
if: matrix.sdl2 == 'ON'
|
||||||
uses: actions/upload-artifact@v1
|
uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: whisper-bin-${{ matrix.arch }}
|
name: whisper-bin-${{ matrix.arch }}
|
||||||
path: build/bin/${{ matrix.build }}
|
path: build/bin/${{ matrix.build }}
|
||||||
@ -177,29 +511,31 @@ jobs:
|
|||||||
sdl2: [ON]
|
sdl2: [ON]
|
||||||
include:
|
include:
|
||||||
- arch: Win32
|
- arch: Win32
|
||||||
obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x86.zip
|
|
||||||
s2arc: x86
|
s2arc: x86
|
||||||
- arch: x64
|
- arch: x64
|
||||||
obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x64.zip
|
|
||||||
s2arc: x64
|
s2arc: x64
|
||||||
- sdl2: ON
|
- sdl2: ON
|
||||||
s2ver: 2.26.0
|
s2ver: 2.28.5
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v1
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Export GitHub Actions cache environment variables
|
||||||
|
uses: actions/github-script@v7
|
||||||
|
with:
|
||||||
|
script: |
|
||||||
|
core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || '');
|
||||||
|
core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || '');
|
||||||
|
|
||||||
- name: Add msbuild to PATH
|
- name: Add msbuild to PATH
|
||||||
uses: microsoft/setup-msbuild@v1
|
uses: microsoft/setup-msbuild@v2
|
||||||
|
|
||||||
- name: Fetch OpenBLAS
|
- name: Install OpenBLAS and pkgconfiglite
|
||||||
if: matrix.blas == 'ON'
|
if: matrix.blas == 'ON'
|
||||||
run: |
|
run: |
|
||||||
C:/msys64/usr/bin/wget.exe -qO blas.zip ${{ matrix.obzip }}
|
vcpkg install --triplet=${{ matrix.s2arc }}-windows openblas
|
||||||
7z x blas.zip -oblas -y
|
choco install pkgconfiglite
|
||||||
copy blas/include/cblas.h .
|
|
||||||
copy blas/include/openblas_config.h .
|
|
||||||
echo "blasdir=$env:GITHUB_WORKSPACE/blas" >> $env:GITHUB_ENV
|
|
||||||
|
|
||||||
- name: Fetch SDL2 and set SDL2_DIR
|
- name: Fetch SDL2 and set SDL2_DIR
|
||||||
if: matrix.sdl2 == 'ON'
|
if: matrix.sdl2 == 'ON'
|
||||||
@ -211,19 +547,20 @@ jobs:
|
|||||||
- name: Configure
|
- name: Configure
|
||||||
run: >
|
run: >
|
||||||
cmake -S . -B ./build -A ${{ matrix.arch }}
|
cmake -S . -B ./build -A ${{ matrix.arch }}
|
||||||
|
-DCMAKE_TOOLCHAIN_FILE="$env:VCPKG_INSTALLATION_ROOT/scripts/buildsystems/vcpkg.cmake"
|
||||||
-DCMAKE_BUILD_TYPE=${{ matrix.build }}
|
-DCMAKE_BUILD_TYPE=${{ matrix.build }}
|
||||||
-DWHISPER_SUPPORT_OPENBLAS=${{ matrix.blas }}
|
-DGGML_BLAS=${{ matrix.blas }}
|
||||||
-DCMAKE_LIBRARY_PATH="$env:blasdir/lib"
|
-DGGML_BLAS_VENDOR=OpenBLAS
|
||||||
-DWHISPER_SUPPORT_SDL2=${{ matrix.sdl2 }}
|
-DWHISPER_SDL2=${{ matrix.sdl2 }}
|
||||||
|
|
||||||
- name: Build
|
- name: Build
|
||||||
run: |
|
run: |
|
||||||
cd ./build
|
cd ./build
|
||||||
msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
|
msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
|
||||||
|
|
||||||
- name: Copy libopenblas.dll
|
- name: Copy openblas.dll
|
||||||
if: matrix.blas == 'ON'
|
if: matrix.blas == 'ON'
|
||||||
run: copy "$env:blasdir/bin/libopenblas.dll" build/bin/${{ matrix.build }}
|
run: copy "C:/vcpkg/packages/openblas_${{ matrix.s2arc }}-windows/bin/openblas.dll" build/bin/${{ matrix.build }}
|
||||||
|
|
||||||
- name: Copy SDL2.dll
|
- name: Copy SDL2.dll
|
||||||
if: matrix.sdl2 == 'ON'
|
if: matrix.sdl2 == 'ON'
|
||||||
@ -231,13 +568,81 @@ jobs:
|
|||||||
|
|
||||||
- name: Upload binaries
|
- name: Upload binaries
|
||||||
if: matrix.blas == 'ON' && matrix.sdl2 == 'ON'
|
if: matrix.blas == 'ON' && matrix.sdl2 == 'ON'
|
||||||
uses: actions/upload-artifact@v1
|
uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: whisper-blas-bin-${{ matrix.arch }}
|
name: whisper-blas-bin-${{ matrix.arch }}
|
||||||
path: build/bin/${{ matrix.build }}
|
path: build/bin/${{ matrix.build }}
|
||||||
|
|
||||||
|
windows-cublas:
|
||||||
|
runs-on: windows-2019
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
build: [Release]
|
||||||
|
arch: [x64]
|
||||||
|
cublas: [ON]
|
||||||
|
sdl2: [ON]
|
||||||
|
cuda-toolkit: [12.2.0, 11.8.0]
|
||||||
|
include:
|
||||||
|
- arch: x64
|
||||||
|
sdl2: ON
|
||||||
|
sdl2_ver: 2.28.5
|
||||||
|
steps:
|
||||||
|
- name: Clone repository
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Add msbuild to PATH
|
||||||
|
uses: microsoft/setup-msbuild@v2
|
||||||
|
|
||||||
|
- name: Install CUDA Toolkit
|
||||||
|
id: cuda-toolkit
|
||||||
|
uses: Jimver/cuda-toolkit@v0.2.15
|
||||||
|
with:
|
||||||
|
cuda: '${{ matrix.cuda-toolkit }}'
|
||||||
|
|
||||||
|
- name: Install 7-Zip
|
||||||
|
run: choco install 7zip -y
|
||||||
|
|
||||||
|
- name: Fetch SDL2 and set SDL2_DIR
|
||||||
|
if: matrix.sdl2 == 'ON'
|
||||||
|
run: |
|
||||||
|
Invoke-WebRequest -Uri https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.sdl2_ver }}/SDL2-devel-${{ matrix.sdl2_ver }}-VC.zip -OutFile sdl2.zip
|
||||||
|
7z x sdl2.zip
|
||||||
|
echo "SDL2_DIR=${{ github.workspace }}\SDL2-${{ matrix.sdl2_ver }}\cmake" | Out-File -FilePath $env:GITHUB_ENV -Append
|
||||||
|
echo "${{ github.workspace }}\SDL2-${{ matrix.sdl2_ver }}\cmake" > SDL2_PATH.txt
|
||||||
|
|
||||||
|
- name: Configure CMake
|
||||||
|
shell: cmd
|
||||||
|
run: |
|
||||||
|
cmake -S . -B ./build -A ${{ matrix.arch }} ^
|
||||||
|
-DCMAKE_BUILD_TYPE=${{ matrix.build }} ^
|
||||||
|
-DGGML_CUDA=${{ matrix.cublas }} ^
|
||||||
|
-DCMAKE_CUDA_ARCHITECTURES=all ^
|
||||||
|
-DWHISPER_SDL2=${{ matrix.sdl2 }} ^
|
||||||
|
-DSDL2_DIR="%SDL2_DIR%"
|
||||||
|
|
||||||
|
- name: Build Project
|
||||||
|
shell: cmd
|
||||||
|
run: |
|
||||||
|
cd ./build
|
||||||
|
cmake --build . --config ${{ matrix.build }}
|
||||||
|
|
||||||
|
- name: Copy CUDA DLLs
|
||||||
|
run: |
|
||||||
|
Get-ChildItem "${{ steps.cuda-toolkit.outputs.CUDA_PATH }}/bin/" -Filter "*.dll" |
|
||||||
|
Copy-Item -Destination "build/bin/${{ matrix.build }}"
|
||||||
|
|
||||||
|
- name: Copy SDL2.dll
|
||||||
|
if: matrix.sdl2 == 'ON'
|
||||||
|
run: copy "$env:SDL2_DIR/../lib/${{ matrix.arch }}/SDL2.dll" build/bin/${{ matrix.build }}
|
||||||
|
|
||||||
|
- name: Upload binaries
|
||||||
|
uses: actions/upload-artifact@v4
|
||||||
|
with:
|
||||||
|
name: whisper-cublas-${{ matrix.cuda-toolkit }}-bin-${{ matrix.arch }}
|
||||||
|
path: build/bin/${{ matrix.build }}
|
||||||
|
|
||||||
emscripten:
|
emscripten:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-22.04
|
||||||
|
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
@ -245,23 +650,172 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v1
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Dependencies
|
- name: Setup emsdk
|
||||||
run: |
|
uses: mymindstorm/setup-emsdk@v14
|
||||||
wget -q https://github.com/emscripten-core/emsdk/archive/master.tar.gz
|
|
||||||
tar -xvf master.tar.gz
|
|
||||||
emsdk-master/emsdk update
|
|
||||||
emsdk-master/emsdk install latest
|
|
||||||
emsdk-master/emsdk activate latest
|
|
||||||
|
|
||||||
- name: Configure
|
- name: Verify
|
||||||
run: echo "tmp"
|
run: emcc -v
|
||||||
|
|
||||||
- name: Build
|
- name: Build
|
||||||
run: |
|
run: |
|
||||||
pushd emsdk-master
|
|
||||||
source ./emsdk_env.sh
|
|
||||||
popd
|
|
||||||
emcmake cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
|
emcmake cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
|
||||||
make
|
make
|
||||||
|
|
||||||
|
ios-xcode-build:
|
||||||
|
runs-on: macos-latest
|
||||||
|
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
build: [Release]
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Checkout code
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Configure
|
||||||
|
run: |
|
||||||
|
cp models/for-tests-ggml-base.en.bin models/ggml-base.en.bin
|
||||||
|
mkdir models/ggml-base.en-encoder.mlmodelc
|
||||||
|
|
||||||
|
- name: Build
|
||||||
|
id: cmake_build
|
||||||
|
run: |
|
||||||
|
sysctl -a
|
||||||
|
mkdir build
|
||||||
|
cd build
|
||||||
|
cmake -G Xcode .. \
|
||||||
|
-DGGML_METAL_USE_BF16=ON \
|
||||||
|
-DGGML_METAL_EMBED_LIBRARY=ON \
|
||||||
|
-DWHISPER_BUILD_EXAMPLES=OFF \
|
||||||
|
-DWHISPER_BUILD_TESTS=OFF \
|
||||||
|
-DWHISPER_BUILD_SERVER=OFF \
|
||||||
|
-DCMAKE_SYSTEM_NAME=iOS \
|
||||||
|
-DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
|
||||||
|
-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
|
||||||
|
cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
|
||||||
|
|
||||||
|
- name: xcodebuild for swift package
|
||||||
|
id: xcodebuild
|
||||||
|
run: |
|
||||||
|
./build-xcframework.sh
|
||||||
|
|
||||||
|
- name: Build objc example
|
||||||
|
run: xcodebuild -project examples/whisper.objc/whisper.objc.xcodeproj -scheme whisper.objc -configuration ${{ matrix.build }} -sdk iphoneos CODE_SIGN_IDENTITY="" CODE_SIGNING_REQUIRED=NO FRAMEWORK_FOLDER_PATH=./build-ios build
|
||||||
|
|
||||||
|
- name: Build swiftui example
|
||||||
|
run: xcodebuild -project examples/whisper.swiftui/whisper.swiftui.xcodeproj -scheme WhisperCppDemo -configuration ${{ matrix.build }} -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' FRAMEWORK_FOLDER_PATH=./build-ios build
|
||||||
|
|
||||||
|
android:
|
||||||
|
runs-on: ubuntu-22.04
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Clone
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
path: whisper
|
||||||
|
|
||||||
|
- name: Install Java
|
||||||
|
uses: actions/setup-java@v4
|
||||||
|
with:
|
||||||
|
distribution: zulu
|
||||||
|
java-version: 21
|
||||||
|
|
||||||
|
- name: Setup Android SDK
|
||||||
|
uses: android-actions/setup-android@v3
|
||||||
|
|
||||||
|
- name: Build
|
||||||
|
run: |
|
||||||
|
cd whisper/examples/whisper.android
|
||||||
|
./gradlew assembleRelease --no-daemon
|
||||||
|
|
||||||
|
- name: Build with external ggml
|
||||||
|
run: |
|
||||||
|
export PATH_TO_GGML=$PWD/ggml
|
||||||
|
cd whisper/examples/whisper.android
|
||||||
|
./gradlew assembleRelease --no-daemon
|
||||||
|
|
||||||
|
# TODO: disabled because of the following failure: https://github.com/ggerganov/whisper.cpp/actions/runs/11019444420/job/30627193602
|
||||||
|
# android_java:
|
||||||
|
# runs-on: ubuntu-22.04
|
||||||
|
#
|
||||||
|
# steps:
|
||||||
|
# - name: Clone
|
||||||
|
# uses: actions/checkout@v4
|
||||||
|
#
|
||||||
|
# - name: set up JDK 11
|
||||||
|
# uses: actions/setup-java@v4
|
||||||
|
# with:
|
||||||
|
# java-version: '11'
|
||||||
|
# distribution: 'temurin'
|
||||||
|
# cache: gradle
|
||||||
|
#
|
||||||
|
# - name: Setup Android SDK
|
||||||
|
# uses: android-actions/setup-android@v3
|
||||||
|
# with:
|
||||||
|
# cmdline-tools-version: 9.0
|
||||||
|
#
|
||||||
|
# - name: Build
|
||||||
|
# run: |
|
||||||
|
# cd examples/whisper.android.java
|
||||||
|
# chmod +x ./gradlew
|
||||||
|
# ./gradlew assembleRelease
|
||||||
|
|
||||||
|
# TODO: disabled because of the following failure: https://github.com/ggerganov/whisper.cpp/actions/runs/9686220096/job/26735899598
|
||||||
|
# java:
|
||||||
|
# needs: [ 'windows' ]
|
||||||
|
# runs-on: windows-latest
|
||||||
|
# steps:
|
||||||
|
# - uses: actions/checkout@v4
|
||||||
|
#
|
||||||
|
# - name: Install Java
|
||||||
|
# uses: actions/setup-java@v4
|
||||||
|
# with:
|
||||||
|
# distribution: zulu
|
||||||
|
# java-version: 20
|
||||||
|
#
|
||||||
|
# - name: Download Windows lib
|
||||||
|
# uses: actions/download-artifact@v4
|
||||||
|
# with:
|
||||||
|
# name: win32-x86-64_whisper.dll
|
||||||
|
# path: bindings/java/build/generated/resources/main/win32-x86-64
|
||||||
|
#
|
||||||
|
# - name: Build
|
||||||
|
# run: |
|
||||||
|
# models\download-ggml-model.cmd tiny.en
|
||||||
|
# cd bindings/java
|
||||||
|
# chmod +x ./gradlew
|
||||||
|
# ./gradlew build
|
||||||
|
#
|
||||||
|
# - name: Upload jar
|
||||||
|
# uses: actions/upload-artifact@v4
|
||||||
|
# with:
|
||||||
|
# name: whispercpp.jar
|
||||||
|
# path: bindings/java/build/libs/whispercpp-*.jar
|
||||||
|
#
|
||||||
|
# - name: Publish package
|
||||||
|
# if: ${{ github.ref == 'refs/heads/master' }}
|
||||||
|
# uses: gradle/gradle-build-action@v2.4.2
|
||||||
|
# with:
|
||||||
|
# arguments: publish
|
||||||
|
# build-root-directory: bindings/java
|
||||||
|
# env:
|
||||||
|
# MAVEN_USERNAME: ${{ secrets.JIRA_USER }}
|
||||||
|
# MAVEN_PASSWORD: ${{ secrets.JIRA_PASS }}
|
||||||
|
# PGP_SECRET: ${{ secrets.GPG_PRIVATE_KEY }}
|
||||||
|
# PGP_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}
|
||||||
|
|
||||||
|
quantize:
|
||||||
|
runs-on: ubuntu-22.04
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Clone
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Test quantize
|
||||||
|
run: |
|
||||||
|
./models/download-ggml-model.sh tiny.en
|
||||||
|
cmake -B build
|
||||||
|
cmake --build build --config Release
|
||||||
|
./build/bin/quantize models/ggml-tiny.en.bin models/ggml-tiny.en-q4_0.bin q4_0
|
||||||
|
61
.github/workflows/docker.yml
vendored
Normal file
61
.github/workflows/docker.yml
vendored
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
name: Publish Docker image
|
||||||
|
|
||||||
|
on:
|
||||||
|
pull_request:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
push_to_registry:
|
||||||
|
name: Push Docker image to Docker Hub
|
||||||
|
if: github.event.pull_request.draft == false
|
||||||
|
|
||||||
|
runs-on: ubuntu-22.04
|
||||||
|
env:
|
||||||
|
COMMIT_SHA: ${{ github.sha }}
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
config:
|
||||||
|
- { tag: "main", dockerfile: ".devops/main.Dockerfile", platform: "linux/amd64" }
|
||||||
|
# TODO: the CUDA image keeps failing - disabled for now
|
||||||
|
# https://github.com/ggerganov/whisper.cpp/actions/runs/11019444428/job/30602020339
|
||||||
|
#- { tag: "main-cuda", dockerfile: ".devops/main-cuda.Dockerfile", platform: "linux/amd64" }
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Check out the repo
|
||||||
|
uses: actions/checkout@v3
|
||||||
|
|
||||||
|
- name: Set up QEMU
|
||||||
|
uses: docker/setup-qemu-action@v3
|
||||||
|
with:
|
||||||
|
image: tonistiigi/binfmt:qemu-v7.0.0-28
|
||||||
|
|
||||||
|
- name: Set up Docker Buildx
|
||||||
|
uses: docker/setup-buildx-action@v3
|
||||||
|
|
||||||
|
- name: Log in to Docker Hub
|
||||||
|
uses: docker/login-action@v3
|
||||||
|
with:
|
||||||
|
registry: ghcr.io
|
||||||
|
username: ${{ github.repository_owner }}
|
||||||
|
password: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
|
||||||
|
- name: Build and push Docker image (versioned)
|
||||||
|
if: github.event_name == 'push'
|
||||||
|
uses: docker/build-push-action@v5
|
||||||
|
with:
|
||||||
|
context: .
|
||||||
|
push: true
|
||||||
|
platforms: ${{ matrix.config.platform }}
|
||||||
|
tags: "ghcr.io/${{ github.repository }}:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
|
||||||
|
file: ${{ matrix.config.dockerfile }}
|
||||||
|
|
||||||
|
- name: Build and push Docker image (tagged)
|
||||||
|
uses: docker/build-push-action@v4
|
||||||
|
with:
|
||||||
|
context: .
|
||||||
|
push: ${{ github.event_name == 'push' }}
|
||||||
|
platforms: ${{ matrix.config.platform }}
|
||||||
|
tags: "ghcr.io/${{ github.repository }}:${{ matrix.config.tag }}"
|
||||||
|
file: ${{ matrix.config.dockerfile }}
|
8
.github/workflows/examples.yml
vendored
8
.github/workflows/examples.yml
vendored
@ -10,8 +10,8 @@ on:
|
|||||||
- whisper.h
|
- whisper.h
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
addon_node-ubuntu-latest:
|
addon_node-ubuntu-22:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-22.04
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
node-version: [ 16.x, 18.x ]
|
node-version: [ 16.x, 18.x ]
|
||||||
@ -22,7 +22,7 @@ jobs:
|
|||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
run: |
|
run: |
|
||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
sudo apt-get install build-essential
|
sudo apt-get install build-essential git
|
||||||
sudo apt-get install cmake
|
sudo apt-get install cmake
|
||||||
sudo apt-get install libsdl2-dev
|
sudo apt-get install libsdl2-dev
|
||||||
|
|
||||||
@ -37,7 +37,7 @@ jobs:
|
|||||||
run: npm install
|
run: npm install
|
||||||
|
|
||||||
- name: Compile addon.node
|
- name: Compile addon.node
|
||||||
run: npx cmake-js compile -T whisper-addon -B Release
|
run: npx cmake-js compile -T addon.node -B Release
|
||||||
|
|
||||||
- name: Download test model
|
- name: Download test model
|
||||||
run: |
|
run: |
|
||||||
|
42
.gitignore
vendored
42
.gitignore
vendored
@ -1,24 +1,37 @@
|
|||||||
*.o
|
*.o
|
||||||
*.a
|
*.a
|
||||||
|
*.d
|
||||||
.cache/
|
.cache/
|
||||||
|
.coreml/
|
||||||
|
.test/
|
||||||
|
.venv/
|
||||||
.vs/
|
.vs/
|
||||||
.vscode/
|
.vscode/
|
||||||
.DS_Store
|
.DS_Store
|
||||||
|
.vimspector.json
|
||||||
|
/CMakeSettings.json
|
||||||
|
/talk-llama.dSYM/
|
||||||
|
|
||||||
build/
|
build/
|
||||||
build-em/
|
build-*/
|
||||||
build-debug/
|
|
||||||
build-release/
|
# SPM
|
||||||
build-static/
|
.build/
|
||||||
build-no-accel/
|
.swiftpm
|
||||||
build-sanitize-addr/
|
*.metallib
|
||||||
build-sanitize-thread/
|
|
||||||
|
ggml-metal-embed.metal
|
||||||
|
ggml-metal-embed.metal.tmp
|
||||||
|
|
||||||
/main
|
/main
|
||||||
/stream
|
/stream
|
||||||
/command
|
/command
|
||||||
/talk
|
/talk
|
||||||
|
/talk-llama
|
||||||
/bench
|
/bench
|
||||||
|
/quantize
|
||||||
|
/server
|
||||||
|
/lsp
|
||||||
|
|
||||||
arm_neon.h
|
arm_neon.h
|
||||||
sync.sh
|
sync.sh
|
||||||
@ -32,3 +45,18 @@ examples/whisper.objc/whisper.objc.xcodeproj/xcuserdata/
|
|||||||
examples/whisper.objc/whisper.objc.xcodeproj/project.xcworkspace/xcuserdata
|
examples/whisper.objc/whisper.objc.xcodeproj/project.xcworkspace/xcuserdata
|
||||||
|
|
||||||
extra/bench-gg.txt
|
extra/bench-gg.txt
|
||||||
|
|
||||||
|
models/*.mlmodel
|
||||||
|
models/*.mlmodelc
|
||||||
|
models/*.mlpackage
|
||||||
|
bindings/java/.gradle/
|
||||||
|
bindings/java/.idea/
|
||||||
|
.idea/
|
||||||
|
|
||||||
|
benchmark_results.csv
|
||||||
|
cmake-build-debug/
|
||||||
|
.cxx/
|
||||||
|
.gradle/
|
||||||
|
local.properties
|
||||||
|
.log
|
||||||
|
.exe
|
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -1,3 +0,0 @@
|
|||||||
[submodule "bindings/ios"]
|
|
||||||
path = bindings/ios
|
|
||||||
url = https://github.com/ggerganov/whisper.spm
|
|
||||||
|
510
AUTHORS
Normal file
510
AUTHORS
Normal file
@ -0,0 +1,510 @@
|
|||||||
|
# date: Tue Feb 4 13:03:35 EET 2025
|
||||||
|
# this file is auto-generated by scripts/gen-authors.sh
|
||||||
|
|
||||||
|
0/0 <zero@imaskeleton.me>
|
||||||
|
0cc4m <picard12@live.de>
|
||||||
|
0xsourcecode <134374803+0xsourcecode@users.noreply.github.com>
|
||||||
|
65a <10104049+65a@users.noreply.github.com>
|
||||||
|
AIWintermuteAI <32562299+AIWintermuteAI@users.noreply.github.com>
|
||||||
|
AT <manyoso@users.noreply.github.com>
|
||||||
|
Aarni Koskela <akx@iki.fi>
|
||||||
|
Aaron Pham <29749331+aarnphm@users.noreply.github.com>
|
||||||
|
Aaron Taylor <aaron@exphat.com>
|
||||||
|
Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com>
|
||||||
|
Abitofevrything <54505189+abitofevrything@users.noreply.github.com>
|
||||||
|
Adam Jones <domdomegg+git@gmail.com>
|
||||||
|
Adrien Gallouët <adrien@gallouet.fr>
|
||||||
|
Adrien Gallouët <angt@huggingface.co>
|
||||||
|
AfryMask <AfryMask@163.com>
|
||||||
|
Ahmad Bilal <ahmad.bilal@empglabs.com>
|
||||||
|
Ahmad Tameem <113388789+Tameem-10xE@users.noreply.github.com>
|
||||||
|
AidanBeltonS <87009434+AidanBeltonS@users.noreply.github.com>
|
||||||
|
AidanBeltonS <aidan.belton@codeplay.com>
|
||||||
|
Akarshan Biswas <akarshan.biswas@gmail.com>
|
||||||
|
Akarshan Biswas <akarshanbiswas@fedoraproject.org>
|
||||||
|
Akash Mahajan <akash7190@gmail.com>
|
||||||
|
Akash Mahajan <akashmjn@stanford.edu>
|
||||||
|
Al Hoang <3811822-hoanga@users.noreply.gitlab.com>
|
||||||
|
Alan <unknown>
|
||||||
|
Albert Jin <albert.jin@gmail.com>
|
||||||
|
Alberto Cabrera Pérez <alberto.cabrera@codeplay.com>
|
||||||
|
Alberto Cabrera Pérez <alberto.cabrera@intel.com>
|
||||||
|
Aleksander Andrzejewski <18704749+aleksanderandrzejewski@users.noreply.github.com>
|
||||||
|
Alex Azarov <alex@azarov.by>
|
||||||
|
Alex Bacart <13940752+alex-bacart@users.noreply.github.com>
|
||||||
|
Alex Evgrashin <aevgrashin@yandex.ru>
|
||||||
|
Alex O'Connell <35843486+acon96@users.noreply.github.com>
|
||||||
|
Alexandr Graschenkov <alexandr.graschenkov91@gmail.com>
|
||||||
|
Alexandru Mariuti <alex@mariuti.com>
|
||||||
|
Alexey Kharlamov <alexey@kharlamov.biz>
|
||||||
|
Alfredo Montesinos <alfredo.montesinos@g.austincc.edu>
|
||||||
|
Ali Alameh <ali.alameh@isae.edu.lb>
|
||||||
|
Alter <0x7c48@gmail.com>
|
||||||
|
Ananta Bastola <anantarajbastola@gmail.com>
|
||||||
|
Andreas Kieslinger <47689530+aendk@users.noreply.github.com>
|
||||||
|
Andreas Lubbe <git@lubbe.org>
|
||||||
|
Andreu Huguet <andreuhuguet@gmail.com>
|
||||||
|
Andrew Huynh <a5thuynh@gmail.com>
|
||||||
|
Andrew Minh Nguyen <40281306+amqdn@users.noreply.github.com>
|
||||||
|
Andrew S <andrews54757@gmail.com>
|
||||||
|
Andy Maloney <asmaloney@gmail.com>
|
||||||
|
Anton Kostin <masguit42@users.noreply.github.com>
|
||||||
|
Artyom Mezin <psycho.fading@gmail.com>
|
||||||
|
Asad Memon <asad.lionpk@gmail.com>
|
||||||
|
Ashraful Islam <ashraful.meche@gmail.com>
|
||||||
|
AsukaMinato <asukaminato@nyan.eu.org>
|
||||||
|
AustinMroz <austinmroz@utexas.edu>
|
||||||
|
Avik Sengupta <avik@sengupta.net>
|
||||||
|
Bader-eddine Ouaich <49657842+baderouaich@users.noreply.github.com>
|
||||||
|
Baffin Lee <baffinlee@gmail.com>
|
||||||
|
Ben Ashbaugh <ben.ashbaugh@intel.com>
|
||||||
|
Ben Nortier <bjnortier@gmail.com>
|
||||||
|
Benjamin Heiniger <benjamin.heiniger@bluewin.ch>
|
||||||
|
Bernhard M. Wiedemann <githubbmwprimary@lsmod.de>
|
||||||
|
Binozo <70137898+Binozo@users.noreply.github.com>
|
||||||
|
Bo-Yi Wu <appleboy.tw@gmail.com>
|
||||||
|
Boris Bliznioukov <blib@mail.com>
|
||||||
|
Borislav Stanimirov <b.stanimirov@abv.bg>
|
||||||
|
Brad Murray <59848399+bradmurray-dt@users.noreply.github.com>
|
||||||
|
Brian Murray <brian@bmurray.ca>
|
||||||
|
CRD716 <crd716@gmail.com>
|
||||||
|
Canis Lupus <Canis-UK@users.noreply.github.com>
|
||||||
|
Carlos Zoido <mrgalleta@gmail.com>
|
||||||
|
Carolinabanana <140120812+Carolinabanana@users.noreply.github.com>
|
||||||
|
CarterLi999 <664681047@qq.com>
|
||||||
|
ChangSeok Oh <shivamidow@users.noreply.github.com>
|
||||||
|
Changyeon Kim <cyzero.kim@samsung.com>
|
||||||
|
Chaoqun <27287694+OpenWaygate@users.noreply.github.com>
|
||||||
|
Charles Xu <63788048+chaxu01@users.noreply.github.com>
|
||||||
|
Charles Xu <charles.xu@arm.com>
|
||||||
|
Chen Xi <xi2.chen@intel.com>
|
||||||
|
Chen Xi <xixichen08@foxmail.com>
|
||||||
|
Chenguang Li <87689256+noemotiovon@users.noreply.github.com>
|
||||||
|
Chia-Hsiang Cheng <88014292+garychia@users.noreply.github.com>
|
||||||
|
Chidi Williams <williamschidi1@gmail.com>
|
||||||
|
Chris Elrod <elrodc@gmail.com>
|
||||||
|
Christian <12550267+iceychris@users.noreply.github.com>
|
||||||
|
Christian Kastner <ckk@kvr.at>
|
||||||
|
Clifford Heath <clifford.heath@gmail.com>
|
||||||
|
Clint Herron <hanclinto@gmail.com>
|
||||||
|
Colin <github@whoisc.cc>
|
||||||
|
Conrad Kramer <conrad@conradkramer.com>
|
||||||
|
Corey Earwood <iamcgn+github@gmail.com>
|
||||||
|
CrispStrobe <154636388+CrispStrobe@users.noreply.github.com>
|
||||||
|
DAN™ <dranger003@gmail.com>
|
||||||
|
DGdev91 <DGdev91@users.noreply.github.com>
|
||||||
|
Damian Czaja <trojan295@protonmail.com>
|
||||||
|
Dan Johansson <164997844+eddnjjn@users.noreply.github.com>
|
||||||
|
Dan Johansson <dan.johansson@arm.com>
|
||||||
|
Daniel Bevenius <daniel.bevenius@gmail.com>
|
||||||
|
Daniel Valdivia <18384552+dvaldivia@users.noreply.github.com>
|
||||||
|
Daniel Ziegenberg <daniel@ziegenberg.at>
|
||||||
|
Daniele <57776841+daniandtheweb@users.noreply.github.com>
|
||||||
|
Dave <dave-fl@users.noreply.github.com>
|
||||||
|
Dave Airlie <airlied@gmail.com>
|
||||||
|
Dave Airlie <airlied@redhat.com>
|
||||||
|
Daven Sanassy <daven@vochlea.co.uk>
|
||||||
|
David <dnhkng@gmail.com>
|
||||||
|
David Thorpe <djt@mutablelogic.com>
|
||||||
|
DavidKorczynski <david@adalogics.com>
|
||||||
|
Davidson Francis <davidsondfgl@gmail.com>
|
||||||
|
Dener Stassun <denerstassun@gmail.com>
|
||||||
|
Dibakar Gope <dibakar.gope@arm.com>
|
||||||
|
Didzis Gosko <didzis@users.noreply.github.com>
|
||||||
|
Diego Devesa <slarengh@gmail.com>
|
||||||
|
Digipom <admin@digipom.com>
|
||||||
|
Dimo <dimo@ieee.org>
|
||||||
|
Djip007 <3705339+Djip007@users.noreply.github.com>
|
||||||
|
Djip007 <djip.perois@free.fr>
|
||||||
|
Dody Suria Wijaya <dodysw@gmail.com>
|
||||||
|
Dou Xinpeng <15529241576@163.com>
|
||||||
|
Dou Xinpeng <81913537+Dou-Git@users.noreply.github.com>
|
||||||
|
Dr. Tom Murphy VII Ph.D <499244+tom7@users.noreply.github.com>
|
||||||
|
Duncan McConnell <ddmcconnell4@gmail.com>
|
||||||
|
Egor Egorov <me@egorfine.com>
|
||||||
|
Elkana Bardugo <ttv200@gmail.com>
|
||||||
|
Emmanuel Schmidbauer <eschmidbauer@gmail.com>
|
||||||
|
Engininja2 <139037756+Engininja2@users.noreply.github.com>
|
||||||
|
Eric Curtin <ericcurtin17@gmail.com>
|
||||||
|
Eric Swanson <eswanson@alloscomp.com>
|
||||||
|
Eric Tendian <erictendian@gmail.com>
|
||||||
|
Eric Zhang <34133756+EZForever@users.noreply.github.com>
|
||||||
|
Erik Scholz <Green-Sky@users.noreply.github.com>
|
||||||
|
Evan Jones <evan.q.jones@gmail.com>
|
||||||
|
Evan Martin <evan.martin@gmail.com>
|
||||||
|
Eve <139727413+netrunnereve@users.noreply.github.com>
|
||||||
|
Evgeny Kuznetsov <evgeny@kuznetsov.md>
|
||||||
|
F1L1P <78918286+F1L1Pv2@users.noreply.github.com>
|
||||||
|
Faisal Zaghloul <quic_fzaghlou@quicinc.com>
|
||||||
|
Fangjun Kuang <csukuangfj@gmail.com>
|
||||||
|
Felix <stenbackfelix@gmail.com>
|
||||||
|
Finn Voorhees <finnvoorhees@gmail.com>
|
||||||
|
FirstTimeEZ <179362031+FirstTimeEZ@users.noreply.github.com>
|
||||||
|
FlippFuzz <41221030+FlippFuzz@users.noreply.github.com>
|
||||||
|
Frankie Robertson <frankier@users.noreply.github.com>
|
||||||
|
Gang Chen <goncha@gmail.com>
|
||||||
|
Gavin Cai <gavin1818@hotmail.com>
|
||||||
|
George Hindle <george@georgehindle.com>
|
||||||
|
Georgi Gerganov <ggerganov@gmail.com>
|
||||||
|
Gilad S <7817232+giladgd@users.noreply.github.com>
|
||||||
|
Gilad S <giladgd@users.noreply.github.com>
|
||||||
|
Gilad S. <7817232+giladgd@users.noreply.github.com>
|
||||||
|
GitAritron <103900385+GitAritron@users.noreply.github.com>
|
||||||
|
GiviMAD <GiviMAD@users.noreply.github.com>
|
||||||
|
Gleicon Moraes <gleicon@gmail.com>
|
||||||
|
Gregor Jasny <gjasny@googlemail.com>
|
||||||
|
Guillaume Wenzek <gwenzek@users.noreply.github.com>
|
||||||
|
HY. Kelvin Lee <34256578+hykelvinlee42@users.noreply.github.com>
|
||||||
|
Halalaluyafail3 <55773281+Halalaluyafail3@users.noreply.github.com>
|
||||||
|
Hang <bebound@gmail.com>
|
||||||
|
Haus1 <haus.xda@gmail.com>
|
||||||
|
Herman Semenov <GermanAizek@yandex.ru>
|
||||||
|
HimariO <dsfhe49854@gmail.com>
|
||||||
|
Hong Bo PENG <penghb@cn.ibm.com>
|
||||||
|
Hrishikesh Barman <geekodour@users.noreply.github.com>
|
||||||
|
Hugo <hugo@whynothugo.nl>
|
||||||
|
Ian Bicking <ian@ianbicking.org>
|
||||||
|
Ian Bull <irbull@eclipsesource.com>
|
||||||
|
Ihar Hrachyshka <ihrachys@redhat.com>
|
||||||
|
Ikko Ashimine <eltociear@gmail.com>
|
||||||
|
Ikko Eltociear Ashimine <eltociear@gmail.com>
|
||||||
|
InconsolableCellist <23345188+InconsolableCellist@users.noreply.github.com>
|
||||||
|
Ismatulla Mansurov <47342870+sapoepsilon@users.noreply.github.com>
|
||||||
|
Ivan <nekotekina@gmail.com>
|
||||||
|
Ivan Filipov <159561759+vanaka11@users.noreply.github.com>
|
||||||
|
Ivan Gorin <ivangorin21@gmail.com>
|
||||||
|
Ivo von Putzer Reibegg <ivo.putzer@gmail.com>
|
||||||
|
JJ <103335846+computerscienceiscool@users.noreply.github.com>
|
||||||
|
Jack Mousseau <jmousseau@users.noreply.github.com>
|
||||||
|
JacobLinCool <jacoblincool@gmail.com>
|
||||||
|
Jakub Ráček <blizzcz@gmail.com>
|
||||||
|
Jared Van Bortel <jared@nomic.ai>
|
||||||
|
Jay Binks <jaybinks@gmail.com>
|
||||||
|
Jayant <jayantyadav202@gmail.com>
|
||||||
|
Jeff Bolz <jbolz@nvidia.com>
|
||||||
|
Jeroen Mostert <jeroen.mostert@cm.com>
|
||||||
|
Jhen-Jie Hong <developer@jhen.me>
|
||||||
|
Jhen-Jie Hong <iainst0409@gmail.com>
|
||||||
|
JidongZhang-THU <1119708529@qq.com>
|
||||||
|
Jo Liss <joliss42@gmail.com>
|
||||||
|
Joe Todd <joe.todd@codeplay.com>
|
||||||
|
Johan <jr.raffin@gmail.com>
|
||||||
|
Johannes Gäßler <johannesg@5d6.de>
|
||||||
|
John Balis <phobossystems@gmail.com>
|
||||||
|
JohnnyB <jboero@users.noreply.github.com>
|
||||||
|
Jonathan Soo <jcsoo@agora.com>
|
||||||
|
Jonno <1160532+razodactyl@users.noreply.github.com>
|
||||||
|
Joonas Pihlajamaa <joonas.pihlajamaa@iki.fi>
|
||||||
|
Jose <34888496+Jerry-Master@users.noreply.github.com>
|
||||||
|
Josh Bleecher Snyder <josharian@gmail.com>
|
||||||
|
Josscii <jossciiweiyi@gmail.com>
|
||||||
|
Judd <foldl@users.noreply.github.com>
|
||||||
|
Jumper775 <78500318+jumpers775@users.noreply.github.com>
|
||||||
|
Jun Hee Yoo <contact.jhyoo@gmail.com>
|
||||||
|
Junil Kim <logyourself@gmail.com>
|
||||||
|
Justina Cho <justcho5@gmail.com>
|
||||||
|
Justine Tunney <jtunney@gmail.com>
|
||||||
|
Justine Tunney <jtunney@mozilla.com>
|
||||||
|
KITAITI Makoto <KitaitiMakoto@gmail.com>
|
||||||
|
KP Kaiser <kirk@zothcorp.com>
|
||||||
|
Kamilake <exjang0@gmail.com>
|
||||||
|
Karol Kontny <82021046+kkontny@users.noreply.github.com>
|
||||||
|
Karthick <j.karthic2004@gmail.com>
|
||||||
|
Kartik Saranathan <278928+Kartiku@users.noreply.github.com>
|
||||||
|
Kasumi <90275229+kasumi-1@users.noreply.github.com>
|
||||||
|
Kawrakow <48489457+ikawrakow@users.noreply.github.com>
|
||||||
|
Kendrick Taylor <kendrick@circuitsix.com>
|
||||||
|
Kevin Brothaler <admin@digipom.com>
|
||||||
|
Kevin Gibbons <bakkot@gmail.com>
|
||||||
|
Konosuke Sakai <konosuke@konosuke.work>
|
||||||
|
Konstantin Zhuravlyov <konstantin.zhuravlyov@amd.com>
|
||||||
|
Kreijstal <rainb@tfwno.gf>
|
||||||
|
Kylin <56434533+KyL0N@users.noreply.github.com>
|
||||||
|
LBlue <153975653+lbluep@users.noreply.github.com>
|
||||||
|
Larry Battle <larry.battle.tech@gmail.com>
|
||||||
|
Laytan Laats <laytanlaats@hotmail.com>
|
||||||
|
Leo Moll <leo.moll@yeasoft.com>
|
||||||
|
Lexevolution <31176843+Lexevolution@users.noreply.github.com>
|
||||||
|
LittleLoli <26589867+WhichWho@users.noreply.github.com>
|
||||||
|
Lucas Zanek <57494138+LucasZNK@users.noreply.github.com>
|
||||||
|
Luis Herrera <herrera-luis@users.noreply.github.com>
|
||||||
|
Lukas Rist <glaslos@gmail.com>
|
||||||
|
M. A. Ali <73258591+MightyStud@users.noreply.github.com>
|
||||||
|
M. Eren Akbiyik <erenakbiyik@gmail.com>
|
||||||
|
Ma Mingfei <mingfei.ma@intel.com>
|
||||||
|
Maciek <maciek.mab122@gmail.com>
|
||||||
|
Mahesh Madhav <67384846+heshpdx@users.noreply.github.com>
|
||||||
|
Marcin Mielniczuk <marmistrz.dev@zoho.eu>
|
||||||
|
Mark Karpelès <MagicalTux@users.noreply.github.com>
|
||||||
|
Mark Zhuang <zhuangqiubin@gmail.com>
|
||||||
|
Markus Tavenrath <mtavenrath@users.noreply.github.com>
|
||||||
|
Martin Delille <martin@delille.org>
|
||||||
|
Martin Warnaar <martinwarnaar@gmail.com>
|
||||||
|
Masaya, Kato <62578291+msy-kato@users.noreply.github.com>
|
||||||
|
Matheus de Sousa <23645013+keyehzy@users.noreply.github.com>
|
||||||
|
Mathieu Baudier <mbaudier@argeo.org>
|
||||||
|
Mathijs de Bruin <mathijs@mathijsfietst.nl>
|
||||||
|
Matija Pevec <mightymatth@users.noreply.github.com>
|
||||||
|
Matt Stephenson <mstephenson6@users.noreply.github.com>
|
||||||
|
Max Krasnyansky <max.krasnyansky@gmail.com>
|
||||||
|
Max Krasnyansky <quic_maxk@quicinc.com>
|
||||||
|
Maximiliano Levi <8160966+maxilevi@users.noreply.github.com>
|
||||||
|
Meng, Hengyu <hengyu.meng@intel.com>
|
||||||
|
Mengqing Cao <cmq0113@163.com>
|
||||||
|
Michael Podvitskiy <podvitskiymichael@gmail.com>
|
||||||
|
Michael Rienstra <mrienstra@gmail.com>
|
||||||
|
Mikhail Grigorev <sleuthhound@gmail.com>
|
||||||
|
Mohammadreza Hendiani <hendiani.mohammadreza@gmail.com>
|
||||||
|
Mohit Agarwal <mohit@sdf.org>
|
||||||
|
Molly Sophia <mollysophia379@gmail.com>
|
||||||
|
Murilo Santana <mvrilo@gmail.com>
|
||||||
|
NETZkultur GmbH <mulholland@netzkultur.de>
|
||||||
|
Natsu <chino@hotococoa.moe>
|
||||||
|
Neil Chudleigh <nchudleigh@users.noreply.github.com>
|
||||||
|
Neo Zhang <14088817+arthw@users.noreply.github.com>
|
||||||
|
Neo Zhang Jianyu <jianyu.zhang@intel.com>
|
||||||
|
Neuman Vong <neuman.vong@gmail.com>
|
||||||
|
Nicholai Tukanov <nicholaitukanov@gmail.com>
|
||||||
|
Nicholas Albion <nalbion@yahoo.com>
|
||||||
|
Nico Bosshard <nico@bosshome.ch>
|
||||||
|
Nicolò Scipione <nicolo.scipione@codeplay.com>
|
||||||
|
Niels Mayer <Niels.Mayer@gmail.com>
|
||||||
|
Nikita Sarychev <42014488+sARY77@users.noreply.github.com>
|
||||||
|
Nikolaj Olsson <nikse.dk@gmail.com>
|
||||||
|
Okabintaro <103938900+Okabintaro@users.noreply.github.com>
|
||||||
|
Oleg Sidorov <me@whitebox.io>
|
||||||
|
Oleg Sidorov <oleg@sidorov.nl>
|
||||||
|
Olivier Chafik <ochafik@users.noreply.github.com>
|
||||||
|
Ondrej Kokes <ondrej.kokes@gmail.com>
|
||||||
|
Ouadie EL FAROUKI <ouadie.elfarouki@codeplay.com>
|
||||||
|
PAB <pierreantoine.bannier@gmail.com>
|
||||||
|
Paul Tsochantaris <ptsochantaris@icloud.com>
|
||||||
|
Pedro Probst <pprobst@insiberia.net>
|
||||||
|
Peng <hzp1024@qq.com>
|
||||||
|
Peter <peter277@users.noreply.github.com>
|
||||||
|
Philipp Zabel <philipp.zabel@gmail.com>
|
||||||
|
Philippe Normand <phil@base-art.net>
|
||||||
|
Philippe Normand <philn@igalia.com>
|
||||||
|
Plamen Minev <pacominev@gmail.com>
|
||||||
|
Prashant Vithule <119530321+Vithulep@users.noreply.github.com>
|
||||||
|
Przemysław Pawełczyk <przemoc@gmail.com>
|
||||||
|
Qianhe Chen <54462604+chenqianhe@users.noreply.github.com>
|
||||||
|
R0CKSTAR <xiaodong.ye@mthreads.com>
|
||||||
|
R0CKSTAR <yeahdongcn@gmail.com>
|
||||||
|
Radoslav Gerganov <rgerganov@gmail.com>
|
||||||
|
Radosław Gryta <radek.gryta@gmail.com>
|
||||||
|
Rahul Vadhyar <107788610+RahulVadhyar@users.noreply.github.com>
|
||||||
|
Raiya Araki <83504221+rai62@users.noreply.github.com>
|
||||||
|
Reinforce-II <fate@eastal.com>
|
||||||
|
Reinis Muiznieks <muiznieks.reinis@gmail.com>
|
||||||
|
RelatedTitle <r3latedtitle@gmail.com>
|
||||||
|
Rémy Oudompheng <oudomphe@phare.normalesup.org>
|
||||||
|
RhinoDevel <RhinoDevel@users.noreply.github.com>
|
||||||
|
Rich Jones <miserlou@gmail.com>
|
||||||
|
Robert Ormandi <52251610+ormandi@users.noreply.github.com>
|
||||||
|
Robin <robin.xw@hotmail.com>
|
||||||
|
Roddur Dasgupta <roddurd@gmail.com>
|
||||||
|
Roland Rabien <figbug@gmail.com>
|
||||||
|
Romain Biessy <romain.biessy@codeplay.com>
|
||||||
|
Ronsor <ronsor@ronsor.pw>
|
||||||
|
Rotem Dan <rotemdan@gmail.com>
|
||||||
|
Ryan Hitchman <hitchmanr@gmail.com>
|
||||||
|
Ryan Metcalfe <107415876+RyanMetcalfeInt8@users.noreply.github.com>
|
||||||
|
RyanChang <ftes90015@gmail.com>
|
||||||
|
SRHMorris <69468379+SRHMorris@users.noreply.github.com>
|
||||||
|
SXX <sxx1136965276@gmail.com>
|
||||||
|
Sacha Arbonel <sacha.arbonel@hotmail.fr>
|
||||||
|
Salman Faroz <stsfaroz@gmail.com>
|
||||||
|
Salvatore Mesoraca <s.mesoraca16@gmail.com>
|
||||||
|
Sam <49637763+Onlyartist9@users.noreply.github.com>
|
||||||
|
Sam Pullara <spullara@gmail.com>
|
||||||
|
Samuel Durante <44513615+samueldurantes@users.noreply.github.com>
|
||||||
|
Sanchit Gandhi <93869735+sanchit-gandhi@users.noreply.github.com>
|
||||||
|
Sandro Hanea <40202887+sandrohanea@users.noreply.github.com>
|
||||||
|
Sergio López <slp@redhat.com>
|
||||||
|
Sergio López <slp@sinrega.org>
|
||||||
|
Shanshan Shen <467638484@qq.com>
|
||||||
|
Shijie <821898965@qq.com>
|
||||||
|
Shupei Fan <dymarkfan@outlook.com>
|
||||||
|
Siddharth Ramakrishnan <srr2141@columbia.edu>
|
||||||
|
Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
|
||||||
|
Simon Moisselin <simon.moisstoll@gmail.com>
|
||||||
|
Sindre Sorhus <sindresorhus@gmail.com>
|
||||||
|
Slava Primenko <primenko.s@gmail.com>
|
||||||
|
Srihari-mcw <96763064+Srihari-mcw@users.noreply.github.com>
|
||||||
|
Stavros Panakakis <53979866+Stavrospanakakis@users.noreply.github.com>
|
||||||
|
Stefan Sydow <s.sydow@heinlein-video.de>
|
||||||
|
Stefan Sydow <stefan@sydow.email>
|
||||||
|
Syahmi Azhar <prsyahmi@gmail.com>
|
||||||
|
Syed Jafri <syedjafri97@gmail.com>
|
||||||
|
Sơn Phan Trung <phantrungson17@gmail.com>
|
||||||
|
Taisei Mima <bhbstar.me@gmail.com>
|
||||||
|
Takeshi Inoue <inoue.takeshi@gmail.com>
|
||||||
|
Tamotsu Takahashi <ttakah+github@gmail.com>
|
||||||
|
Taras Glek <taras@thegp.com>
|
||||||
|
Tauseef Mohiuddin <35351464+tauseefmohammed2@users.noreply.github.com>
|
||||||
|
Thamster <Thamster@users.noreply.github.com>
|
||||||
|
Thijs Raymakers <thijs@raymakers.nl>
|
||||||
|
Thomas Fitzsimmons <fitzsim@fitzsim.org>
|
||||||
|
Tiago Fassoni <tiagofassoni@users.noreply.github.com>
|
||||||
|
Tienshiao Ma <tienshiao@tienshiao.org>
|
||||||
|
Tim Miller <drasticactions@users.noreply.github.com>
|
||||||
|
Timothy Cronin <40186632+4imothy@users.noreply.github.com>
|
||||||
|
Tobrun <tobrun.van.nuland@gmail.com>
|
||||||
|
Todd <taf2@users.noreply.github.com>
|
||||||
|
Toliver <teejae@gmail.com>
|
||||||
|
Tong Li <31761981+litongjava@users.noreply.github.com>
|
||||||
|
Tony Wasserka <4840017+neobrain@users.noreply.github.com>
|
||||||
|
Topping1 <78745143+Topping1@users.noreply.github.com>
|
||||||
|
Travis Cline <travis.cline@gmail.com>
|
||||||
|
UEXTM.com <84163508+uextm@users.noreply.github.com>
|
||||||
|
UsernamesLame <156965854+UsernamesLame@users.noreply.github.com>
|
||||||
|
Vadim Peretokin <vperetokin@hey.com>
|
||||||
|
Valentin Gosu <1454649+valenting@users.noreply.github.com>
|
||||||
|
Vin Misra <vinith@alum.mit.edu>
|
||||||
|
Vulcan <93451215+trholding@users.noreply.github.com>
|
||||||
|
WhiteOlivierus <36532695+WhiteOlivierus@users.noreply.github.com>
|
||||||
|
William Tambellini <william.tambellini@gmail.com>
|
||||||
|
William Tambellini <wtambellini@sdl.com>
|
||||||
|
Wilson Silva <wilson.dsigns@gmail.com>
|
||||||
|
Xiang (Kevin) Li <kevinli020508@gmail.com>
|
||||||
|
Xiao-Yong Jin <jinxiaoyong@gmail.com>
|
||||||
|
XiaotaoChen <chenxiaotao1234@gmail.com>
|
||||||
|
Xingchen Song(宋星辰) <xingchensong1996@163.com>
|
||||||
|
Xinpeng Dou <81913537+Dou-Git@users.noreply.github.com>
|
||||||
|
Xuan Son Nguyen <thichthat@gmail.com>
|
||||||
|
Yajing Tang <phillis@google.com>
|
||||||
|
Yang Shen <aplshenyang@gmail.com>
|
||||||
|
Yunès <jean.baptiste.yunes@free.fr>
|
||||||
|
Yuri Khrustalev <ykhrustalev@users.noreply.github.com>
|
||||||
|
Yusuf Redžić <48274562+redzic@users.noreply.github.com>
|
||||||
|
ZaBlazzingZephyrus <119159668+blazingzephyr@users.noreply.github.com>
|
||||||
|
Zhenwei Jin <109658203+kylo5aby@users.noreply.github.com>
|
||||||
|
Zhiyuan Li <lizhiyuan@uniartisan.com>
|
||||||
|
Zhiyuan Li <uniartisan2017@gmail.com>
|
||||||
|
Zigfrid Zvezdin <ziggerZZ@gmail.com>
|
||||||
|
Zollner <24618122+Zolliner@users.noreply.github.com>
|
||||||
|
a3sh <38979186+A3shTnT@users.noreply.github.com>
|
||||||
|
ag2s20150909 <19373730+ag2s20150909@users.noreply.github.com>
|
||||||
|
agray3 <agray3@users.noreply.github.com>
|
||||||
|
ai-at-home <149282006+ai-at-home@users.noreply.github.com>
|
||||||
|
aldorof <aldorof@users.noreply.github.com>
|
||||||
|
alonfaraj <alonfaraj@gmail.com>
|
||||||
|
amd-dwang <dong.wang@amd.com>
|
||||||
|
amritahs-ibm <amritahs@linux.vnet.ibm.com>
|
||||||
|
andypayne <apayne@gmail.com>
|
||||||
|
ardfork <134447697+ardfork@users.noreply.github.com>
|
||||||
|
arizhih <40765267+arizhih@users.noreply.github.com>
|
||||||
|
automaticcat <daogiatuank54@gmail.com>
|
||||||
|
bandoti <141645996+bandoti@users.noreply.github.com>
|
||||||
|
be-next <jerome.ramette@gmail.com>
|
||||||
|
bert hubert <bert@hubertnet.nl>
|
||||||
|
billyct <billy_allen@126.com>
|
||||||
|
bmwl <brian.marshall@tolko.com>
|
||||||
|
bobqianic <129547291+bobqianic@users.noreply.github.com>
|
||||||
|
bocytko <bocytko+github@gmail.com>
|
||||||
|
boolemancer <48014766+boolemancer@users.noreply.github.com>
|
||||||
|
boolemancer <boolemancer@gmail.com>
|
||||||
|
bradmit <151883577+bradmit@users.noreply.github.com>
|
||||||
|
brunofaustino <b.fa.amorim@gmail.com>
|
||||||
|
bssrdf <merlintiger@hotmail.com>
|
||||||
|
byte-6174 <88070277+byte-6174@users.noreply.github.com>
|
||||||
|
cdosoftei <ciprian.dosoftei@gmail.com>
|
||||||
|
clach04 <Chris.Clark@actian.com>
|
||||||
|
compilade <113953597+compilade@users.noreply.github.com>
|
||||||
|
compilade <git@compilade.net>
|
||||||
|
conradg <conradjgodfrey@gmail.com>
|
||||||
|
crummyh <elijah@crums.us>
|
||||||
|
ddpasa <112642920+ddpasa@users.noreply.github.com>
|
||||||
|
denersc <denerstassun@gmail.com>
|
||||||
|
dscripka <dscripka@users.noreply.github.com>
|
||||||
|
duthils <duthils@duthils.net>
|
||||||
|
ecneladis <ecneladis@users.noreply.github.com>
|
||||||
|
faker <nspyia2002@gmail.com>
|
||||||
|
fitzsim <fitzsim@fitzsim.org>
|
||||||
|
fj-y-saito <85871716+fj-y-saito@users.noreply.github.com>
|
||||||
|
fraxy-v <65565042+fraxy-v@users.noreply.github.com>
|
||||||
|
genevera (she/her) <genevera@users.noreply.github.com>
|
||||||
|
geniusnut <geniusnut@gmail.com>
|
||||||
|
gilbertgong <gilbert.gong@gmail.com>
|
||||||
|
gn64 <yukikaze.jp@gmail.com>
|
||||||
|
goldwaving <77494627+goldwaving@users.noreply.github.com>
|
||||||
|
greeshmay <greeshmay@gmail.com>
|
||||||
|
haopeng <657407891@qq.com>
|
||||||
|
hipudding <huafengchun@gmail.com>
|
||||||
|
hsinhoyeh <yhh92u@gmail.com>
|
||||||
|
hydai <z54981220@gmail.com>
|
||||||
|
iamthad <thadeus.j.fleming@gmail.com>
|
||||||
|
issixx <46835150+issixx@users.noreply.github.com>
|
||||||
|
james wolf <contractorwolf@hotmail.com>
|
||||||
|
jdomke <28772296+jdomke@users.noreply.github.com>
|
||||||
|
jettoblack <jettoblack@gmail.com>
|
||||||
|
jiez <373447296@qq.com>
|
||||||
|
joecryptotoo <80373433+joecryptotoo@users.noreply.github.com>
|
||||||
|
jorismertz <35079666+jorismertz@users.noreply.github.com>
|
||||||
|
junchao-loongson <68935141+junchao-loongson@users.noreply.github.com>
|
||||||
|
junkfood <69683722+JunkFood02@users.noreply.github.com>
|
||||||
|
jwijffels <jwijffels@bnosac.be>
|
||||||
|
k.h.lai <adrian.k.h.lai@outlook.com>
|
||||||
|
kamranjon <kamranjon@gmail.com>
|
||||||
|
katsu560 <katsu560oo-@docomo.ne.jp>
|
||||||
|
kennethge <57784063+kenneth-ge@users.noreply.github.com>
|
||||||
|
keyehzy <msamuel@aluno.puc-rio.br>
|
||||||
|
kunnis <kunnis@users.noreply.github.com>
|
||||||
|
l3utterfly <gc.pthzfoldr@gmail.com>
|
||||||
|
leejet <leejet714@gmail.com>
|
||||||
|
leo-pony <nengjunma@outlook.com>
|
||||||
|
lhez <quic_lih@quicinc.com>
|
||||||
|
litong <31761981+litongjava@users.noreply.github.com>
|
||||||
|
liuwei-git <14815172+liuwei-git@users.noreply.github.com>
|
||||||
|
lnyan <lkwq007@gmail.com>
|
||||||
|
luoyu-intel <yu.luo@intel.com>
|
||||||
|
m.bell <m.bell@techsmith.com>
|
||||||
|
mahorozte <41834471+mahorozte@users.noreply.github.com>
|
||||||
|
mashizora <30516315+mashizora@users.noreply.github.com>
|
||||||
|
matt23654 <matthew.webber@protonmail.com>
|
||||||
|
matteo <matteogeniaccio@yahoo.it>
|
||||||
|
mgrachten <maarten@grachten.eu>
|
||||||
|
mkiol <mkiol@users.noreply.github.com>
|
||||||
|
mky_coder <47767389+mkycoder@users.noreply.github.com>
|
||||||
|
novag <7754358+novag@users.noreply.github.com>
|
||||||
|
pajowu <pajowu@pajowu.de>
|
||||||
|
pengxin99 <pengxin.yuan@intel.com>
|
||||||
|
petterreinholdtsen <pere-github@hungry.com>
|
||||||
|
polarmoon <90010972+polarmoon@users.noreply.github.com>
|
||||||
|
rlapray <lapray.romain@gmail.com>
|
||||||
|
sandrohanea <40202887+sandrohanea@users.noreply.github.com>
|
||||||
|
semiformal-net <84111142+semiformal-net@users.noreply.github.com>
|
||||||
|
shibukazu <61775791+shibukazu@users.noreply.github.com>
|
||||||
|
shikokuchuo <53399081+shikokuchuo@users.noreply.github.com>
|
||||||
|
slaren <slarengh@gmail.com>
|
||||||
|
slashlib <slashlib@users.noreply.github.com>
|
||||||
|
snadampal <87143774+snadampal@users.noreply.github.com>
|
||||||
|
someone13574 <81528246+someone13574@users.noreply.github.com>
|
||||||
|
st-gr <38470677+st-gr@users.noreply.github.com>
|
||||||
|
stduhpf <stephduh@live.fr>
|
||||||
|
stormofice <58337328+stormofice@users.noreply.github.com>
|
||||||
|
texmex76 <40733439+texmex76@users.noreply.github.com>
|
||||||
|
thefinaldegree <thefinaldegree@gmail.com>
|
||||||
|
thewh1teagle <61390950+thewh1teagle@users.noreply.github.com>
|
||||||
|
toboil-features <160222185+toboil-features@users.noreply.github.com>
|
||||||
|
trixirt <trix@redhat.com>
|
||||||
|
ulatekh <ulatekh@yahoo.com>
|
||||||
|
undef <undefdev@gmail.com>
|
||||||
|
uvos <devnull@uvos.xyz>
|
||||||
|
uvos <philipp@uvos.xyz>
|
||||||
|
valVk <valVk@users.noreply.github.com>
|
||||||
|
venkr <venkateshrameshkumar+1@gmail.com>
|
||||||
|
vicalloy <zbirder@gmail.com>
|
||||||
|
wangshuai09 <391746016@qq.com>
|
||||||
|
woachk <24752637+woachk@users.noreply.github.com>
|
||||||
|
xctan <axunlei@gmail.com>
|
||||||
|
xdrudis <xavierdrudis@yahoo.es>
|
||||||
|
yuri@FreeBSD <yuri@FreeBSD>
|
||||||
|
zhangjixiong <code.zjx@gmail.com>
|
||||||
|
zhentaoyu <zhentao.yu@intel.com>
|
||||||
|
zhouwg <6889919+zhouwg@users.noreply.github.com>
|
||||||
|
zhouwg <zhouwg2000@gmail.com>
|
||||||
|
谢乃闻 <sienaiwun@users.noreply.github.com>
|
||||||
|
布客飞龙 <562826179@qq.com>
|
||||||
|
Артём Земляк <azemlyak@smart-consulting.ru>
|
316
CMakeLists.txt
316
CMakeLists.txt
@ -1,21 +1,31 @@
|
|||||||
cmake_minimum_required (VERSION 3.0)
|
cmake_minimum_required(VERSION 3.5) # for add_link_options and implicit target directories.
|
||||||
|
project("whisper.cpp" C CXX)
|
||||||
|
project("whisper.cpp" VERSION 1.7.4)
|
||||||
|
include(CheckIncludeFileCXX)
|
||||||
|
|
||||||
project(whisper.cpp VERSION 1.2.1)
|
set(SOVERSION 1)
|
||||||
|
|
||||||
|
#set(CMAKE_WARN_DEPRECATED YES)
|
||||||
|
set(CMAKE_WARN_UNUSED_CLI YES)
|
||||||
|
|
||||||
|
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||||
|
|
||||||
|
if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
|
||||||
|
set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
|
||||||
|
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
|
||||||
|
endif()
|
||||||
|
|
||||||
# Add path to modules
|
# Add path to modules
|
||||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
|
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
|
||||||
|
|
||||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
|
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
|
||||||
|
|
||||||
if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
|
if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
|
||||||
set(WHISPER_STANDALONE ON)
|
set(WHISPER_STANDALONE ON)
|
||||||
include(GitVars)
|
|
||||||
include(BuildTypes)
|
include(git-vars)
|
||||||
|
|
||||||
# configure project version
|
# configure project version
|
||||||
if (EXISTS "${CMAKE_SOURCE_DIR}/bindings/ios/Makefile-tmpl")
|
|
||||||
configure_file(${CMAKE_SOURCE_DIR}/bindings/ios/Makefile-tmpl ${CMAKE_SOURCE_DIR}/bindings/ios/Makefile @ONLY)
|
|
||||||
endif()
|
|
||||||
configure_file(${CMAKE_SOURCE_DIR}/bindings/javascript/package-tmpl.json ${CMAKE_SOURCE_DIR}/bindings/javascript/package.json @ONLY)
|
configure_file(${CMAKE_SOURCE_DIR}/bindings/javascript/package-tmpl.json ${CMAKE_SOURCE_DIR}/bindings/javascript/package.json @ONLY)
|
||||||
else()
|
else()
|
||||||
set(WHISPER_STANDALONE OFF)
|
set(WHISPER_STANDALONE OFF)
|
||||||
@ -25,6 +35,11 @@ if (EMSCRIPTEN)
|
|||||||
set(BUILD_SHARED_LIBS_DEFAULT OFF)
|
set(BUILD_SHARED_LIBS_DEFAULT OFF)
|
||||||
|
|
||||||
option(WHISPER_WASM_SINGLE_FILE "whisper: embed WASM inside the generated whisper.js" ON)
|
option(WHISPER_WASM_SINGLE_FILE "whisper: embed WASM inside the generated whisper.js" ON)
|
||||||
|
|
||||||
|
# TODO: without these, we get the following error:
|
||||||
|
# wasm-ld: error: --shared-memory is disallowed by whisper.cpp.o because it was not compiled with 'atomics' or 'bulk-memory' features.
|
||||||
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread -s TOTAL_STACK=5242880")
|
||||||
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -s TOTAL_STACK=5242880")
|
||||||
else()
|
else()
|
||||||
if (MINGW)
|
if (MINGW)
|
||||||
set(BUILD_SHARED_LIBS_DEFAULT OFF)
|
set(BUILD_SHARED_LIBS_DEFAULT OFF)
|
||||||
@ -33,223 +48,136 @@ else()
|
|||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# options
|
option(BUILD_SHARED_LIBS "build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT})
|
||||||
|
|
||||||
option(BUILD_SHARED_LIBS "whisper: build shared libs" ${BUILD_SHARED_LIBS_DEFAULT})
|
#
|
||||||
|
# option list
|
||||||
|
#
|
||||||
|
|
||||||
option(WHISPER_ALL_WARNINGS "whisper: enable all compiler warnings" ON)
|
# general
|
||||||
option(WHISPER_ALL_WARNINGS_3RD_PARTY "whisper: enable all compiler warnings in 3rd party libs" OFF)
|
option(WHISPER_CCACHE "whisper: use ccache if available" ON)
|
||||||
|
|
||||||
option(WHISPER_SANITIZE_THREAD "whisper: enable thread sanitizer" OFF)
|
# debug
|
||||||
option(WHISPER_SANITIZE_ADDRESS "whisper: enable address sanitizer" OFF)
|
option(WHISPER_ALL_WARNINGS "whisper: enable all compiler warnings" ON)
|
||||||
option(WHISPER_SANITIZE_UNDEFINED "whisper: enable undefined sanitizer" OFF)
|
option(WHISPER_ALL_WARNINGS_3RD_PARTY "whisper: enable all compiler warnings in 3rd party libs" OFF)
|
||||||
|
|
||||||
option(WHISPER_BUILD_TESTS "whisper: build tests" ${WHISPER_STANDALONE})
|
# build
|
||||||
option(WHISPER_BUILD_EXAMPLES "whisper: build examples" ${WHISPER_STANDALONE})
|
option(WHISPER_FATAL_WARNINGS "whisper: enable -Werror flag" OFF)
|
||||||
|
|
||||||
option(WHISPER_SUPPORT_SDL2 "whisper: support for libSDL2" OFF)
|
|
||||||
|
|
||||||
if (APPLE)
|
|
||||||
option(WHISPER_NO_ACCELERATE "whisper: disable Accelerate framework" OFF)
|
|
||||||
option(WHISPER_NO_AVX "whisper: disable AVX" OFF)
|
|
||||||
option(WHISPER_NO_AVX2 "whisper: disable AVX2" OFF)
|
|
||||||
option(WHISPER_NO_FMA "whisper: disable FMA" OFF)
|
|
||||||
else()
|
|
||||||
option(WHISPER_SUPPORT_OPENBLAS "whisper: support for OpenBLAS" OFF)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
option(WHISPER_PERF "whisper: enable perf timings" OFF)
|
|
||||||
|
|
||||||
# sanitizers
|
# sanitizers
|
||||||
|
option(WHISPER_SANITIZE_THREAD "whisper: enable thread sanitizer" OFF)
|
||||||
|
option(WHISPER_SANITIZE_ADDRESS "whisper: enable address sanitizer" OFF)
|
||||||
|
option(WHISPER_SANITIZE_UNDEFINED "whisper: enable undefined sanitizer" OFF)
|
||||||
|
|
||||||
if (NOT MSVC)
|
# extra artifacts
|
||||||
if (WHISPER_SANITIZE_THREAD)
|
option(WHISPER_BUILD_TESTS "whisper: build tests" ${WHISPER_STANDALONE})
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=thread")
|
option(WHISPER_BUILD_EXAMPLES "whisper: build examples" ${WHISPER_STANDALONE})
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=thread")
|
option(WHISPER_BUILD_SERVER "whisper: build server example" ${WHISPER_STANDALONE})
|
||||||
endif()
|
|
||||||
|
|
||||||
if (WHISPER_SANITIZE_ADDRESS)
|
# 3rd party libs
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -fno-omit-frame-pointer")
|
option(WHISPER_CURL "whisper: use libcurl to download model from an URL" OFF)
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fno-omit-frame-pointer")
|
option(WHISPER_SDL2 "whisper: support for libSDL2" OFF)
|
||||||
endif()
|
|
||||||
|
|
||||||
if (WHISPER_SANITIZE_UNDEFINED)
|
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=undefined")
|
option(WHISPER_FFMPEG "whisper: support building and linking with ffmpeg libs (avcodec, swresample, ...)" OFF)
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined")
|
|
||||||
endif()
|
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ffast-math")
|
option(WHISPER_COREML "whisper: enable Core ML framework" OFF)
|
||||||
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native")
|
option(WHISPER_COREML_ALLOW_FALLBACK "whisper: allow non-CoreML fallback" OFF)
|
||||||
|
option(WHISPER_OPENVINO "whisper: support for OpenVINO" OFF)
|
||||||
|
|
||||||
# dependencies
|
# Required for relocatable CMake package
|
||||||
|
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
|
||||||
|
|
||||||
find_package(Threads REQUIRED)
|
# override ggml options
|
||||||
|
set(GGML_CCACHE ${WHISPER_CCACHE})
|
||||||
|
set(GGML_SANITIZE_THREAD ${WHISPER_SANITIZE_THREAD})
|
||||||
|
set(GGML_SANITIZE_ADDRESS ${WHISPER_SANITIZE_ADDRESS})
|
||||||
|
set(GGML_SANITIZE_UNDEFINED ${WHISPER_SANITIZE_UNDEFINED})
|
||||||
|
set(GGML_ALL_WARNINGS ${WHISPER_ALL_WARNINGS})
|
||||||
|
set(GGML_FATAL_WARNINGS ${WHISPER_FATAL_WARNINGS})
|
||||||
|
|
||||||
# on APPLE - include Accelerate framework
|
# transition helpers
|
||||||
if (APPLE AND NOT WHISPER_NO_ACCELERATE)
|
function (whisper_option_depr TYPE OLD NEW)
|
||||||
find_library(ACCELERATE_FRAMEWORK Accelerate)
|
if (${OLD})
|
||||||
if (ACCELERATE_FRAMEWORK)
|
message(${TYPE} "${OLD} is deprecated and will be removed in the future.\nUse ${NEW} instead\n")
|
||||||
message(STATUS "Accelerate framework found")
|
set(${NEW} ON)
|
||||||
|
|
||||||
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} ${ACCELERATE_FRAMEWORK})
|
|
||||||
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_USE_ACCELERATE)
|
|
||||||
else()
|
|
||||||
message(WARNING "Accelerate framework not found")
|
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endfunction()
|
||||||
|
|
||||||
if (WHISPER_SUPPORT_OPENBLAS)
|
whisper_option_depr(FATAL_ERROR WHISPER_CUBLAS GGML_CUDA)
|
||||||
find_library(OPENBLAS_LIB
|
whisper_option_depr(WARNING WHISPER_CUDA GGML_CUDA)
|
||||||
NAMES openblas libopenblas
|
whisper_option_depr(WARNING WHISPER_KOMPUTE GGML_KOMPUTE)
|
||||||
)
|
whisper_option_depr(WARNING WHISPER_METAL GGML_METAL)
|
||||||
if (OPENBLAS_LIB)
|
whisper_option_depr(WARNING WHISPER_METAL_EMBED_LIBRARY GGML_METAL_EMBED_LIBRARY)
|
||||||
message(STATUS "OpenBLAS found")
|
whisper_option_depr(WARNING WHISPER_NATIVE GGML_NATIVE)
|
||||||
|
whisper_option_depr(WARNING WHISPER_OPENMP GGML_OPENMP)
|
||||||
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} ${OPENBLAS_LIB})
|
whisper_option_depr(WARNING WHISPER_RPC GGML_RPC)
|
||||||
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_USE_OPENBLAS)
|
whisper_option_depr(WARNING WHISPER_SYCL GGML_SYCL)
|
||||||
else()
|
whisper_option_depr(WARNING WHISPER_SYCL_F16 GGML_SYCL_F16)
|
||||||
message(WARNING "OpenBLAS not found")
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# compiler flags
|
|
||||||
|
|
||||||
if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
|
|
||||||
set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
|
|
||||||
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "RelWithDebInfo")
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
if (WHISPER_ALL_WARNINGS)
|
|
||||||
if (NOT MSVC)
|
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} \
|
|
||||||
-Wall \
|
|
||||||
-Wextra \
|
|
||||||
-Wpedantic \
|
|
||||||
-Wshadow \
|
|
||||||
-Wcast-qual \
|
|
||||||
-Wstrict-prototypes \
|
|
||||||
-Wpointer-arith \
|
|
||||||
-Wno-unused-function \
|
|
||||||
")
|
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} \
|
|
||||||
-Wall \
|
|
||||||
-Wextra \
|
|
||||||
-Wpedantic \
|
|
||||||
-Wcast-qual \
|
|
||||||
")
|
|
||||||
else()
|
|
||||||
# todo : msvc
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (NOT MSVC)
|
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror=vla")
|
|
||||||
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-math-errno -ffinite-math-only -funsafe-math-optimizations")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
|
|
||||||
|
|
||||||
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64")
|
|
||||||
message(STATUS "ARM detected")
|
|
||||||
else()
|
|
||||||
message(STATUS "x86 detected")
|
|
||||||
if (MSVC)
|
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2")
|
|
||||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /arch:AVX2")
|
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:AVX2")
|
|
||||||
else()
|
|
||||||
if (EMSCRIPTEN)
|
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread")
|
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
|
|
||||||
else()
|
|
||||||
if(NOT WHISPER_NO_AVX)
|
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx")
|
|
||||||
endif()
|
|
||||||
if(NOT WHISPER_NO_AVX2)
|
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx2")
|
|
||||||
endif()
|
|
||||||
if(NOT WHISPER_NO_FMA)
|
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfma")
|
|
||||||
endif()
|
|
||||||
if(NOT WHISPER_NO_F16C)
|
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mf16c")
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (WHISPER_PERF)
|
|
||||||
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_PERF)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# whisper - this is the main library of the project
|
# build the library
|
||||||
#
|
#
|
||||||
|
|
||||||
set(TARGET whisper)
|
if (NOT TARGET ggml)
|
||||||
|
add_subdirectory(ggml)
|
||||||
add_library(${TARGET}
|
# ... otherwise assume ggml is added by a parent CMakeLists.txt
|
||||||
ggml.h
|
|
||||||
ggml.c
|
|
||||||
whisper.h
|
|
||||||
whisper.cpp
|
|
||||||
)
|
|
||||||
|
|
||||||
include(DefaultTargetOptions)
|
|
||||||
|
|
||||||
target_include_directories(${TARGET} PUBLIC
|
|
||||||
.
|
|
||||||
)
|
|
||||||
|
|
||||||
if (MSVC)
|
|
||||||
target_link_libraries(${TARGET} PRIVATE ${WHISPER_EXTRA_LIBS} ${CMAKE_THREAD_LIBS_INIT})
|
|
||||||
|
|
||||||
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -D_CRT_SECURE_NO_WARNINGS)
|
|
||||||
else()
|
|
||||||
target_link_libraries(${TARGET} PRIVATE m ${WHISPER_EXTRA_LIBS} ${CMAKE_THREAD_LIBS_INIT})
|
|
||||||
endif()
|
endif()
|
||||||
|
add_subdirectory(src)
|
||||||
if (BUILD_SHARED_LIBS)
|
|
||||||
target_link_libraries(${TARGET} PUBLIC
|
|
||||||
${CMAKE_DL_LIBS}
|
|
||||||
)
|
|
||||||
|
|
||||||
target_compile_definitions(${TARGET} PUBLIC
|
|
||||||
WHISPER_SHARED
|
|
||||||
)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (EMSCRIPTEN)
|
|
||||||
set_target_properties(${TARGET} PROPERTIES COMPILE_FLAGS "-msimd128")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
target_compile_definitions(${TARGET} PUBLIC
|
|
||||||
${WHISPER_EXTRA_FLAGS}
|
|
||||||
)
|
|
||||||
|
|
||||||
set_target_properties(${TARGET} PROPERTIES PUBLIC_HEADER "whisper.h")
|
|
||||||
|
|
||||||
install(TARGETS ${TARGET}
|
|
||||||
LIBRARY DESTINATION lib
|
|
||||||
ARCHIVE DESTINATION lib/static
|
|
||||||
RUNTIME DESTINATION bin
|
|
||||||
PUBLIC_HEADER DESTINATION include
|
|
||||||
)
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# bindings
|
# install
|
||||||
#
|
#
|
||||||
|
|
||||||
add_subdirectory(bindings)
|
include(GNUInstallDirs)
|
||||||
|
include(CMakePackageConfigHelpers)
|
||||||
|
|
||||||
|
set(WHISPER_BUILD_NUMBER ${BUILD_NUMBER})
|
||||||
|
set(WHISPER_BUILD_COMMIT ${BUILD_COMMIT})
|
||||||
|
set(WHISPER_INSTALL_VERSION ${CMAKE_PROJECT_VERSION})
|
||||||
|
|
||||||
|
set(WHISPER_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header files")
|
||||||
|
set(WHISPER_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
|
||||||
|
set(WHISPER_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")
|
||||||
|
|
||||||
|
get_directory_property(WHISPER_TRANSIENT_DEFINES COMPILE_DEFINITIONS)
|
||||||
|
|
||||||
|
set_target_properties(whisper PROPERTIES PUBLIC_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/include/whisper.h)
|
||||||
|
install(TARGETS whisper LIBRARY PUBLIC_HEADER)
|
||||||
|
|
||||||
|
configure_package_config_file(
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/cmake/whisper-config.cmake.in
|
||||||
|
${CMAKE_CURRENT_BINARY_DIR}/whisper-config.cmake
|
||||||
|
INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/whisper
|
||||||
|
PATH_VARS
|
||||||
|
WHISPER_INCLUDE_INSTALL_DIR
|
||||||
|
WHISPER_LIB_INSTALL_DIR
|
||||||
|
WHISPER_BIN_INSTALL_DIR )
|
||||||
|
|
||||||
|
write_basic_package_version_file(
|
||||||
|
${CMAKE_CURRENT_BINARY_DIR}/whisper-version.cmake
|
||||||
|
VERSION ${WHISPER_INSTALL_VERSION}
|
||||||
|
COMPATIBILITY SameMajorVersion)
|
||||||
|
|
||||||
|
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/whisper-config.cmake
|
||||||
|
${CMAKE_CURRENT_BINARY_DIR}/whisper-version.cmake
|
||||||
|
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/whisper)
|
||||||
|
|
||||||
|
configure_file(cmake/whisper.pc.in
|
||||||
|
"${CMAKE_CURRENT_BINARY_DIR}/whisper.pc"
|
||||||
|
@ONLY)
|
||||||
|
|
||||||
|
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/whisper.pc"
|
||||||
|
DESTINATION lib/pkgconfig)
|
||||||
|
|
||||||
#
|
#
|
||||||
# programs, examples and tests
|
# programs, examples and tests
|
||||||
#
|
#
|
||||||
|
|
||||||
if (WHISPER_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
|
if (WHISPER_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
|
||||||
enable_testing()
|
#include(CTest)
|
||||||
add_subdirectory(tests)
|
#add_subdirectory(tests)
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (WHISPER_BUILD_EXAMPLES)
|
if (WHISPER_BUILD_EXAMPLES)
|
||||||
|
2
LICENSE
2
LICENSE
@ -1,6 +1,6 @@
|
|||||||
MIT License
|
MIT License
|
||||||
|
|
||||||
Copyright (c) 2022 Georgi Gerganov
|
Copyright (c) 2023-2024 The ggml authors
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
259
Makefile
259
Makefile
@ -1,233 +1,12 @@
|
|||||||
ifndef UNAME_S
|
|
||||||
UNAME_S := $(shell uname -s)
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifndef UNAME_P
|
|
||||||
UNAME_P := $(shell uname -p)
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifndef UNAME_M
|
|
||||||
UNAME_M := $(shell uname -m)
|
|
||||||
endif
|
|
||||||
|
|
||||||
CCV := $(shell $(CC) --version | head -n 1)
|
|
||||||
CXXV := $(shell $(CXX) --version | head -n 1)
|
|
||||||
|
|
||||||
# Mac OS + Arm can report x86_64
|
|
||||||
# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
|
|
||||||
ifeq ($(UNAME_S),Darwin)
|
|
||||||
ifneq ($(UNAME_P),arm)
|
|
||||||
SYSCTL_M := $(shell sysctl -n hw.optional.arm64)
|
|
||||||
ifeq ($(SYSCTL_M),1)
|
|
||||||
# UNAME_P := arm
|
|
||||||
# UNAME_M := arm64
|
|
||||||
warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789)
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
#
|
|
||||||
# Compile flags
|
|
||||||
#
|
|
||||||
|
|
||||||
CFLAGS = -I. -O3 -DNDEBUG -std=c11 -fPIC
|
|
||||||
CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC
|
|
||||||
LDFLAGS =
|
|
||||||
|
|
||||||
# ref: https://github.com/ggerganov/whisper.cpp/issues/37
|
|
||||||
ifneq ($(wildcard /usr/include/musl/*),)
|
|
||||||
CFLAGS += -D_POSIX_SOURCE -D_GNU_SOURCE
|
|
||||||
CXXFLAGS += -D_POSIX_SOURCE -D_GNU_SOURCE
|
|
||||||
endif
|
|
||||||
|
|
||||||
# OS specific
|
|
||||||
# TODO: support Windows
|
|
||||||
ifeq ($(UNAME_S),Linux)
|
|
||||||
CFLAGS += -pthread
|
|
||||||
CXXFLAGS += -pthread
|
|
||||||
endif
|
|
||||||
ifeq ($(UNAME_S),Darwin)
|
|
||||||
CFLAGS += -pthread
|
|
||||||
CXXFLAGS += -pthread
|
|
||||||
endif
|
|
||||||
ifeq ($(UNAME_S),FreeBSD)
|
|
||||||
CFLAGS += -pthread
|
|
||||||
CXXFLAGS += -pthread
|
|
||||||
endif
|
|
||||||
ifeq ($(UNAME_S),Haiku)
|
|
||||||
CFLAGS += -pthread
|
|
||||||
CXXFLAGS += -pthread
|
|
||||||
endif
|
|
||||||
|
|
||||||
# Architecture specific
|
|
||||||
# TODO: probably these flags need to be tweaked on some architectures
|
|
||||||
# feel free to update the Makefile for your architecture and send a pull request or issue
|
|
||||||
ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686))
|
|
||||||
ifeq ($(UNAME_S),Darwin)
|
|
||||||
CFLAGS += -mf16c
|
|
||||||
AVX1_M := $(shell sysctl machdep.cpu.features)
|
|
||||||
ifneq (,$(findstring FMA,$(AVX1_M)))
|
|
||||||
CFLAGS += -mfma
|
|
||||||
endif
|
|
||||||
ifneq (,$(findstring AVX1.0,$(AVX1_M)))
|
|
||||||
CFLAGS += -mavx
|
|
||||||
endif
|
|
||||||
AVX2_M := $(shell sysctl machdep.cpu.leaf7_features)
|
|
||||||
ifneq (,$(findstring AVX2,$(AVX2_M)))
|
|
||||||
CFLAGS += -mavx2
|
|
||||||
endif
|
|
||||||
else ifeq ($(UNAME_S),Linux)
|
|
||||||
AVX1_M := $(shell grep "avx " /proc/cpuinfo)
|
|
||||||
ifneq (,$(findstring avx,$(AVX1_M)))
|
|
||||||
CFLAGS += -mavx
|
|
||||||
endif
|
|
||||||
AVX2_M := $(shell grep "avx2 " /proc/cpuinfo)
|
|
||||||
ifneq (,$(findstring avx2,$(AVX2_M)))
|
|
||||||
CFLAGS += -mavx2
|
|
||||||
endif
|
|
||||||
FMA_M := $(shell grep "fma " /proc/cpuinfo)
|
|
||||||
ifneq (,$(findstring fma,$(FMA_M)))
|
|
||||||
CFLAGS += -mfma
|
|
||||||
endif
|
|
||||||
F16C_M := $(shell grep "f16c " /proc/cpuinfo)
|
|
||||||
ifneq (,$(findstring f16c,$(F16C_M)))
|
|
||||||
CFLAGS += -mf16c
|
|
||||||
endif
|
|
||||||
SSE3_M := $(shell grep "sse3 " /proc/cpuinfo)
|
|
||||||
ifneq (,$(findstring sse3,$(SSE3_M)))
|
|
||||||
CFLAGS += -msse3
|
|
||||||
endif
|
|
||||||
else ifeq ($(UNAME_S),Haiku)
|
|
||||||
AVX1_M := $(shell sysinfo -cpu | grep "AVX ")
|
|
||||||
ifneq (,$(findstring avx,$(AVX1_M)))
|
|
||||||
CFLAGS += -mavx
|
|
||||||
endif
|
|
||||||
AVX2_M := $(shell sysinfo -cpu | grep "AVX2 ")
|
|
||||||
ifneq (,$(findstring avx2,$(AVX2_M)))
|
|
||||||
CFLAGS += -mavx2
|
|
||||||
endif
|
|
||||||
FMA_M := $(shell sysinfo -cpu | grep "FMA ")
|
|
||||||
ifneq (,$(findstring fma,$(FMA_M)))
|
|
||||||
CFLAGS += -mfma
|
|
||||||
endif
|
|
||||||
F16C_M := $(shell sysinfo -cpu | grep "F16C ")
|
|
||||||
ifneq (,$(findstring f16c,$(F16C_M)))
|
|
||||||
CFLAGS += -mf16c
|
|
||||||
endif
|
|
||||||
else
|
|
||||||
CFLAGS += -mfma -mf16c -mavx -mavx2
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
ifeq ($(UNAME_M),amd64)
|
|
||||||
CFLAGS += -mavx -mavx2 -mfma -mf16c
|
|
||||||
endif
|
|
||||||
ifneq ($(filter ppc64%,$(UNAME_M)),)
|
|
||||||
POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
|
|
||||||
ifneq (,$(findstring POWER9,$(POWER9_M)))
|
|
||||||
CFLAGS += -mpower9-vector
|
|
||||||
endif
|
|
||||||
# Require c++23's std::byteswap for big-endian support.
|
|
||||||
ifeq ($(UNAME_M),ppc64)
|
|
||||||
CXXFLAGS += -std=c++23 -DGGML_BIG_ENDIAN
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
ifndef WHISPER_NO_ACCELERATE
|
|
||||||
# Mac M1 - include Accelerate framework
|
|
||||||
ifeq ($(UNAME_S),Darwin)
|
|
||||||
CFLAGS += -DGGML_USE_ACCELERATE
|
|
||||||
LDFLAGS += -framework Accelerate
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
ifdef WHISPER_OPENBLAS
|
|
||||||
CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
|
|
||||||
LDFLAGS += -lopenblas
|
|
||||||
endif
|
|
||||||
ifdef WHISPER_GPROF
|
|
||||||
CFLAGS += -pg
|
|
||||||
CXXFLAGS += -pg
|
|
||||||
endif
|
|
||||||
ifneq ($(filter aarch64%,$(UNAME_M)),)
|
|
||||||
CFLAGS += -mcpu=native
|
|
||||||
CXXFLAGS += -mcpu=native
|
|
||||||
endif
|
|
||||||
ifneq ($(filter armv6%,$(UNAME_M)),)
|
|
||||||
# Raspberry Pi 1, 2, 3
|
|
||||||
CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
|
|
||||||
endif
|
|
||||||
ifneq ($(filter armv7%,$(UNAME_M)),)
|
|
||||||
# Raspberry Pi 4
|
|
||||||
CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
|
|
||||||
endif
|
|
||||||
ifneq ($(filter armv8%,$(UNAME_M)),)
|
|
||||||
# Raspberry Pi 4
|
|
||||||
CFLAGS += -mfp16-format=ieee -mno-unaligned-access
|
|
||||||
endif
|
|
||||||
|
|
||||||
#
|
|
||||||
# Print build information
|
|
||||||
#
|
|
||||||
|
|
||||||
$(info I whisper.cpp build info: )
|
|
||||||
$(info I UNAME_S: $(UNAME_S))
|
|
||||||
$(info I UNAME_P: $(UNAME_P))
|
|
||||||
$(info I UNAME_M: $(UNAME_M))
|
|
||||||
$(info I CFLAGS: $(CFLAGS))
|
|
||||||
$(info I CXXFLAGS: $(CXXFLAGS))
|
|
||||||
$(info I LDFLAGS: $(LDFLAGS))
|
|
||||||
$(info I CC: $(CCV))
|
|
||||||
$(info I CXX: $(CXXV))
|
|
||||||
$(info )
|
|
||||||
|
|
||||||
default: main
|
|
||||||
|
|
||||||
#
|
|
||||||
# Build library
|
|
||||||
#
|
|
||||||
|
|
||||||
ggml.o: ggml.c ggml.h
|
|
||||||
$(CC) $(CFLAGS) -c ggml.c -o ggml.o
|
|
||||||
|
|
||||||
whisper.o: whisper.cpp whisper.h
|
|
||||||
$(CXX) $(CXXFLAGS) -c whisper.cpp -o whisper.o
|
|
||||||
|
|
||||||
libwhisper.a: ggml.o whisper.o
|
|
||||||
$(AR) rcs libwhisper.a ggml.o whisper.o
|
|
||||||
|
|
||||||
libwhisper.so: ggml.o whisper.o
|
|
||||||
$(CXX) $(CXXFLAGS) -shared -o libwhisper.so ggml.o whisper.o $(LDFLAGS)
|
|
||||||
|
|
||||||
clean:
|
|
||||||
rm -f *.o main stream command talk bench libwhisper.a libwhisper.so
|
|
||||||
|
|
||||||
#
|
|
||||||
# Examples
|
|
||||||
#
|
|
||||||
|
|
||||||
CC_SDL=`sdl2-config --cflags --libs`
|
|
||||||
|
|
||||||
SRC_COMMON = examples/common.cpp
|
|
||||||
SRC_COMMON_SDL = examples/common-sdl.cpp
|
|
||||||
|
|
||||||
main: examples/main/main.cpp $(SRC_COMMON) ggml.o whisper.o
|
|
||||||
$(CXX) $(CXXFLAGS) examples/main/main.cpp $(SRC_COMMON) ggml.o whisper.o -o main $(LDFLAGS)
|
|
||||||
./main -h
|
|
||||||
|
|
||||||
stream: examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o
|
|
||||||
$(CXX) $(CXXFLAGS) examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o -o stream $(CC_SDL) $(LDFLAGS)
|
|
||||||
|
|
||||||
command: examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o
|
|
||||||
$(CXX) $(CXXFLAGS) examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o -o command $(CC_SDL) $(LDFLAGS)
|
|
||||||
|
|
||||||
talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o
|
|
||||||
$(CXX) $(CXXFLAGS) examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o -o talk $(CC_SDL) $(LDFLAGS)
|
|
||||||
|
|
||||||
bench: examples/bench/bench.cpp ggml.o whisper.o
|
|
||||||
$(CXX) $(CXXFLAGS) examples/bench/bench.cpp ggml.o whisper.o -o bench $(LDFLAGS)
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# Audio samples
|
# Audio samples
|
||||||
#
|
#
|
||||||
|
|
||||||
|
.PHONY: build
|
||||||
|
build:
|
||||||
|
cmake -B build
|
||||||
|
cmake --build build --config Release
|
||||||
|
|
||||||
# download a few audio samples into folder "./samples":
|
# download a few audio samples into folder "./samples":
|
||||||
.PHONY: samples
|
.PHONY: samples
|
||||||
samples:
|
samples:
|
||||||
@ -237,12 +16,8 @@ samples:
|
|||||||
@wget --quiet --show-progress -O samples/gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
|
@wget --quiet --show-progress -O samples/gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
|
||||||
@wget --quiet --show-progress -O samples/hp0.ogg https://upload.wikimedia.org/wikipedia/en/d/d4/En.henryfphillips.ogg
|
@wget --quiet --show-progress -O samples/hp0.ogg https://upload.wikimedia.org/wikipedia/en/d/d4/En.henryfphillips.ogg
|
||||||
@wget --quiet --show-progress -O samples/mm1.wav https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav
|
@wget --quiet --show-progress -O samples/mm1.wav https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav
|
||||||
@echo "Converting to 16-bit WAV ..."
|
@wget --quiet --show-progress -O samples/a13.mp3 https://upload.wikimedia.org/wikipedia/commons/transcoded/6/6f/Apollo13-wehaveaproblem.ogg/Apollo13-wehaveaproblem.ogg.mp3
|
||||||
@ffmpeg -loglevel -0 -y -i samples/gb0.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/gb0.wav
|
@wget --quiet --show-progress -O samples/diffusion2023-07-03.flac https://archive.org/download/diffusion2023-07-03/diffusion2023-07-03.flac
|
||||||
@ffmpeg -loglevel -0 -y -i samples/gb1.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/gb1.wav
|
|
||||||
@ffmpeg -loglevel -0 -y -i samples/hp0.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/hp0.wav
|
|
||||||
@ffmpeg -loglevel -0 -y -i samples/mm1.wav -ar 16000 -ac 1 -c:a pcm_s16le samples/mm0.wav
|
|
||||||
@rm samples/mm1.wav
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# Models
|
# Models
|
||||||
@ -260,28 +35,24 @@ samples:
|
|||||||
.PHONY: medium.en
|
.PHONY: medium.en
|
||||||
.PHONY: medium
|
.PHONY: medium
|
||||||
.PHONY: large-v1
|
.PHONY: large-v1
|
||||||
.PHONY: large
|
.PHONY: large-v2
|
||||||
|
.PHONY: large-v3
|
||||||
|
.PHONY: large-v3-turbo
|
||||||
|
|
||||||
tiny.en tiny base.en base small.en small medium.en medium large-v1 large: main
|
tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3 large-v3-turbo:
|
||||||
bash ./models/download-ggml-model.sh $@
|
bash ./models/download-ggml-model.sh $@
|
||||||
|
cmake -B build
|
||||||
|
cmake --build build --config Release
|
||||||
@echo ""
|
@echo ""
|
||||||
@echo "==============================================="
|
@echo "==============================================="
|
||||||
@echo "Running $@ on all samples in ./samples ..."
|
@echo "Running $@ on all samples in ./samples ..."
|
||||||
@echo "==============================================="
|
@echo "==============================================="
|
||||||
@echo ""
|
@echo ""
|
||||||
@for f in samples/*.wav; do \
|
@for f in samples/*$(.flac .mp3 .ogg .wav); do \
|
||||||
echo "----------------------------------------------" ; \
|
echo "----------------------------------------------" ; \
|
||||||
echo "[+] Running $@ on $$f ... (run 'ffplay $$f' to listen)" ; \
|
echo "[+] Running $@ on $$f ... (run 'ffplay $$f' to listen)" ; \
|
||||||
echo "----------------------------------------------" ; \
|
echo "----------------------------------------------" ; \
|
||||||
echo "" ; \
|
echo "" ; \
|
||||||
./main -m models/ggml-$@.bin -f $$f ; \
|
./build/bin/whisper-cli -m models/ggml-$@.bin -f $$f ; \
|
||||||
echo "" ; \
|
echo "" ; \
|
||||||
done
|
done
|
||||||
|
|
||||||
#
|
|
||||||
# Tests
|
|
||||||
#
|
|
||||||
|
|
||||||
.PHONY: tests
|
|
||||||
tests:
|
|
||||||
bash ./tests/run-tests.sh
|
|
||||||
|
721
README.md
721
README.md
@ -1,37 +1,47 @@
|
|||||||
# whisper.cpp
|
# whisper.cpp
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
[](https://github.com/ggerganov/whisper.cpp/actions)
|
[](https://github.com/ggerganov/whisper.cpp/actions)
|
||||||
[](https://opensource.org/licenses/MIT)
|
[](https://opensource.org/licenses/MIT)
|
||||||
|
[](https://conan.io/center/whisper-cpp)
|
||||||
[](https://www.npmjs.com/package/whisper.cpp/)
|
[](https://www.npmjs.com/package/whisper.cpp/)
|
||||||
|
|
||||||
Stable: [v1.2.1](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.2.1) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
|
> [!NOTE]
|
||||||
|
> New maintenance roadmap: https://github.com/ggerganov/whisper.cpp/discussions/2788
|
||||||
|
|
||||||
|
Stable: [v1.7.4](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.7.4) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
|
||||||
|
|
||||||
High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model:
|
High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model:
|
||||||
|
|
||||||
- Plain C/C++ implementation without dependencies
|
- Plain C/C++ implementation without dependencies
|
||||||
- Apple silicon first-class citizen - optimized via Arm Neon and Accelerate framework
|
- Apple Silicon first-class citizen - optimized via ARM NEON, Accelerate framework, Metal and [Core ML](#core-ml-support)
|
||||||
- AVX intrinsics support for x86 architectures
|
- AVX intrinsics support for x86 architectures
|
||||||
- VSX intrinsics support for POWER architectures
|
- [VSX intrinsics support for POWER architectures](#power-vsx-intrinsics)
|
||||||
- Mixed F16 / F32 precision
|
- Mixed F16 / F32 precision
|
||||||
- Low memory usage (Flash Attention)
|
- [Integer quantization support](#quantization)
|
||||||
- Zero memory allocations at runtime
|
- Zero memory allocations at runtime
|
||||||
- Runs on the CPU
|
- [Vulkan support](#vulkan-gpu-support)
|
||||||
- [C-style API](https://github.com/ggerganov/whisper.cpp/blob/master/whisper.h)
|
- Support for CPU-only inference
|
||||||
|
- [Efficient GPU support for NVIDIA](#nvidia-gpu-support)
|
||||||
|
- [OpenVINO Support](#openvino-support)
|
||||||
|
- [Ascend NPU Support](#ascend-npu-support)
|
||||||
|
- [C-style API](https://github.com/ggerganov/whisper.cpp/blob/master/include/whisper.h)
|
||||||
|
|
||||||
Supported platforms:
|
Supported platforms:
|
||||||
|
|
||||||
- [x] Mac OS (Intel and Arm)
|
- [x] Mac OS (Intel and Arm)
|
||||||
- [x] [iOS](examples/whisper.objc)
|
- [x] [iOS](examples/whisper.objc)
|
||||||
- [x] [Android](examples/whisper.android)
|
- [x] [Android](examples/whisper.android)
|
||||||
|
- [x] [Java](bindings/java/README.md)
|
||||||
- [x] Linux / [FreeBSD](https://github.com/ggerganov/whisper.cpp/issues/56#issuecomment-1350920264)
|
- [x] Linux / [FreeBSD](https://github.com/ggerganov/whisper.cpp/issues/56#issuecomment-1350920264)
|
||||||
- [x] [WebAssembly](examples/whisper.wasm)
|
- [x] [WebAssembly](examples/whisper.wasm)
|
||||||
- [x] Windows ([MSVC](https://github.com/ggerganov/whisper.cpp/blob/master/.github/workflows/build.yml#L117-L144) and [MinGW](https://github.com/ggerganov/whisper.cpp/issues/168)]
|
- [x] Windows ([MSVC](https://github.com/ggerganov/whisper.cpp/blob/master/.github/workflows/build.yml#L117-L144) and [MinGW](https://github.com/ggerganov/whisper.cpp/issues/168)]
|
||||||
- [x] [Raspberry Pi](https://github.com/ggerganov/whisper.cpp/discussions/166)
|
- [x] [Raspberry Pi](https://github.com/ggerganov/whisper.cpp/discussions/166)
|
||||||
|
- [x] [Docker](https://github.com/ggerganov/whisper.cpp/pkgs/container/whisper.cpp)
|
||||||
|
|
||||||
The entire implementation of the model is contained in 2 source files:
|
The entire high-level implementation of the model is contained in [whisper.h](include/whisper.h) and [whisper.cpp](src/whisper.cpp).
|
||||||
|
The rest of the code is part of the [`ggml`](https://github.com/ggerganov/ggml) machine learning library.
|
||||||
- Tensor operations: [ggml.h](ggml.h) / [ggml.c](ggml.c)
|
|
||||||
- Transformer inference: [whisper.h](whisper.h) / [whisper.cpp](whisper.cpp)
|
|
||||||
|
|
||||||
Having such a lightweight implementation of the model allows to easily integrate it in different platforms and applications.
|
Having such a lightweight implementation of the model allows to easily integrate it in different platforms and applications.
|
||||||
As an example, here is a video of running the model on an iPhone 13 device - fully offline, on-device: [whisper.objc](examples/whisper.objc)
|
As an example, here is a video of running the model on an iPhone 13 device - fully offline, on-device: [whisper.objc](examples/whisper.objc)
|
||||||
@ -42,149 +52,53 @@ You can also easily make your own offline voice assistant application: [command]
|
|||||||
|
|
||||||
https://user-images.githubusercontent.com/1991296/204038393-2f846eae-c255-4099-a76d-5735c25c49da.mp4
|
https://user-images.githubusercontent.com/1991296/204038393-2f846eae-c255-4099-a76d-5735c25c49da.mp4
|
||||||
|
|
||||||
Or you can even run it straight in the browser: [talk.wasm](examples/talk.wasm)
|
On Apple Silicon, the inference runs fully on the GPU via Metal:
|
||||||
|
|
||||||
## Implementation details
|
https://github.com/ggerganov/whisper.cpp/assets/1991296/c82e8f86-60dc-49f2-b048-d2fdbd6b5225
|
||||||
|
|
||||||
- The core tensor operations are implemented in C ([ggml.h](ggml.h) / [ggml.c](ggml.c))
|
|
||||||
- The transformer model and the high-level C-style API are implemented in C++ ([whisper.h](whisper.h) / [whisper.cpp](whisper.cpp))
|
|
||||||
- Sample usage is demonstrated in [main.cpp](examples/main)
|
|
||||||
- Sample real-time audio transcription from the microphone is demonstrated in [stream.cpp](examples/stream)
|
|
||||||
- Various other examples are available in the [examples](examples) folder
|
|
||||||
|
|
||||||
The tensor operators are optimized heavily for Apple silicon CPUs. Depending on the computation size, Arm Neon SIMD
|
|
||||||
instrisics or CBLAS Accelerate framework routines are used. The latter are especially effective for bigger sizes since
|
|
||||||
the Accelerate framework utilizes the special-purpose AMX coprocessor available in modern Apple products.
|
|
||||||
|
|
||||||
## Quick start
|
## Quick start
|
||||||
|
|
||||||
First, download one of the Whisper models converted in [ggml format](models). For example:
|
First clone the repository:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
bash ./models/download-ggml-model.sh base.en
|
git clone https://github.com/ggerganov/whisper.cpp.git
|
||||||
```
|
```
|
||||||
|
|
||||||
Now build the [main](examples/main) example and transcribe an audio file like this:
|
Navigate into the directory:
|
||||||
|
|
||||||
|
```
|
||||||
|
cd whisper.cpp
|
||||||
|
```
|
||||||
|
|
||||||
|
Then, download one of the Whisper [models](models/README.md) converted in [`ggml` format](#ggml-format). For example:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# build the main example
|
sh ./models/download-ggml-model.sh base.en
|
||||||
make
|
```
|
||||||
|
|
||||||
|
Now build the [whisper-cli](examples/cli) example and transcribe an audio file like this:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# build the project
|
||||||
|
cmake -B build
|
||||||
|
cmake --build build --config Release
|
||||||
|
|
||||||
# transcribe an audio file
|
# transcribe an audio file
|
||||||
./main -f samples/jfk.wav
|
./build/bin/whisper-cli -f samples/jfk.wav
|
||||||
```
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
For a quick demo, simply run `make base.en`:
|
For a quick demo, simply run `make base.en`.
|
||||||
|
|
||||||
```java
|
|
||||||
$ make base.en
|
|
||||||
|
|
||||||
cc -I. -O3 -std=c11 -pthread -DGGML_USE_ACCELERATE -c ggml.c -o ggml.o
|
|
||||||
c++ -I. -I./examples -O3 -std=c++11 -pthread -c whisper.cpp -o whisper.o
|
|
||||||
c++ -I. -I./examples -O3 -std=c++11 -pthread examples/main/main.cpp whisper.o ggml.o -o main -framework Accelerate
|
|
||||||
./main -h
|
|
||||||
|
|
||||||
usage: ./main [options] file0.wav file1.wav ...
|
|
||||||
|
|
||||||
options:
|
|
||||||
-h, --help [default] show this help message and exit
|
|
||||||
-t N, --threads N [4 ] number of threads to use during computation
|
|
||||||
-p N, --processors N [1 ] number of processors to use during computation
|
|
||||||
-ot N, --offset-t N [0 ] time offset in milliseconds
|
|
||||||
-on N, --offset-n N [0 ] segment index offset
|
|
||||||
-d N, --duration N [0 ] duration of audio to process in milliseconds
|
|
||||||
-mc N, --max-context N [-1 ] maximum number of text context tokens to store
|
|
||||||
-ml N, --max-len N [0 ] maximum segment length in characters
|
|
||||||
-bo N, --best-of N [5 ] number of best candidates to keep
|
|
||||||
-bs N, --beam-size N [-1 ] beam size for beam search
|
|
||||||
-wt N, --word-thold N [0.01 ] word timestamp probability threshold
|
|
||||||
-et N, --entropy-thold N [2.40 ] entropy threshold for decoder fail
|
|
||||||
-lpt N, --logprob-thold N [-1.00 ] log probability threshold for decoder fail
|
|
||||||
-su, --speed-up [false ] speed up audio by x2 (reduced accuracy)
|
|
||||||
-tr, --translate [false ] translate from source language to english
|
|
||||||
-di, --diarize [false ] stereo audio diarization
|
|
||||||
-nf, --no-fallback [false ] do not use temperature fallback while decoding
|
|
||||||
-otxt, --output-txt [false ] output result in a text file
|
|
||||||
-ovtt, --output-vtt [false ] output result in a vtt file
|
|
||||||
-osrt, --output-srt [false ] output result in a srt file
|
|
||||||
-owts, --output-words [false ] output script for generating karaoke video
|
|
||||||
-ocsv, --output-csv [false ] output result in a CSV file
|
|
||||||
-of FNAME, --output-file FNAME [ ] output file path (without file extension)
|
|
||||||
-ps, --print-special [false ] print special tokens
|
|
||||||
-pc, --print-colors [false ] print colors
|
|
||||||
-pp, --print-progress [false ] print progress
|
|
||||||
-nt, --no-timestamps [true ] do not print timestamps
|
|
||||||
-l LANG, --language LANG [en ] spoken language ('auto' for auto-detect)
|
|
||||||
--prompt PROMPT [ ] initial prompt
|
|
||||||
-m FNAME, --model FNAME [models/ggml-base.en.bin] model path
|
|
||||||
-f FNAME, --file FNAME [ ] input WAV file path
|
|
||||||
|
|
||||||
|
|
||||||
bash ./models/download-ggml-model.sh base.en
|
|
||||||
Downloading ggml model base.en ...
|
|
||||||
ggml-base.en.bin 100%[========================>] 141.11M 6.34MB/s in 24s
|
|
||||||
Done! Model 'base.en' saved in 'models/ggml-base.en.bin'
|
|
||||||
You can now use it like this:
|
|
||||||
|
|
||||||
$ ./main -m models/ggml-base.en.bin -f samples/jfk.wav
|
|
||||||
|
|
||||||
|
|
||||||
===============================================
|
|
||||||
Running base.en on all samples in ./samples ...
|
|
||||||
===============================================
|
|
||||||
|
|
||||||
----------------------------------------------
|
|
||||||
[+] Running base.en on samples/jfk.wav ... (run 'ffplay samples/jfk.wav' to listen)
|
|
||||||
----------------------------------------------
|
|
||||||
|
|
||||||
whisper_init_from_file: loading model from 'models/ggml-base.en.bin'
|
|
||||||
whisper_model_load: loading model
|
|
||||||
whisper_model_load: n_vocab = 51864
|
|
||||||
whisper_model_load: n_audio_ctx = 1500
|
|
||||||
whisper_model_load: n_audio_state = 512
|
|
||||||
whisper_model_load: n_audio_head = 8
|
|
||||||
whisper_model_load: n_audio_layer = 6
|
|
||||||
whisper_model_load: n_text_ctx = 448
|
|
||||||
whisper_model_load: n_text_state = 512
|
|
||||||
whisper_model_load: n_text_head = 8
|
|
||||||
whisper_model_load: n_text_layer = 6
|
|
||||||
whisper_model_load: n_mels = 80
|
|
||||||
whisper_model_load: f16 = 1
|
|
||||||
whisper_model_load: type = 2
|
|
||||||
whisper_model_load: mem required = 215.00 MB (+ 6.00 MB per decoder)
|
|
||||||
whisper_model_load: kv self size = 5.25 MB
|
|
||||||
whisper_model_load: kv cross size = 17.58 MB
|
|
||||||
whisper_model_load: adding 1607 extra tokens
|
|
||||||
whisper_model_load: model ctx = 140.60 MB
|
|
||||||
whisper_model_load: model size = 140.54 MB
|
|
||||||
|
|
||||||
system_info: n_threads = 4 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 |
|
|
||||||
|
|
||||||
main: processing 'samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...
|
|
||||||
|
|
||||||
|
|
||||||
[00:00:00.000 --> 00:00:11.000] And so my fellow Americans, ask not what your country can do for you, ask what you can do for your country.
|
|
||||||
|
|
||||||
|
|
||||||
whisper_print_timings: fallbacks = 0 p / 0 h
|
|
||||||
whisper_print_timings: load time = 113.81 ms
|
|
||||||
whisper_print_timings: mel time = 15.40 ms
|
|
||||||
whisper_print_timings: sample time = 11.58 ms / 27 runs ( 0.43 ms per run)
|
|
||||||
whisper_print_timings: encode time = 266.60 ms / 1 runs ( 266.60 ms per run)
|
|
||||||
whisper_print_timings: decode time = 66.11 ms / 27 runs ( 2.45 ms per run)
|
|
||||||
whisper_print_timings: total time = 476.31 ms
|
|
||||||
```
|
|
||||||
|
|
||||||
The command downloads the `base.en` model converted to custom `ggml` format and runs the inference on all `.wav` samples in the folder `samples`.
|
The command downloads the `base.en` model converted to custom `ggml` format and runs the inference on all `.wav` samples in the folder `samples`.
|
||||||
|
|
||||||
For detailed usage instructions, run: `./main -h`
|
For detailed usage instructions, run: `./build/bin/whisper-cli -h`
|
||||||
|
|
||||||
Note that the [main](examples/main) example currently runs only with 16-bit WAV files, so make sure to convert your input before running the tool.
|
Note that the [whisper-cli](examples/cli) example currently runs only with 16-bit WAV files, so make sure to convert your input before running the tool.
|
||||||
For example, you can use `ffmpeg` like this:
|
For example, you can use `ffmpeg` like this:
|
||||||
|
|
||||||
```java
|
```bash
|
||||||
ffmpeg -i input.mp3 -ar 16000 -ac 1 -c:a pcm_s16le output.wav
|
ffmpeg -i input.mp3 -ar 16000 -ac 1 -c:a pcm_s16le output.wav
|
||||||
```
|
```
|
||||||
|
|
||||||
@ -193,7 +107,7 @@ ffmpeg -i input.mp3 -ar 16000 -ac 1 -c:a pcm_s16le output.wav
|
|||||||
If you want some extra audio samples to play with, simply run:
|
If you want some extra audio samples to play with, simply run:
|
||||||
|
|
||||||
```
|
```
|
||||||
make samples
|
make -j samples
|
||||||
```
|
```
|
||||||
|
|
||||||
This will download a few more audio files from Wikipedia and convert them to 16-bit WAV format via `ffmpeg`.
|
This will download a few more audio files from Wikipedia and convert them to 16-bit WAV format via `ffmpeg`.
|
||||||
@ -201,124 +115,324 @@ This will download a few more audio files from Wikipedia and convert them to 16-
|
|||||||
You can download and run the other models as follows:
|
You can download and run the other models as follows:
|
||||||
|
|
||||||
```
|
```
|
||||||
make tiny.en
|
make -j tiny.en
|
||||||
make tiny
|
make -j tiny
|
||||||
make base.en
|
make -j base.en
|
||||||
make base
|
make -j base
|
||||||
make small.en
|
make -j small.en
|
||||||
make small
|
make -j small
|
||||||
make medium.en
|
make -j medium.en
|
||||||
make medium
|
make -j medium
|
||||||
make large-v1
|
make -j large-v1
|
||||||
make large
|
make -j large-v2
|
||||||
|
make -j large-v3
|
||||||
|
make -j large-v3-turbo
|
||||||
```
|
```
|
||||||
|
|
||||||
## Memory usage
|
## Memory usage
|
||||||
|
|
||||||
| Model | Disk | Mem | SHA |
|
| Model | Disk | Mem |
|
||||||
| --- | --- | --- | --- |
|
| ------ | ------- | ------- |
|
||||||
| tiny | 75 MB | ~125 MB | `bd577a113a864445d4c299885e0cb97d4ba92b5f` |
|
| tiny | 75 MiB | ~273 MB |
|
||||||
| base | 142 MB | ~210 MB | `465707469ff3a37a2b9b8d8f89f2f99de7299dac` |
|
| base | 142 MiB | ~388 MB |
|
||||||
| small | 466 MB | ~600 MB | `55356645c2b361a969dfd0ef2c5a50d530afd8d5` |
|
| small | 466 MiB | ~852 MB |
|
||||||
| medium | 1.5 GB | ~1.7 GB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` |
|
| medium | 1.5 GiB | ~2.1 GB |
|
||||||
| large | 2.9 GB | ~3.3 GB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` |
|
| large | 2.9 GiB | ~3.9 GB |
|
||||||
|
|
||||||
|
## POWER VSX Intrinsics
|
||||||
|
|
||||||
|
`whisper.cpp` supports POWER architectures and includes code which
|
||||||
|
significantly speeds operation on Linux running on POWER9/10, making it
|
||||||
|
capable of faster-than-realtime transcription on underclocked Raptor
|
||||||
|
Talos II. Ensure you have a BLAS package installed, and replace the
|
||||||
|
standard cmake setup with:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# build with GGML_BLAS defined
|
||||||
|
cmake -B build -DGGML_BLAS=1
|
||||||
|
cmake --build build --config Release
|
||||||
|
./build/bin/whisper-cli [ .. etc .. ]
|
||||||
|
```
|
||||||
|
|
||||||
|
## Quantization
|
||||||
|
|
||||||
|
`whisper.cpp` supports integer quantization of the Whisper `ggml` models.
|
||||||
|
Quantized models require less memory and disk space and depending on the hardware can be processed more efficiently.
|
||||||
|
|
||||||
|
Here are the steps for creating and using a quantized model:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# quantize a model with Q5_0 method
|
||||||
|
cmake -B build
|
||||||
|
cmake --build build --config Release
|
||||||
|
./build/bin/quantize models/ggml-base.en.bin models/ggml-base.en-q5_0.bin q5_0
|
||||||
|
|
||||||
|
# run the examples as usual, specifying the quantized model file
|
||||||
|
./build/bin/whisper-cli -m models/ggml-base.en-q5_0.bin ./samples/gb0.wav
|
||||||
|
```
|
||||||
|
|
||||||
|
## Core ML support
|
||||||
|
|
||||||
|
On Apple Silicon devices, the Encoder inference can be executed on the Apple Neural Engine (ANE) via Core ML. This can result in significant
|
||||||
|
speed-up - more than x3 faster compared with CPU-only execution. Here are the instructions for generating a Core ML model and using it with `whisper.cpp`:
|
||||||
|
|
||||||
|
- Install Python dependencies needed for the creation of the Core ML model:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install ane_transformers
|
||||||
|
pip install openai-whisper
|
||||||
|
pip install coremltools
|
||||||
|
```
|
||||||
|
|
||||||
|
- To ensure `coremltools` operates correctly, please confirm that [Xcode](https://developer.apple.com/xcode/) is installed and execute `xcode-select --install` to install the command-line tools.
|
||||||
|
- Python 3.10 is recommended.
|
||||||
|
- macOS Sonoma (version 14) or newer is recommended, as older versions of macOS might experience issues with transcription hallucination.
|
||||||
|
- [OPTIONAL] It is recommended to utilize a Python version management system, such as [Miniconda](https://docs.conda.io/en/latest/miniconda.html) for this step:
|
||||||
|
- To create an environment, use: `conda create -n py310-whisper python=3.10 -y`
|
||||||
|
- To activate the environment, use: `conda activate py310-whisper`
|
||||||
|
|
||||||
|
- Generate a Core ML model. For example, to generate a `base.en` model, use:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./models/generate-coreml-model.sh base.en
|
||||||
|
```
|
||||||
|
|
||||||
|
This will generate the folder `models/ggml-base.en-encoder.mlmodelc`
|
||||||
|
|
||||||
|
- Build `whisper.cpp` with Core ML support:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# using CMake
|
||||||
|
cmake -B build -DWHISPER_COREML=1
|
||||||
|
cmake --build build -j --config Release
|
||||||
|
```
|
||||||
|
|
||||||
|
- Run the examples as usual. For example:
|
||||||
|
|
||||||
|
```text
|
||||||
|
$ ./build/bin/whisper-cli -m models/ggml-base.en.bin -f samples/jfk.wav
|
||||||
|
|
||||||
|
...
|
||||||
|
|
||||||
|
whisper_init_state: loading Core ML model from 'models/ggml-base.en-encoder.mlmodelc'
|
||||||
|
whisper_init_state: first run on a device may take a while ...
|
||||||
|
whisper_init_state: Core ML model loaded
|
||||||
|
|
||||||
|
system_info: n_threads = 4 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | COREML = 1 |
|
||||||
|
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
The first run on a device is slow, since the ANE service compiles the Core ML model to some device-specific format.
|
||||||
|
Next runs are faster.
|
||||||
|
|
||||||
|
For more information about the Core ML implementation please refer to PR [#566](https://github.com/ggerganov/whisper.cpp/pull/566).
|
||||||
|
|
||||||
|
## OpenVINO support
|
||||||
|
|
||||||
|
On platforms that support [OpenVINO](https://github.com/openvinotoolkit/openvino), the Encoder inference can be executed
|
||||||
|
on OpenVINO-supported devices including x86 CPUs and Intel GPUs (integrated & discrete).
|
||||||
|
|
||||||
|
This can result in significant speedup in encoder performance. Here are the instructions for generating the OpenVINO model and using it with `whisper.cpp`:
|
||||||
|
|
||||||
|
- First, set up a Python virtual environment and install the Python dependencies. Python 3.10 is recommended.
|
||||||
|
|
||||||
|
Windows:
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
cd models
|
||||||
|
python -m venv openvino_conv_env
|
||||||
|
openvino_conv_env\Scripts\activate
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
pip install -r requirements-openvino.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
Linux and macOS:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd models
|
||||||
|
python3 -m venv openvino_conv_env
|
||||||
|
source openvino_conv_env/bin/activate
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
pip install -r requirements-openvino.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
- Generate an OpenVINO encoder model. For example, to generate a `base.en` model, use:
|
||||||
|
|
||||||
|
```
|
||||||
|
python convert-whisper-to-openvino.py --model base.en
|
||||||
|
```
|
||||||
|
|
||||||
|
This will produce ggml-base.en-encoder-openvino.xml/.bin IR model files. It's recommended to relocate these to the same folder as `ggml` models, as that
|
||||||
|
is the default location that the OpenVINO extension will search at runtime.
|
||||||
|
|
||||||
|
- Build `whisper.cpp` with OpenVINO support:
|
||||||
|
|
||||||
|
Download OpenVINO package from [release page](https://github.com/openvinotoolkit/openvino/releases). The recommended version to use is [2023.0.0](https://github.com/openvinotoolkit/openvino/releases/tag/2023.0.0).
|
||||||
|
|
||||||
|
After downloading & extracting the package onto your development system, set up the required environment by sourcing the setupvars script. For example:
|
||||||
|
|
||||||
|
Linux:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
source /path/to/l_openvino_toolkit_ubuntu22_2023.0.0.10926.b4452d56304_x86_64/setupvars.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
Windows (cmd):
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
C:\Path\To\w_openvino_toolkit_windows_2023.0.0.10926.b4452d56304_x86_64\setupvars.bat
|
||||||
|
```
|
||||||
|
|
||||||
|
And then build the project using cmake:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cmake -B build -DWHISPER_OPENVINO=1
|
||||||
|
cmake --build build -j --config Release
|
||||||
|
```
|
||||||
|
|
||||||
|
- Run the examples as usual. For example:
|
||||||
|
|
||||||
|
```text
|
||||||
|
$ ./build/bin/whisper-cli -m models/ggml-base.en.bin -f samples/jfk.wav
|
||||||
|
|
||||||
|
...
|
||||||
|
|
||||||
|
whisper_ctx_init_openvino_encoder: loading OpenVINO model from 'models/ggml-base.en-encoder-openvino.xml'
|
||||||
|
whisper_ctx_init_openvino_encoder: first run on a device may take a while ...
|
||||||
|
whisper_openvino_init: path_model = models/ggml-base.en-encoder-openvino.xml, device = GPU, cache_dir = models/ggml-base.en-encoder-openvino-cache
|
||||||
|
whisper_ctx_init_openvino_encoder: OpenVINO model loaded
|
||||||
|
|
||||||
|
system_info: n_threads = 4 / 8 | AVX = 1 | AVX2 = 1 | AVX512 = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | COREML = 0 | OPENVINO = 1 |
|
||||||
|
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
The first run on an OpenVINO device is slow, since the OpenVINO framework will compile the IR (Intermediate Representation) model to a device-specific 'blob'. This device-specific blob will get
|
||||||
|
cached for the next run.
|
||||||
|
|
||||||
|
For more information about the OpenVINO implementation please refer to PR [#1037](https://github.com/ggerganov/whisper.cpp/pull/1037).
|
||||||
|
|
||||||
|
## NVIDIA GPU support
|
||||||
|
|
||||||
|
With NVIDIA cards the processing of the models is done efficiently on the GPU via cuBLAS and custom CUDA kernels.
|
||||||
|
First, make sure you have installed `cuda`: https://developer.nvidia.com/cuda-downloads
|
||||||
|
|
||||||
|
Now build `whisper.cpp` with CUDA support:
|
||||||
|
|
||||||
|
```
|
||||||
|
cmake -B build -DGGML_CUDA=1
|
||||||
|
cmake --build build -j --config Release
|
||||||
|
```
|
||||||
|
|
||||||
|
## Vulkan GPU support
|
||||||
|
Vulkan is a cross-vendor solution that allows you to accelerate workloads on your GPU.
|
||||||
|
First, make sure your graphics card driver provides support for the Vulkan API.
|
||||||
|
|
||||||
|
Now build `whisper.cpp` with Vulkan support:
|
||||||
|
```
|
||||||
|
cmake -B build -DGGML_VULKAN=1
|
||||||
|
cmake --build build -j --config Release
|
||||||
|
```
|
||||||
|
|
||||||
|
## BLAS CPU support via OpenBLAS
|
||||||
|
|
||||||
|
Encoder processing can be accelerated on the CPU via OpenBLAS.
|
||||||
|
First, make sure you have installed `openblas`: https://www.openblas.net/
|
||||||
|
|
||||||
|
Now build `whisper.cpp` with OpenBLAS support:
|
||||||
|
|
||||||
|
```
|
||||||
|
cmake -B build -DGGML_BLAS=1
|
||||||
|
cmake --build build -j --config Release
|
||||||
|
```
|
||||||
|
|
||||||
|
## Ascend NPU support
|
||||||
|
|
||||||
|
Ascend NPU provides inference acceleration via [`CANN`](https://www.hiascend.com/en/software/cann) and AI cores.
|
||||||
|
|
||||||
|
First, check if your Ascend NPU device is supported:
|
||||||
|
|
||||||
|
**Verified devices**
|
||||||
|
| Ascend NPU | Status |
|
||||||
|
|:-----------------------------:|:-------:|
|
||||||
|
| Atlas 300T A2 | Support |
|
||||||
|
|
||||||
|
Then, make sure you have installed [`CANN toolkit`](https://www.hiascend.com/en/software/cann/community). The latest version of CANN is recommended.
|
||||||
|
|
||||||
|
Now build `whisper.cpp` with CANN support:
|
||||||
|
|
||||||
|
```
|
||||||
|
cmake -B build -DGGML_CANN=1
|
||||||
|
cmake --build build -j --config Release
|
||||||
|
```
|
||||||
|
|
||||||
|
Run the inference examples as usual, for example:
|
||||||
|
|
||||||
|
```
|
||||||
|
./build/bin/whisper-cli -f samples/jfk.wav -m models/ggml-base.en.bin -t 8
|
||||||
|
```
|
||||||
|
|
||||||
|
*Notes:*
|
||||||
|
|
||||||
|
- If you have trouble with your Ascend NPU device, please create an issue with the **[CANN]** prefix/tag.
|
||||||
|
- If you run successfully with your Ascend NPU device, please help update the table `Verified devices`.
|
||||||
|
|
||||||
|
## Docker
|
||||||
|
|
||||||
|
### Prerequisites
|
||||||
|
|
||||||
|
- Docker must be installed and running on your system.
|
||||||
|
- Create a folder to store big models & intermediate files (ex. /whisper/models)
|
||||||
|
|
||||||
|
### Images
|
||||||
|
|
||||||
|
We have two Docker images available for this project:
|
||||||
|
|
||||||
|
1. `ghcr.io/ggerganov/whisper.cpp:main`: This image includes the main executable file as well as `curl` and `ffmpeg`. (platforms: `linux/amd64`, `linux/arm64`)
|
||||||
|
2. `ghcr.io/ggerganov/whisper.cpp:main-cuda`: Same as `main` but compiled with CUDA support. (platforms: `linux/amd64`)
|
||||||
|
|
||||||
|
### Usage
|
||||||
|
|
||||||
|
```shell
|
||||||
|
# download model and persist it in a local folder
|
||||||
|
docker run -it --rm \
|
||||||
|
-v path/to/models:/models \
|
||||||
|
whisper.cpp:main "./models/download-ggml-model.sh base /models"
|
||||||
|
# transcribe an audio file
|
||||||
|
docker run -it --rm \
|
||||||
|
-v path/to/models:/models \
|
||||||
|
-v path/to/audios:/audios \
|
||||||
|
whisper.cpp:main "./main -m /models/ggml-base.bin -f /audios/jfk.wav"
|
||||||
|
# transcribe an audio file in samples folder
|
||||||
|
docker run -it --rm \
|
||||||
|
-v path/to/models:/models \
|
||||||
|
whisper.cpp:main "./main -m /models/ggml-base.bin -f ./samples/jfk.wav"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Installing with Conan
|
||||||
|
|
||||||
|
You can install pre-built binaries for whisper.cpp or build it from source using [Conan](https://conan.io/). Use the following command:
|
||||||
|
|
||||||
|
```
|
||||||
|
conan install --requires="whisper-cpp/[*]" --build=missing
|
||||||
|
```
|
||||||
|
|
||||||
|
For detailed instructions on how to use Conan, please refer to the [Conan documentation](https://docs.conan.io/2/).
|
||||||
|
|
||||||
## Limitations
|
## Limitations
|
||||||
|
|
||||||
- Inference only
|
- Inference only
|
||||||
- No GPU support (yet)
|
|
||||||
|
|
||||||
## Another example
|
|
||||||
|
|
||||||
Here is another example of transcribing a [3:24 min speech](https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg)
|
|
||||||
in about half a minute on a MacBook M1 Pro, using `medium.en` model:
|
|
||||||
|
|
||||||
<details>
|
|
||||||
<summary>Expand to see the result</summary>
|
|
||||||
|
|
||||||
```java
|
|
||||||
$ ./main -m models/ggml-medium.en.bin -f samples/gb1.wav -t 8
|
|
||||||
|
|
||||||
whisper_init_from_file: loading model from 'models/ggml-medium.en.bin'
|
|
||||||
whisper_model_load: loading model
|
|
||||||
whisper_model_load: n_vocab = 51864
|
|
||||||
whisper_model_load: n_audio_ctx = 1500
|
|
||||||
whisper_model_load: n_audio_state = 1024
|
|
||||||
whisper_model_load: n_audio_head = 16
|
|
||||||
whisper_model_load: n_audio_layer = 24
|
|
||||||
whisper_model_load: n_text_ctx = 448
|
|
||||||
whisper_model_load: n_text_state = 1024
|
|
||||||
whisper_model_load: n_text_head = 16
|
|
||||||
whisper_model_load: n_text_layer = 24
|
|
||||||
whisper_model_load: n_mels = 80
|
|
||||||
whisper_model_load: f16 = 1
|
|
||||||
whisper_model_load: type = 4
|
|
||||||
whisper_model_load: mem required = 1720.00 MB (+ 43.00 MB per decoder)
|
|
||||||
whisper_model_load: kv self size = 42.00 MB
|
|
||||||
whisper_model_load: kv cross size = 140.62 MB
|
|
||||||
whisper_model_load: adding 1607 extra tokens
|
|
||||||
whisper_model_load: model ctx = 1462.35 MB
|
|
||||||
whisper_model_load: model size = 1462.12 MB
|
|
||||||
|
|
||||||
system_info: n_threads = 8 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 |
|
|
||||||
|
|
||||||
main: processing 'samples/gb1.wav' (3179750 samples, 198.7 sec), 8 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...
|
|
||||||
|
|
||||||
|
|
||||||
[00:00:00.000 --> 00:00:08.000] My fellow Americans, this day has brought terrible news and great sadness to our country.
|
|
||||||
[00:00:08.000 --> 00:00:17.000] At nine o'clock this morning, Mission Control in Houston lost contact with our Space Shuttle Columbia.
|
|
||||||
[00:00:17.000 --> 00:00:23.000] A short time later, debris was seen falling from the skies above Texas.
|
|
||||||
[00:00:23.000 --> 00:00:29.000] The Columbia's lost. There are no survivors.
|
|
||||||
[00:00:29.000 --> 00:00:32.000] On board was a crew of seven.
|
|
||||||
[00:00:32.000 --> 00:00:39.000] Colonel Rick Husband, Lieutenant Colonel Michael Anderson, Commander Laurel Clark,
|
|
||||||
[00:00:39.000 --> 00:00:48.000] Captain David Brown, Commander William McCool, Dr. Kultna Shavla, and Ilan Ramon,
|
|
||||||
[00:00:48.000 --> 00:00:52.000] a colonel in the Israeli Air Force.
|
|
||||||
[00:00:52.000 --> 00:00:58.000] These men and women assumed great risk in the service to all humanity.
|
|
||||||
[00:00:58.000 --> 00:01:03.000] In an age when space flight has come to seem almost routine,
|
|
||||||
[00:01:03.000 --> 00:01:07.000] it is easy to overlook the dangers of travel by rocket
|
|
||||||
[00:01:07.000 --> 00:01:12.000] and the difficulties of navigating the fierce outer atmosphere of the Earth.
|
|
||||||
[00:01:12.000 --> 00:01:18.000] These astronauts knew the dangers, and they faced them willingly,
|
|
||||||
[00:01:18.000 --> 00:01:23.000] knowing they had a high and noble purpose in life.
|
|
||||||
[00:01:23.000 --> 00:01:31.000] Because of their courage and daring and idealism, we will miss them all the more.
|
|
||||||
[00:01:31.000 --> 00:01:36.000] All Americans today are thinking as well of the families of these men and women
|
|
||||||
[00:01:36.000 --> 00:01:40.000] who have been given this sudden shock and grief.
|
|
||||||
[00:01:40.000 --> 00:01:45.000] You're not alone. Our entire nation grieves with you,
|
|
||||||
[00:01:45.000 --> 00:01:52.000] and those you love will always have the respect and gratitude of this country.
|
|
||||||
[00:01:52.000 --> 00:01:56.000] The cause in which they died will continue.
|
|
||||||
[00:01:56.000 --> 00:02:04.000] Mankind is led into the darkness beyond our world by the inspiration of discovery
|
|
||||||
[00:02:04.000 --> 00:02:11.000] and the longing to understand. Our journey into space will go on.
|
|
||||||
[00:02:11.000 --> 00:02:16.000] In the skies today, we saw destruction and tragedy.
|
|
||||||
[00:02:16.000 --> 00:02:22.000] Yet farther than we can see, there is comfort and hope.
|
|
||||||
[00:02:22.000 --> 00:02:29.000] In the words of the prophet Isaiah, "Lift your eyes and look to the heavens
|
|
||||||
[00:02:29.000 --> 00:02:35.000] who created all these. He who brings out the starry hosts one by one
|
|
||||||
[00:02:35.000 --> 00:02:39.000] and calls them each by name."
|
|
||||||
[00:02:39.000 --> 00:02:46.000] Because of His great power and mighty strength, not one of them is missing.
|
|
||||||
[00:02:46.000 --> 00:02:55.000] The same Creator who names the stars also knows the names of the seven souls we mourn today.
|
|
||||||
[00:02:55.000 --> 00:03:01.000] The crew of the shuttle Columbia did not return safely to earth,
|
|
||||||
[00:03:01.000 --> 00:03:05.000] yet we can pray that all are safely home.
|
|
||||||
[00:03:05.000 --> 00:03:13.000] May God bless the grieving families, and may God continue to bless America.
|
|
||||||
[00:03:13.000 --> 00:03:19.000] [Silence]
|
|
||||||
|
|
||||||
|
|
||||||
whisper_print_timings: fallbacks = 1 p / 0 h
|
|
||||||
whisper_print_timings: load time = 569.03 ms
|
|
||||||
whisper_print_timings: mel time = 146.85 ms
|
|
||||||
whisper_print_timings: sample time = 238.66 ms / 553 runs ( 0.43 ms per run)
|
|
||||||
whisper_print_timings: encode time = 18665.10 ms / 9 runs ( 2073.90 ms per run)
|
|
||||||
whisper_print_timings: decode time = 13090.93 ms / 549 runs ( 23.85 ms per run)
|
|
||||||
whisper_print_timings: total time = 32733.52 ms
|
|
||||||
```
|
|
||||||
</details>
|
|
||||||
|
|
||||||
## Real-time audio input example
|
## Real-time audio input example
|
||||||
|
|
||||||
This is a naive example of performing real-time inference on audio from your microphone.
|
This is a naive example of performing real-time inference on audio from your microphone.
|
||||||
The [stream](examples/stream) tool samples the audio every half a second and runs the transcription continously.
|
The [stream](examples/stream) tool samples the audio every half a second and runs the transcription continuously.
|
||||||
More info is available in [issue #10](https://github.com/ggerganov/whisper.cpp/issues/10).
|
More info is available in [issue #10](https://github.com/ggerganov/whisper.cpp/issues/10).
|
||||||
|
|
||||||
```java
|
```bash
|
||||||
make stream
|
cmake -B build -DWHISPER_SDL2=ON
|
||||||
./stream -m ./models/ggml-base.en.bin -t 8 --step 500 --length 5000
|
cmake --build build --config Release
|
||||||
|
./build/bin/whisper-stream -m ./models/ggml-base.en.bin -t 8 --step 500 --length 5000
|
||||||
```
|
```
|
||||||
|
|
||||||
https://user-images.githubusercontent.com/1991296/194935793-76afede7-cfa8-48d8-a80f-28ba83be7d09.mp4
|
https://user-images.githubusercontent.com/1991296/194935793-76afede7-cfa8-48d8-a80f-28ba83be7d09.mp4
|
||||||
@ -328,14 +442,18 @@ https://user-images.githubusercontent.com/1991296/194935793-76afede7-cfa8-48d8-a
|
|||||||
Adding the `--print-colors` argument will print the transcribed text using an experimental color coding strategy
|
Adding the `--print-colors` argument will print the transcribed text using an experimental color coding strategy
|
||||||
to highlight words with high or low confidence:
|
to highlight words with high or low confidence:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./build/bin/whisper-cli -m models/ggml-base.en.bin -f samples/gb0.wav --print-colors
|
||||||
|
```
|
||||||
|
|
||||||
<img width="965" alt="image" src="https://user-images.githubusercontent.com/1991296/197356445-311c8643-9397-4e5e-b46e-0b4b4daa2530.png">
|
<img width="965" alt="image" src="https://user-images.githubusercontent.com/1991296/197356445-311c8643-9397-4e5e-b46e-0b4b4daa2530.png">
|
||||||
|
|
||||||
## Controlling the length of the generated text segments (experimental)
|
## Controlling the length of the generated text segments (experimental)
|
||||||
|
|
||||||
For example, to limit the line length to a maximum of 16 characters, simply add `-ml 16`:
|
For example, to limit the line length to a maximum of 16 characters, simply add `-ml 16`:
|
||||||
|
|
||||||
```java
|
```text
|
||||||
./main -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 16
|
$ ./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 16
|
||||||
|
|
||||||
whisper_model_load: loading model from './models/ggml-base.en.bin'
|
whisper_model_load: loading model from './models/ggml-base.en.bin'
|
||||||
...
|
...
|
||||||
@ -354,12 +472,12 @@ main: processing './samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 pr
|
|||||||
[00:00:10.020 --> 00:00:11.000] country.
|
[00:00:10.020 --> 00:00:11.000] country.
|
||||||
```
|
```
|
||||||
|
|
||||||
## Word-level timestamp
|
## Word-level timestamp (experimental)
|
||||||
|
|
||||||
The `--max-len` argument can be used to obtain word-level timestamps. Simply use `-ml 1`:
|
The `--max-len` argument can be used to obtain word-level timestamps. Simply use `-ml 1`:
|
||||||
|
|
||||||
```java
|
```text
|
||||||
./main -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 1
|
$ ./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 1
|
||||||
|
|
||||||
whisper_model_load: loading model from './models/ggml-base.en.bin'
|
whisper_model_load: loading model from './models/ggml-base.en.bin'
|
||||||
...
|
...
|
||||||
@ -367,7 +485,7 @@ system_info: n_threads = 4 / 10 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1
|
|||||||
|
|
||||||
main: processing './samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...
|
main: processing './samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...
|
||||||
|
|
||||||
[00:00:00.000 --> 00:00:00.320]
|
[00:00:00.000 --> 00:00:00.320]
|
||||||
[00:00:00.320 --> 00:00:00.370] And
|
[00:00:00.320 --> 00:00:00.370] And
|
||||||
[00:00:00.370 --> 00:00:00.690] so
|
[00:00:00.370 --> 00:00:00.690] so
|
||||||
[00:00:00.690 --> 00:00:00.850] my
|
[00:00:00.690 --> 00:00:00.850] my
|
||||||
@ -395,16 +513,42 @@ main: processing './samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 pr
|
|||||||
[00:00:10.510 --> 00:00:11.000] .
|
[00:00:10.510 --> 00:00:11.000] .
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Speaker segmentation via tinydiarize (experimental)
|
||||||
|
|
||||||
|
More information about this approach is available here: https://github.com/ggerganov/whisper.cpp/pull/1058
|
||||||
|
|
||||||
|
Sample usage:
|
||||||
|
|
||||||
|
```py
|
||||||
|
# download a tinydiarize compatible model
|
||||||
|
./models/download-ggml-model.sh small.en-tdrz
|
||||||
|
|
||||||
|
# run as usual, adding the "-tdrz" command-line argument
|
||||||
|
./build/bin/whisper-cli -f ./samples/a13.wav -m ./models/ggml-small.en-tdrz.bin -tdrz
|
||||||
|
...
|
||||||
|
main: processing './samples/a13.wav' (480000 samples, 30.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, tdrz = 1, timestamps = 1 ...
|
||||||
|
...
|
||||||
|
[00:00:00.000 --> 00:00:03.800] Okay Houston, we've had a problem here. [SPEAKER_TURN]
|
||||||
|
[00:00:03.800 --> 00:00:06.200] This is Houston. Say again please. [SPEAKER_TURN]
|
||||||
|
[00:00:06.200 --> 00:00:08.260] Uh Houston we've had a problem.
|
||||||
|
[00:00:08.260 --> 00:00:11.320] We've had a main beam up on a volt. [SPEAKER_TURN]
|
||||||
|
[00:00:11.320 --> 00:00:13.820] Roger main beam interval. [SPEAKER_TURN]
|
||||||
|
[00:00:13.820 --> 00:00:15.100] Uh uh [SPEAKER_TURN]
|
||||||
|
[00:00:15.100 --> 00:00:18.020] So okay stand, by thirteen we're looking at it. [SPEAKER_TURN]
|
||||||
|
[00:00:18.020 --> 00:00:25.740] Okay uh right now uh Houston the uh voltage is uh is looking good um.
|
||||||
|
[00:00:27.620 --> 00:00:29.940] And we had a a pretty large bank or so.
|
||||||
|
```
|
||||||
|
|
||||||
## Karaoke-style movie generation (experimental)
|
## Karaoke-style movie generation (experimental)
|
||||||
|
|
||||||
The [main](examples/main) example provides support for output of karaoke-style movies, where the
|
The [whisper-cli](examples/cli) example provides support for output of karaoke-style movies, where the
|
||||||
currently pronounced word is highlighted. Use the `-wts` argument and run the generated bash script.
|
currently pronounced word is highlighted. Use the `-wts` argument and run the generated bash script.
|
||||||
This requires to have `ffmpeg` installed.
|
This requires to have `ffmpeg` installed.
|
||||||
|
|
||||||
Here are a few *"typical"* examples:
|
Here are a few _"typical"_ examples:
|
||||||
|
|
||||||
```java
|
```bash
|
||||||
./main -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -owts
|
./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -owts
|
||||||
source ./samples/jfk.wav.wts
|
source ./samples/jfk.wav.wts
|
||||||
ffplay ./samples/jfk.wav.mp4
|
ffplay ./samples/jfk.wav.mp4
|
||||||
```
|
```
|
||||||
@ -413,8 +557,8 @@ https://user-images.githubusercontent.com/1991296/199337465-dbee4b5e-9aeb-48a3-b
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
```java
|
```bash
|
||||||
./main -m ./models/ggml-base.en.bin -f ./samples/mm0.wav -owts
|
./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/mm0.wav -owts
|
||||||
source ./samples/mm0.wav.wts
|
source ./samples/mm0.wav.wts
|
||||||
ffplay ./samples/mm0.wav.mp4
|
ffplay ./samples/mm0.wav.mp4
|
||||||
```
|
```
|
||||||
@ -423,8 +567,8 @@ https://user-images.githubusercontent.com/1991296/199337504-cc8fd233-0cb7-4920-9
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
```java
|
```bash
|
||||||
./main -m ./models/ggml-base.en.bin -f ./samples/gb0.wav -owts
|
./build/bin/whisper-cli -m ./models/ggml-base.en.bin -f ./samples/gb0.wav -owts
|
||||||
source ./samples/gb0.wav.wts
|
source ./samples/gb0.wav.wts
|
||||||
ffplay ./samples/gb0.wav.mp4
|
ffplay ./samples/gb0.wav.mp4
|
||||||
```
|
```
|
||||||
@ -435,10 +579,10 @@ https://user-images.githubusercontent.com/1991296/199337538-b7b0c7a3-2753-4a88-a
|
|||||||
|
|
||||||
## Video comparison of different models
|
## Video comparison of different models
|
||||||
|
|
||||||
Use the [extra/bench-wts.sh](https://github.com/ggerganov/whisper.cpp/blob/master/extra/bench-wts.sh) script to generate a video in the following format:
|
Use the [scripts/bench-wts.sh](https://github.com/ggerganov/whisper.cpp/blob/master/scripts/bench-wts.sh) script to generate a video in the following format:
|
||||||
|
|
||||||
```java
|
```bash
|
||||||
./extra/bench-wts.sh samples/jfk.wav
|
./scripts/bench-wts.sh samples/jfk.wav
|
||||||
ffplay ./samples/jfk.wav.all.mp4
|
ffplay ./samples/jfk.wav.all.mp4
|
||||||
```
|
```
|
||||||
|
|
||||||
@ -449,12 +593,24 @@ https://user-images.githubusercontent.com/1991296/223206245-2d36d903-cf8e-4f09-8
|
|||||||
## Benchmarks
|
## Benchmarks
|
||||||
|
|
||||||
In order to have an objective comparison of the performance of the inference across different system configurations,
|
In order to have an objective comparison of the performance of the inference across different system configurations,
|
||||||
use the [bench](examples/bench) tool. The tool simply runs the Encoder part of the model and prints how much time it
|
use the [whisper-bench](examples/bench) tool. The tool simply runs the Encoder part of the model and prints how much time it
|
||||||
took to execute it. The results are summarized in the following Github issue:
|
took to execute it. The results are summarized in the following Github issue:
|
||||||
|
|
||||||
[Benchmark results](https://github.com/ggerganov/whisper.cpp/issues/89)
|
[Benchmark results](https://github.com/ggerganov/whisper.cpp/issues/89)
|
||||||
|
|
||||||
## ggml format
|
Additionally, a script to run whisper.cpp with different models and audio files is provided: [bench.py](scripts/bench.py).
|
||||||
|
|
||||||
|
You can run it with the following command. By default, it will run against any standard model in the models folder.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python3 scripts/bench.py -f samples/jfk.wav -t 2,4,8 -p 1,2
|
||||||
|
```
|
||||||
|
|
||||||
|
It is written in python with the intention of being easy to modify and extend for your benchmarking use case.
|
||||||
|
|
||||||
|
It outputs a csv file with the results of the benchmarking.
|
||||||
|
|
||||||
|
## `ggml` format
|
||||||
|
|
||||||
The original models are converted to a custom binary format. This allows to pack everything needed into a single file:
|
The original models are converted to a custom binary format. This allows to pack everything needed into a single file:
|
||||||
|
|
||||||
@ -469,44 +625,51 @@ or manually from here:
|
|||||||
- https://huggingface.co/ggerganov/whisper.cpp
|
- https://huggingface.co/ggerganov/whisper.cpp
|
||||||
- https://ggml.ggerganov.com
|
- https://ggml.ggerganov.com
|
||||||
|
|
||||||
For more details, see the conversion script [models/convert-pt-to-ggml.py](models/convert-pt-to-ggml.py) or the README
|
For more details, see the conversion script [models/convert-pt-to-ggml.py](models/convert-pt-to-ggml.py) or [models/README.md](models/README.md).
|
||||||
in [models](models).
|
|
||||||
|
|
||||||
## [Bindings](https://github.com/ggerganov/whisper.cpp/discussions/categories/bindings)
|
## [Bindings](https://github.com/ggerganov/whisper.cpp/discussions/categories/bindings)
|
||||||
|
|
||||||
- [X] Rust: [tazz4843/whisper-rs](https://github.com/tazz4843/whisper-rs) | [#310](https://github.com/ggerganov/whisper.cpp/discussions/310)
|
- [x] Rust: [tazz4843/whisper-rs](https://github.com/tazz4843/whisper-rs) | [#310](https://github.com/ggerganov/whisper.cpp/discussions/310)
|
||||||
- [X] Javascript: [bindings/javascript](bindings/javascript) | [#309](https://github.com/ggerganov/whisper.cpp/discussions/309)
|
- [x] JavaScript: [bindings/javascript](bindings/javascript) | [#309](https://github.com/ggerganov/whisper.cpp/discussions/309)
|
||||||
- React Native (iOS / Android): [whisper.rn](https://github.com/mybigday/whisper.rn)
|
- React Native (iOS / Android): [whisper.rn](https://github.com/mybigday/whisper.rn)
|
||||||
- [X] Go: [bindings/go](bindings/go) | [#312](https://github.com/ggerganov/whisper.cpp/discussions/312)
|
- [x] Go: [bindings/go](bindings/go) | [#312](https://github.com/ggerganov/whisper.cpp/discussions/312)
|
||||||
- [X] Ruby: [bindings/ruby](bindings/ruby) | [#507](https://github.com/ggerganov/whisper.cpp/discussions/507)
|
- [x] Java:
|
||||||
- [X] Objective-C / Swift: [ggerganov/whisper.spm](https://github.com/ggerganov/whisper.spm) | [#313](https://github.com/ggerganov/whisper.cpp/discussions/313)
|
- [GiviMAD/whisper-jni](https://github.com/GiviMAD/whisper-jni)
|
||||||
- [X] .NET: | [#422](https://github.com/ggerganov/whisper.cpp/discussions/422)
|
- [x] Ruby: [bindings/ruby](bindings/ruby) | [#507](https://github.com/ggerganov/whisper.cpp/discussions/507)
|
||||||
|
- [x] Objective-C / Swift: [ggerganov/whisper.spm](https://github.com/ggerganov/whisper.spm) | [#313](https://github.com/ggerganov/whisper.cpp/discussions/313)
|
||||||
|
- [exPHAT/SwiftWhisper](https://github.com/exPHAT/SwiftWhisper)
|
||||||
|
- [x] .NET: | [#422](https://github.com/ggerganov/whisper.cpp/discussions/422)
|
||||||
- [sandrohanea/whisper.net](https://github.com/sandrohanea/whisper.net)
|
- [sandrohanea/whisper.net](https://github.com/sandrohanea/whisper.net)
|
||||||
- [NickDarvey/whisper](https://github.com/NickDarvey/whisper)
|
- [NickDarvey/whisper](https://github.com/NickDarvey/whisper)
|
||||||
- [X] Python: | [#9](https://github.com/ggerganov/whisper.cpp/issues/9)
|
- [x] Python: | [#9](https://github.com/ggerganov/whisper.cpp/issues/9)
|
||||||
- [stlukey/whispercpp.py](https://github.com/stlukey/whispercpp.py) (Cython)
|
- [stlukey/whispercpp.py](https://github.com/stlukey/whispercpp.py) (Cython)
|
||||||
|
- [AIWintermuteAI/whispercpp](https://github.com/AIWintermuteAI/whispercpp) (Updated fork of aarnphm/whispercpp)
|
||||||
- [aarnphm/whispercpp](https://github.com/aarnphm/whispercpp) (Pybind11)
|
- [aarnphm/whispercpp](https://github.com/aarnphm/whispercpp) (Pybind11)
|
||||||
- [X] R: [bnosac/audio.whisper](https://github.com/bnosac/audio.whisper)
|
- [abdeladim-s/pywhispercpp](https://github.com/abdeladim-s/pywhispercpp) (Pybind11)
|
||||||
|
- [x] R: [bnosac/audio.whisper](https://github.com/bnosac/audio.whisper)
|
||||||
|
- [x] Unity: [macoron/whisper.unity](https://github.com/Macoron/whisper.unity)
|
||||||
|
|
||||||
## Examples
|
## Examples
|
||||||
|
|
||||||
There are various examples of using the library for different projects in the [examples](examples) folder.
|
There are various examples of using the library for different projects in the [examples](examples) folder.
|
||||||
Some of the examples are even ported to run in the browser using WebAssembly. Check them out!
|
Some of the examples are even ported to run in the browser using WebAssembly. Check them out!
|
||||||
|
|
||||||
| Example | Web | Description |
|
| Example | Web | Description |
|
||||||
| --- | --- | --- |
|
| --------------------------------------------------- | ------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| [main](examples/main) | [whisper.wasm](examples/whisper.wasm) | Tool for translating and transcribing audio using Whisper |
|
| [whisper-cli](examples/cli) | [whisper.wasm](examples/whisper.wasm) | Tool for translating and transcribing audio using Whisper |
|
||||||
| [bench](examples/bench) | [bench.wasm](examples/bench.wasm) | Benchmark the performance of Whisper on your machine |
|
| [whisper-bench](examples/bench) | [bench.wasm](examples/bench.wasm) | Benchmark the performance of Whisper on your machine |
|
||||||
| [stream](examples/stream) | [stream.wasm](examples/stream.wasm) | Real-time transcription of raw microphone capture |
|
| [whisper-stream](examples/stream) | [stream.wasm](examples/stream.wasm) | Real-time transcription of raw microphone capture |
|
||||||
| [command](examples/command) | [command.wasm](examples/command.wasm) | Basic voice assistant example for receiving voice commands from the mic |
|
| [whisper-command](examples/command) | [command.wasm](examples/command.wasm) | Basic voice assistant example for receiving voice commands from the mic |
|
||||||
| [talk](examples/talk) | [talk.wasm](examples/talk.wasm) | Talk with a GPT-2 bot |
|
| [whisper-server](examples/server) | | HTTP transcription server with OAI-like API |
|
||||||
| [whisper.objc](examples/whisper.objc) | | iOS mobile application using whisper.cpp |
|
| [whisper-talk-llama](examples/talk-llama) | | Talk with a LLaMA bot |
|
||||||
| [whisper.swiftui](examples/whisper.swiftui) | | SwiftUI iOS / macOS application using whisper.cpp |
|
| [whisper.objc](examples/whisper.objc) | | iOS mobile application using whisper.cpp |
|
||||||
| [whisper.android](examples/whisper.android) | | Android mobile application using whisper.cpp |
|
| [whisper.swiftui](examples/whisper.swiftui) | | SwiftUI iOS / macOS application using whisper.cpp |
|
||||||
| [whisper.nvim](examples/whisper.nvim) | | Speech-to-text plugin for Neovim |
|
| [whisper.android](examples/whisper.android) | | Android mobile application using whisper.cpp |
|
||||||
| [generate-karaoke.sh](examples/generate-karaoke.sh) | | Helper script to easily [generate a karaoke video](https://youtu.be/uj7hVta4blM) of raw audio capture |
|
| [whisper.nvim](examples/whisper.nvim) | | Speech-to-text plugin for Neovim |
|
||||||
| [livestream.sh](examples/livestream.sh) | | [Livestream audio transcription](https://github.com/ggerganov/whisper.cpp/issues/185) |
|
| [generate-karaoke.sh](examples/generate-karaoke.sh) | | Helper script to easily [generate a karaoke video](https://youtu.be/uj7hVta4blM) of raw audio capture |
|
||||||
| [yt-wsp.sh](examples/yt-wsp.sh) | | Download + transcribe and/or translate any VOD [(original)](https://gist.github.com/DaniruKun/96f763ec1a037cc92fe1a059b643b818) |
|
| [livestream.sh](examples/livestream.sh) | | [Livestream audio transcription](https://github.com/ggerganov/whisper.cpp/issues/185) |
|
||||||
|
| [yt-wsp.sh](examples/yt-wsp.sh) | | Download + transcribe and/or translate any VOD [(original)](https://gist.github.com/DaniruKun/96f763ec1a037cc92fe1a059b643b818) |
|
||||||
|
| [wchess](examples/wchess) | [wchess.wasm](examples/wchess) | Voice-controlled chess |
|
||||||
|
|
||||||
## [Discussions](https://github.com/ggerganov/whisper.cpp/discussions)
|
## [Discussions](https://github.com/ggerganov/whisper.cpp/discussions)
|
||||||
|
|
||||||
|
249
README_sycl.md
Normal file
249
README_sycl.md
Normal file
@ -0,0 +1,249 @@
|
|||||||
|
# whisper.cpp for SYCL
|
||||||
|
|
||||||
|
[Background](#background)
|
||||||
|
|
||||||
|
[OS](#os)
|
||||||
|
|
||||||
|
[Intel GPU](#intel-gpu)
|
||||||
|
|
||||||
|
[Linux](#linux)
|
||||||
|
|
||||||
|
[Environment Variable](#environment-variable)
|
||||||
|
|
||||||
|
[Known Issue](#known-issue)
|
||||||
|
|
||||||
|
[Todo](#todo)
|
||||||
|
|
||||||
|
## Background
|
||||||
|
|
||||||
|
SYCL is a higher-level programming model to improve programming productivity on various hardware accelerators—such as CPUs, GPUs, and FPGAs. It is a single-source embedded domain-specific language based on pure C++17.
|
||||||
|
|
||||||
|
oneAPI is a specification that is open and standards-based, supporting multiple architecture types including but not limited to GPU, CPU, and FPGA. The spec has both direct programming and API-based programming paradigms.
|
||||||
|
|
||||||
|
Intel uses SYCL as a direct programming language to support CPUs, GPUs, and FPGAs.
|
||||||
|
|
||||||
|
To avoid re-inventing the wheel, this code refers to other code paths in llama.cpp (like OpenBLAS, cuBLAS, CLBlast). We use an open-source tool, [SYCLomatic](https://github.com/oneapi-src/SYCLomatic) (commercial release: [Intel® DPC++ Compatibility Tool](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compatibility-tool.html)), to migrate to SYCL.
|
||||||
|
|
||||||
|
The whisper.cpp for SYCL is used to support Intel GPUs.
|
||||||
|
|
||||||
|
For Intel CPUs, we recommend using whisper.cpp for x86 (Intel MKL build).
|
||||||
|
|
||||||
|
## OS
|
||||||
|
|
||||||
|
|OS|Status|Verified|
|
||||||
|
|-|-|-|
|
||||||
|
|Linux|Support|Ubuntu 22.04|
|
||||||
|
|Windows|Ongoing| |
|
||||||
|
|
||||||
|
|
||||||
|
## Intel GPU
|
||||||
|
|
||||||
|
|Intel GPU| Status | Verified Model|
|
||||||
|
|-|-|-|
|
||||||
|
|Intel Data Center Max Series| Support| Max 1550|
|
||||||
|
|Intel Data Center Flex Series| Support| Flex 170|
|
||||||
|
|Intel Arc Series| Support| Arc 770|
|
||||||
|
|Intel built-in Arc GPU| Support| built-in Arc GPU in Meteor Lake|
|
||||||
|
|Intel iGPU| Support| iGPU in i5-1250P, i7-1165G7|
|
||||||
|
|
||||||
|
|
||||||
|
## Linux
|
||||||
|
|
||||||
|
### Setup Environment
|
||||||
|
|
||||||
|
1. Install Intel GPU driver.
|
||||||
|
|
||||||
|
a. Please install Intel GPU driver by official guide: [Install GPU Drivers](https://dgpu-docs.intel.com/driver/installation.html).
|
||||||
|
|
||||||
|
Note: for iGPU, please install the client GPU driver.
|
||||||
|
|
||||||
|
b. Add user to group: video, render.
|
||||||
|
|
||||||
|
```
|
||||||
|
sudo usermod -aG render username
|
||||||
|
sudo usermod -aG video username
|
||||||
|
```
|
||||||
|
|
||||||
|
Note: re-login to enable it.
|
||||||
|
|
||||||
|
c. Check
|
||||||
|
|
||||||
|
```
|
||||||
|
sudo apt install clinfo
|
||||||
|
sudo clinfo -l
|
||||||
|
```
|
||||||
|
|
||||||
|
Output (example):
|
||||||
|
|
||||||
|
```
|
||||||
|
Platform #0: Intel(R) OpenCL Graphics
|
||||||
|
`-- Device #0: Intel(R) Arc(TM) A770 Graphics
|
||||||
|
|
||||||
|
|
||||||
|
Platform #0: Intel(R) OpenCL HD Graphics
|
||||||
|
`-- Device #0: Intel(R) Iris(R) Xe Graphics [0x9a49]
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Install Intel® oneAPI Base Toolkit.
|
||||||
|
|
||||||
|
|
||||||
|
a. Please follow the procedure in [Get the Intel® oneAPI Base Toolkit](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html).
|
||||||
|
|
||||||
|
Recommend to install to default folder: **/opt/intel/oneapi**.
|
||||||
|
|
||||||
|
The following guide uses the default folder as an example. If you use a different folder, please adjust the paths in the guide accordingly.
|
||||||
|
|
||||||
|
b. Check
|
||||||
|
|
||||||
|
```
|
||||||
|
source /opt/intel/oneapi/setvars.sh
|
||||||
|
|
||||||
|
sycl-ls
|
||||||
|
```
|
||||||
|
|
||||||
|
There should be one or more level-zero devices. Like **[ext_oneapi_level_zero:gpu:0]**.
|
||||||
|
|
||||||
|
Output (example):
|
||||||
|
```
|
||||||
|
[opencl:acc:0] Intel(R) FPGA Emulation Platform for OpenCL(TM), Intel(R) FPGA Emulation Device OpenCL 1.2 [2023.16.10.0.17_160000]
|
||||||
|
[opencl:cpu:1] Intel(R) OpenCL, 13th Gen Intel(R) Core(TM) i7-13700K OpenCL 3.0 (Build 0) [2023.16.10.0.17_160000]
|
||||||
|
[opencl:gpu:2] Intel(R) OpenCL Graphics, Intel(R) Arc(TM) A770 Graphics OpenCL 3.0 NEO [23.30.26918.50]
|
||||||
|
[ext_oneapi_level_zero:gpu:0] Intel(R) Level-Zero, Intel(R) Arc(TM) A770 Graphics 1.3 [1.3.26918]
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Build locally:
|
||||||
|
|
||||||
|
```
|
||||||
|
mkdir -p build
|
||||||
|
cd build
|
||||||
|
source /opt/intel/oneapi/setvars.sh
|
||||||
|
|
||||||
|
#for FP16
|
||||||
|
#cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DWHISPER_SYCL_F16=ON
|
||||||
|
|
||||||
|
#for FP32
|
||||||
|
cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
|
||||||
|
|
||||||
|
#build example/main only
|
||||||
|
#cmake --build . --config Release --target main
|
||||||
|
|
||||||
|
#build all binary
|
||||||
|
cmake --build . --config Release -v
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
or
|
||||||
|
|
||||||
|
```
|
||||||
|
./examples/sycl/build.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
Note:
|
||||||
|
|
||||||
|
- By default, it will build for all binary files. It will take more time. To reduce the time, we recommend to build for **example/main** only.
|
||||||
|
|
||||||
|
### Run
|
||||||
|
|
||||||
|
1. Put model file to folder **models**
|
||||||
|
|
||||||
|
2. Enable oneAPI running environment
|
||||||
|
|
||||||
|
```
|
||||||
|
source /opt/intel/oneapi/setvars.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
3. List device ID
|
||||||
|
|
||||||
|
Run without parameter:
|
||||||
|
|
||||||
|
```
|
||||||
|
./build/bin/ls-sycl-device
|
||||||
|
|
||||||
|
or
|
||||||
|
|
||||||
|
./build/bin/main
|
||||||
|
```
|
||||||
|
|
||||||
|
Check the ID in startup log, like:
|
||||||
|
|
||||||
|
```
|
||||||
|
found 4 SYCL devices:
|
||||||
|
Device 0: Intel(R) Arc(TM) A770 Graphics, compute capability 1.3,
|
||||||
|
max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
|
||||||
|
Device 1: Intel(R) FPGA Emulation Device, compute capability 1.2,
|
||||||
|
max compute_units 24, max work group size 67108864, max sub group size 64, global mem size 67065057280
|
||||||
|
Device 2: 13th Gen Intel(R) Core(TM) i7-13700K, compute capability 3.0,
|
||||||
|
max compute_units 24, max work group size 8192, max sub group size 64, global mem size 67065057280
|
||||||
|
Device 3: Intel(R) Arc(TM) A770 Graphics, compute capability 3.0,
|
||||||
|
max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
|Attribute|Note|
|
||||||
|
|-|-|
|
||||||
|
|compute capability 1.3|Level-zero running time, recommended |
|
||||||
|
|compute capability 3.0|OpenCL running time, slower than level-zero in most cases|
|
||||||
|
|
||||||
|
4. Set device ID and execute whisper.cpp
|
||||||
|
|
||||||
|
Set device ID = 0 by **GGML_SYCL_DEVICE=0**
|
||||||
|
|
||||||
|
```
|
||||||
|
GGML_SYCL_DEVICE=0 ./build/bin/main -m models/ggml-base.en.bin -f samples/jfk.wav
|
||||||
|
```
|
||||||
|
or run by script:
|
||||||
|
|
||||||
|
```
|
||||||
|
./examples/sycl/run_whisper.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
5. Check the device ID in output
|
||||||
|
|
||||||
|
Like:
|
||||||
|
```
|
||||||
|
Using device **0** (Intel(R) Arc(TM) A770 Graphics) as main device
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## Environment Variable
|
||||||
|
|
||||||
|
#### Build
|
||||||
|
|
||||||
|
|Name|Value|Function|
|
||||||
|
|-|-|-|
|
||||||
|
|WHISPER_SYCL|ON (mandatory)|Enable build with SYCL code path. <br>For FP32/FP16, WHISPER_SYCL=ON is mandatory.|
|
||||||
|
|WHISPER_SYCL_F16|ON (optional)|Enable FP16 build with SYCL code path. For FP32, do not set it.|
|
||||||
|
|CMAKE_C_COMPILER|icx|Use icx compiler for SYCL code path|
|
||||||
|
|CMAKE_CXX_COMPILER|icpx|use icpx for SYCL code path|
|
||||||
|
|
||||||
|
#### Running
|
||||||
|
|
||||||
|
|
||||||
|
|Name|Value|Function|
|
||||||
|
|-|-|-|
|
||||||
|
|GGML_SYCL_DEVICE|0 (default) or 1|Set the device id used. Check the device ids by default running output|
|
||||||
|
|GGML_SYCL_DEBUG|0 (default) or 1|Enable log function by macro: GGML_SYCL_DEBUG|
|
||||||
|
|
||||||
|
## Known Issue
|
||||||
|
|
||||||
|
- Error: `error while loading shared libraries: libsycl.so.7: cannot open shared object file: No such file or directory`.
|
||||||
|
|
||||||
|
Cause: the oneAPI running environment has not been enabled.
|
||||||
|
|
||||||
|
Install oneAPI base toolkit and enable it by: `source /opt/intel/oneapi/setvars.sh`.
|
||||||
|
|
||||||
|
|
||||||
|
- Hang during startup
|
||||||
|
|
||||||
|
llama.cpp uses mmap as the default way to read the model file and copy it to the GPU. On some systems, memcpy will behave abnormally and block.
|
||||||
|
|
||||||
|
Solution: add **--no-mmap**.
|
||||||
|
|
||||||
|
## Todo
|
||||||
|
|
||||||
|
- Support to build in Windows.
|
||||||
|
|
||||||
|
- Support multiple cards.
|
@ -1,9 +1,31 @@
|
|||||||
|
ifndef UNAME_S
|
||||||
|
UNAME_S := $(shell uname -s)
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifndef UNAME_P
|
||||||
|
UNAME_P := $(shell uname -p)
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifndef UNAME_M
|
||||||
|
UNAME_M := $(shell uname -m)
|
||||||
|
endif
|
||||||
|
|
||||||
|
GGML_METAL_PATH_RESOURCES := $(abspath ../..)
|
||||||
BUILD_DIR := build
|
BUILD_DIR := build
|
||||||
MODELS_DIR := models
|
MODELS_DIR := models
|
||||||
EXAMPLES_DIR := $(wildcard examples/*)
|
EXAMPLES_DIR := $(wildcard examples/*)
|
||||||
INCLUDE_PATH := $(abspath ../..)
|
INCLUDE_PATH := $(abspath ../../include):$(abspath ../../ggml/include)
|
||||||
LIBRARY_PATH := $(abspath ../..)
|
LIBRARY_PATH := $(abspath ../..)
|
||||||
|
|
||||||
|
ifeq ($(GGML_CUDA),1)
|
||||||
|
LIBRARY_PATH := $(LIBRARY_PATH):$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib/
|
||||||
|
BUILD_FLAGS := -ldflags "-extldflags '-lcudart -lcuda -lcublas'"
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(UNAME_S),Darwin)
|
||||||
|
EXT_LDFLAGS := -framework Foundation -framework Metal -framework MetalKit
|
||||||
|
endif
|
||||||
|
|
||||||
all: clean whisper examples
|
all: clean whisper examples
|
||||||
|
|
||||||
whisper: mkdir
|
whisper: mkdir
|
||||||
@ -11,8 +33,13 @@ whisper: mkdir
|
|||||||
@${MAKE} -C ../.. libwhisper.a
|
@${MAKE} -C ../.. libwhisper.a
|
||||||
|
|
||||||
test: model-small whisper modtidy
|
test: model-small whisper modtidy
|
||||||
|
ifeq ($(UNAME_S),Darwin)
|
||||||
|
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go test -ldflags "-extldflags '$(EXT_LDFLAGS)'" -v .
|
||||||
|
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go test -ldflags "-extldflags '$(EXT_LDFLAGS)'" -v ./pkg/whisper/...
|
||||||
|
else
|
||||||
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go test -v .
|
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go test -v .
|
||||||
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go test -v ./pkg/whisper/...
|
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go test -v ./pkg/whisper/...
|
||||||
|
endif
|
||||||
|
|
||||||
examples: $(EXAMPLES_DIR)
|
examples: $(EXAMPLES_DIR)
|
||||||
|
|
||||||
@ -21,7 +48,11 @@ model-small: mkdir examples/go-model-download
|
|||||||
|
|
||||||
$(EXAMPLES_DIR): mkdir whisper modtidy
|
$(EXAMPLES_DIR): mkdir whisper modtidy
|
||||||
@echo Build example $(notdir $@)
|
@echo Build example $(notdir $@)
|
||||||
|
ifeq ($(UNAME_S),Darwin)
|
||||||
|
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go build ${BUILD_FLAGS} -ldflags "-extldflags '$(EXT_LDFLAGS)'" -o ${BUILD_DIR}/$(notdir $@) ./$@
|
||||||
|
else
|
||||||
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go build ${BUILD_FLAGS} -o ${BUILD_DIR}/$(notdir $@) ./$@
|
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go build ${BUILD_FLAGS} -o ${BUILD_DIR}/$(notdir $@) ./$@
|
||||||
|
endif
|
||||||
|
|
||||||
mkdir:
|
mkdir:
|
||||||
@echo Mkdir ${BUILD_DIR}
|
@echo Mkdir ${BUILD_DIR}
|
||||||
@ -32,7 +63,7 @@ mkdir:
|
|||||||
modtidy:
|
modtidy:
|
||||||
@go mod tidy
|
@go mod tidy
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
@echo Clean
|
@echo Clean
|
||||||
@rm -fr $(BUILD_DIR)
|
@rm -fr $(BUILD_DIR)
|
||||||
@go clean
|
@go clean
|
||||||
|
@ -31,7 +31,7 @@ func main() {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
if err := context.Process(samples, nil); err != nil {
|
if err := context.Process(samples, nil, nil); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -62,6 +62,12 @@ This will compile a static `libwhisper.a` in a `build` folder, download a model
|
|||||||
make examples
|
make examples
|
||||||
```
|
```
|
||||||
|
|
||||||
|
To build using cuda support add `GGML_CUDA=1`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
GGML_CUDA=1 make examples
|
||||||
|
```
|
||||||
|
|
||||||
The examples are placed in the `build` directory. Once built, you can download all the models with the following command:
|
The examples are placed in the `build` directory. Once built, you can download all the models with the following command:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
@ -71,7 +77,7 @@ The examples are placed in the `build` directory. Once built, you can download a
|
|||||||
And you can then test a model against samples with the following command:
|
And you can then test a model against samples with the following command:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
./build/go-whisper -model models/ggml-tiny.en.bin samples/jfk.wav
|
./build/go-whisper -model models/ggml-tiny.en.bin samples/jfk.wav
|
||||||
```
|
```
|
||||||
|
|
||||||
## Using the bindings
|
## Using the bindings
|
||||||
|
@ -9,22 +9,23 @@ import (
|
|||||||
// ContextForSignal returns a context object which is cancelled when a signal
|
// ContextForSignal returns a context object which is cancelled when a signal
|
||||||
// is received. It returns nil if no signal parameter is provided
|
// is received. It returns nil if no signal parameter is provided
|
||||||
func ContextForSignal(signals ...os.Signal) context.Context {
|
func ContextForSignal(signals ...os.Signal) context.Context {
|
||||||
if len(signals) == 0 {
|
if len(signals) == 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
ch := make(chan os.Signal)
|
ch := make(chan os.Signal, 1) // Buffered channel with space for 1 signal
|
||||||
ctx, cancel := context.WithCancel(context.Background())
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
|
||||||
// Send message on channel when signal received
|
// Send message on channel when signal received
|
||||||
signal.Notify(ch, signals...)
|
signal.Notify(ch, signals...)
|
||||||
|
|
||||||
// When any signal received, call cancel
|
// When any signal is received, call cancel
|
||||||
go func() {
|
go func() {
|
||||||
<-ch
|
<-ch
|
||||||
cancel()
|
cancel()
|
||||||
}()
|
}()
|
||||||
|
|
||||||
// Return success
|
// Return success
|
||||||
return ctx
|
return ctx
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -9,6 +9,7 @@ import (
|
|||||||
"net/url"
|
"net/url"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
"syscall"
|
"syscall"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
@ -17,14 +18,27 @@ import (
|
|||||||
// CONSTANTS
|
// CONSTANTS
|
||||||
|
|
||||||
const (
|
const (
|
||||||
srcUrl = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main" // The location of the models
|
srcUrl = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/" // The location of the models
|
||||||
srcExt = ".bin" // Filename extension
|
srcExt = ".bin" // Filename extension
|
||||||
bufSize = 1024 * 64 // Size of the buffer used for downloading the model
|
bufSize = 1024 * 64 // Size of the buffer used for downloading the model
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
// The models which will be downloaded, if no model is specified as an argument
|
// The models which will be downloaded, if no model is specified as an argument
|
||||||
modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large"}
|
modelNames = []string{
|
||||||
|
"tiny", "tiny-q5_1", "tiny-q8_0",
|
||||||
|
"tiny.en", "tiny.en-q5_1", "tiny.en-q8_0",
|
||||||
|
"base", "base-q5_1", "base-q8_0",
|
||||||
|
"base.en", "base.en-q5_1", "base.en-q8_0",
|
||||||
|
"small", "small-q5_1", "small-q8_0",
|
||||||
|
"small.en", "small.en-q5_1", "small.en-q8_0",
|
||||||
|
"medium", "medium-q5_0", "medium-q8_0",
|
||||||
|
"medium.en", "medium.en-q5_0", "medium.en-q8_0",
|
||||||
|
"large-v1",
|
||||||
|
"large-v2", "large-v2-q5_0", "large-v2-q8_0",
|
||||||
|
"large-v3", "large-v3-q5_0",
|
||||||
|
"large-v3-turbo", "large-v3-turbo-q5_0", "large-v3-turbo-q8_0",
|
||||||
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
@ -44,7 +58,25 @@ var (
|
|||||||
func main() {
|
func main() {
|
||||||
flag.Usage = func() {
|
flag.Usage = func() {
|
||||||
name := filepath.Base(flag.CommandLine.Name())
|
name := filepath.Base(flag.CommandLine.Name())
|
||||||
fmt.Fprintf(flag.CommandLine.Output(), "Usage: %s [options] <model>\n\n", name)
|
fmt.Fprintf(flag.CommandLine.Output(), `
|
||||||
|
Usage: %s [options] [<model>...]
|
||||||
|
|
||||||
|
Options:
|
||||||
|
-out string Specify the output folder where models will be saved.
|
||||||
|
Default: Current working directory.
|
||||||
|
-timeout duration Set the maximum duration for downloading a model.
|
||||||
|
Example: 10m, 1h (default: 30m0s).
|
||||||
|
-quiet Suppress all output except errors.
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
1. Download a specific model:
|
||||||
|
%s -out ./models tiny-q8_0
|
||||||
|
|
||||||
|
2. Download all models:
|
||||||
|
%s -out ./models
|
||||||
|
|
||||||
|
`, name, name, name)
|
||||||
|
|
||||||
flag.PrintDefaults()
|
flag.PrintDefaults()
|
||||||
}
|
}
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
@ -114,23 +146,87 @@ func GetOut() (string, error) {
|
|||||||
// GetModels returns the list of models to download
|
// GetModels returns the list of models to download
|
||||||
func GetModels() []string {
|
func GetModels() []string {
|
||||||
if flag.NArg() == 0 {
|
if flag.NArg() == 0 {
|
||||||
return modelNames
|
fmt.Println("No model specified.")
|
||||||
} else {
|
fmt.Println("Preparing to download all models...")
|
||||||
return flag.Args()
|
|
||||||
|
// Calculate total download size
|
||||||
|
fmt.Println("Calculating total download size...")
|
||||||
|
totalSize, err := CalculateTotalDownloadSize(modelNames)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println("Error calculating download sizes:", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Println("View available models: https://huggingface.co/ggerganov/whisper.cpp/tree/main")
|
||||||
|
fmt.Printf("Total download size: %.2f GB\n", float64(totalSize)/(1024*1024*1024))
|
||||||
|
fmt.Println("Would you like to download all models? (y/N)")
|
||||||
|
|
||||||
|
// Prompt for user input
|
||||||
|
var response string
|
||||||
|
fmt.Scanln(&response)
|
||||||
|
if response != "y" && response != "Y" {
|
||||||
|
fmt.Println("Aborting. Specify a model to download.")
|
||||||
|
os.Exit(0)
|
||||||
|
}
|
||||||
|
|
||||||
|
return modelNames // Return all models if confirmed
|
||||||
}
|
}
|
||||||
|
return flag.Args() // Return specific models if arguments are provided
|
||||||
|
}
|
||||||
|
|
||||||
|
func CalculateTotalDownloadSize(models []string) (int64, error) {
|
||||||
|
var totalSize int64
|
||||||
|
client := http.Client{}
|
||||||
|
|
||||||
|
for _, model := range models {
|
||||||
|
modelURL, err := URLForModel(model)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Issue a HEAD request to get the file size
|
||||||
|
req, err := http.NewRequest("HEAD", modelURL, nil)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := client.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
fmt.Printf("Warning: Unable to fetch size for %s (HTTP %d)\n", model, resp.StatusCode)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
size := resp.ContentLength
|
||||||
|
totalSize += size
|
||||||
|
}
|
||||||
|
return totalSize, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// URLForModel returns the URL for the given model on huggingface.co
|
// URLForModel returns the URL for the given model on huggingface.co
|
||||||
func URLForModel(model string) (string, error) {
|
func URLForModel(model string) (string, error) {
|
||||||
|
// Ensure "ggml-" prefix is added only once
|
||||||
|
if !strings.HasPrefix(model, "ggml-") {
|
||||||
|
model = "ggml-" + model
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure ".bin" extension is added only once
|
||||||
if filepath.Ext(model) != srcExt {
|
if filepath.Ext(model) != srcExt {
|
||||||
model += srcExt
|
model += srcExt
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Parse the base URL
|
||||||
url, err := url.Parse(srcUrl)
|
url, err := url.Parse(srcUrl)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
} else {
|
|
||||||
url.Path = filepath.Join(url.Path, model)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Ensure no trailing slash in the base URL
|
||||||
|
url.Path = fmt.Sprintf("%s/%s", strings.TrimSuffix(url.Path, "/"), model)
|
||||||
return url.String(), nil
|
return url.String(), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -68,10 +68,6 @@ func (flags *Flags) GetOut() string {
|
|||||||
return strings.ToLower(flags.Lookup("out").Value.String())
|
return strings.ToLower(flags.Lookup("out").Value.String())
|
||||||
}
|
}
|
||||||
|
|
||||||
func (flags *Flags) IsSpeedup() bool {
|
|
||||||
return flags.Lookup("speedup").Value.String() == "true"
|
|
||||||
}
|
|
||||||
|
|
||||||
func (flags *Flags) IsTokens() bool {
|
func (flags *Flags) IsTokens() bool {
|
||||||
return flags.Lookup("tokens").Value.String() == "true"
|
return flags.Lookup("tokens").Value.String() == "true"
|
||||||
}
|
}
|
||||||
@ -111,10 +107,6 @@ func (flags *Flags) SetParams(context whisper.Context) error {
|
|||||||
fmt.Fprintf(flags.Output(), "Setting duration to %v\n", duration)
|
fmt.Fprintf(flags.Output(), "Setting duration to %v\n", duration)
|
||||||
context.SetDuration(duration)
|
context.SetDuration(duration)
|
||||||
}
|
}
|
||||||
if flags.IsSpeedup() {
|
|
||||||
fmt.Fprintf(flags.Output(), "Setting speedup to true\n")
|
|
||||||
context.SetSpeedup(true)
|
|
||||||
}
|
|
||||||
if threads := flags.GetThreads(); threads != 0 {
|
if threads := flags.GetThreads(); threads != 0 {
|
||||||
fmt.Fprintf(flags.Output(), "Setting threads to %d\n", threads)
|
fmt.Fprintf(flags.Output(), "Setting threads to %d\n", threads)
|
||||||
context.SetThreads(threads)
|
context.SetThreads(threads)
|
||||||
@ -146,7 +138,6 @@ func registerFlags(flag *Flags) {
|
|||||||
flag.Duration("offset", 0, "Time offset")
|
flag.Duration("offset", 0, "Time offset")
|
||||||
flag.Duration("duration", 0, "Duration of audio to process")
|
flag.Duration("duration", 0, "Duration of audio to process")
|
||||||
flag.Uint("threads", 0, "Number of threads to use")
|
flag.Uint("threads", 0, "Number of threads to use")
|
||||||
flag.Bool("speedup", false, "Enable speedup")
|
|
||||||
flag.Uint("max-len", 0, "Maximum segment length in characters")
|
flag.Uint("max-len", 0, "Maximum segment length in characters")
|
||||||
flag.Uint("max-tokens", 0, "Maximum tokens per segment")
|
flag.Uint("max-tokens", 0, "Maximum tokens per segment")
|
||||||
flag.Float64("word-thold", 0, "Maximum segment score")
|
flag.Float64("word-thold", 0, "Maximum segment score")
|
||||||
|
@ -67,7 +67,7 @@ func Process(model whisper.Model, path string, flags *Flags) error {
|
|||||||
// Process the data
|
// Process the data
|
||||||
fmt.Fprintf(flags.Output(), " ...processing %q\n", path)
|
fmt.Fprintf(flags.Output(), " ...processing %q\n", path)
|
||||||
context.ResetTimings()
|
context.ResetTimings()
|
||||||
if err := context.Process(data, cb); err != nil {
|
if err := context.Process(data, cb, nil); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
module github.com/ggerganov/whisper.cpp/bindings/go
|
module github.com/ggerganov/whisper.cpp/bindings/go
|
||||||
|
|
||||||
go 1.19
|
go 1.23
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/go-audio/wav v1.1.0
|
github.com/go-audio/wav v1.1.0
|
||||||
github.com/stretchr/testify v1.8.1
|
github.com/stretchr/testify v1.9.0
|
||||||
)
|
)
|
||||||
|
|
||||||
require (
|
require (
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
|
||||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4=
|
github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4=
|
||||||
@ -9,15 +8,9 @@ github.com/go-audio/wav v1.1.0 h1:jQgLtbqBzY7G+BM8fXF7AHUk1uHUviWS4X39d5rsL2g=
|
|||||||
github.com/go-audio/wav v1.1.0/go.mod h1:mpe9qfwbScEbkd8uybLuIpTgHyrISw/OTuvjUW2iGtE=
|
github.com/go-audio/wav v1.1.0/go.mod h1:mpe9qfwbScEbkd8uybLuIpTgHyrISw/OTuvjUW2iGtE=
|
||||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
|
||||||
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
|
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||||
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
|
|
||||||
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
|
||||||
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
|
|
||||||
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
|
|
||||||
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
|
|
||||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
|
||||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||||
|
@ -19,6 +19,10 @@ func (p *Params) SetTranslate(v bool) {
|
|||||||
p.translate = toBool(v)
|
p.translate = toBool(v)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (p *Params) SetSplitOnWord(v bool) {
|
||||||
|
p.split_on_word = toBool(v)
|
||||||
|
}
|
||||||
|
|
||||||
func (p *Params) SetNoContext(v bool) {
|
func (p *Params) SetNoContext(v bool) {
|
||||||
p.no_context = toBool(v)
|
p.no_context = toBool(v)
|
||||||
}
|
}
|
||||||
@ -43,10 +47,6 @@ func (p *Params) SetPrintTimestamps(v bool) {
|
|||||||
p.print_timestamps = toBool(v)
|
p.print_timestamps = toBool(v)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *Params) SetSpeedup(v bool) {
|
|
||||||
p.speed_up = toBool(v)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set language id
|
// Set language id
|
||||||
func (p *Params) SetLanguage(lang int) error {
|
func (p *Params) SetLanguage(lang int) error {
|
||||||
if lang == -1 {
|
if lang == -1 {
|
||||||
@ -105,11 +105,47 @@ func (p *Params) SetMaxSegmentLength(n int) {
|
|||||||
p.max_len = C.int(n)
|
p.max_len = C.int(n)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (p *Params) SetTokenTimestamps(b bool) {
|
||||||
|
p.token_timestamps = toBool(b)
|
||||||
|
}
|
||||||
|
|
||||||
// Set max tokens per segment (0 = no limit)
|
// Set max tokens per segment (0 = no limit)
|
||||||
func (p *Params) SetMaxTokensPerSegment(n int) {
|
func (p *Params) SetMaxTokensPerSegment(n int) {
|
||||||
p.max_tokens = C.int(n)
|
p.max_tokens = C.int(n)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Set audio encoder context
|
||||||
|
func (p *Params) SetAudioCtx(n int) {
|
||||||
|
p.audio_ctx = C.int(n)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *Params) SetMaxContext(n int) {
|
||||||
|
p.n_max_text_ctx = C.int(n)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *Params) SetBeamSize(n int) {
|
||||||
|
p.beam_search.beam_size = C.int(n)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *Params) SetEntropyThold(t float32) {
|
||||||
|
p.entropy_thold = C.float(t)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *Params) SetTemperature(t float32) {
|
||||||
|
p.temperature = C.float(t)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sets the fallback temperature incrementation
|
||||||
|
// Pass -1.0 to disable this feature
|
||||||
|
func (p *Params) SetTemperatureFallback(t float32) {
|
||||||
|
p.temperature_inc = C.float(t)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set initial prompt
|
||||||
|
func (p *Params) SetInitialPrompt(prompt string) {
|
||||||
|
p.initial_prompt = C.CString(prompt)
|
||||||
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
// PRIVATE METHODS
|
// PRIVATE METHODS
|
||||||
|
|
||||||
@ -133,6 +169,12 @@ func (p *Params) String() string {
|
|||||||
str += fmt.Sprintf(" n_max_text_ctx=%d", p.n_max_text_ctx)
|
str += fmt.Sprintf(" n_max_text_ctx=%d", p.n_max_text_ctx)
|
||||||
str += fmt.Sprintf(" offset_ms=%d", p.offset_ms)
|
str += fmt.Sprintf(" offset_ms=%d", p.offset_ms)
|
||||||
str += fmt.Sprintf(" duration_ms=%d", p.duration_ms)
|
str += fmt.Sprintf(" duration_ms=%d", p.duration_ms)
|
||||||
|
str += fmt.Sprintf(" audio_ctx=%d", p.audio_ctx)
|
||||||
|
str += fmt.Sprintf(" initial_prompt=%s", C.GoString(p.initial_prompt))
|
||||||
|
str += fmt.Sprintf(" entropy_thold=%f", p.entropy_thold)
|
||||||
|
str += fmt.Sprintf(" temperature=%f", p.temperature)
|
||||||
|
str += fmt.Sprintf(" temperature_inc=%f", p.temperature_inc)
|
||||||
|
str += fmt.Sprintf(" beam_size=%d", p.beam_search.beam_size)
|
||||||
if p.translate {
|
if p.translate {
|
||||||
str += " translate"
|
str += " translate"
|
||||||
}
|
}
|
||||||
@ -157,9 +199,6 @@ func (p *Params) String() string {
|
|||||||
if p.token_timestamps {
|
if p.token_timestamps {
|
||||||
str += " token_timestamps"
|
str += " token_timestamps"
|
||||||
}
|
}
|
||||||
if p.speed_up {
|
|
||||||
str += " speed_up"
|
|
||||||
}
|
|
||||||
|
|
||||||
return str + ">"
|
return str + ">"
|
||||||
}
|
}
|
||||||
|
@ -76,9 +76,8 @@ func (context *context) SetTranslate(v bool) {
|
|||||||
context.params.SetTranslate(v)
|
context.params.SetTranslate(v)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set speedup flag
|
func (context *context) SetSplitOnWord(v bool) {
|
||||||
func (context *context) SetSpeedup(v bool) {
|
context.params.SetSplitOnWord(v)
|
||||||
context.params.SetSpeedup(v)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set number of threads to use
|
// Set number of threads to use
|
||||||
@ -93,7 +92,7 @@ func (context *context) SetOffset(v time.Duration) {
|
|||||||
|
|
||||||
// Set duration of audio to process
|
// Set duration of audio to process
|
||||||
func (context *context) SetDuration(v time.Duration) {
|
func (context *context) SetDuration(v time.Duration) {
|
||||||
context.params.SetOffset(int(v.Milliseconds()))
|
context.params.SetDuration(int(v.Milliseconds()))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set timestamp token probability threshold (~0.01)
|
// Set timestamp token probability threshold (~0.01)
|
||||||
@ -111,11 +110,52 @@ func (context *context) SetMaxSegmentLength(n uint) {
|
|||||||
context.params.SetMaxSegmentLength(int(n))
|
context.params.SetMaxSegmentLength(int(n))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Set token timestamps flag
|
||||||
|
func (context *context) SetTokenTimestamps(b bool) {
|
||||||
|
context.params.SetTokenTimestamps(b)
|
||||||
|
}
|
||||||
|
|
||||||
// Set max tokens per segment (0 = no limit)
|
// Set max tokens per segment (0 = no limit)
|
||||||
func (context *context) SetMaxTokensPerSegment(n uint) {
|
func (context *context) SetMaxTokensPerSegment(n uint) {
|
||||||
context.params.SetMaxTokensPerSegment(int(n))
|
context.params.SetMaxTokensPerSegment(int(n))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Set audio encoder context
|
||||||
|
func (context *context) SetAudioCtx(n uint) {
|
||||||
|
context.params.SetAudioCtx(int(n))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set maximum number of text context tokens to store
|
||||||
|
func (context *context) SetMaxContext(n int) {
|
||||||
|
context.params.SetMaxContext(n)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set Beam Size
|
||||||
|
func (context *context) SetBeamSize(n int) {
|
||||||
|
context.params.SetBeamSize(n)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set Entropy threshold
|
||||||
|
func (context *context) SetEntropyThold(t float32) {
|
||||||
|
context.params.SetEntropyThold(t)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set Temperature
|
||||||
|
func (context *context) SetTemperature(t float32) {
|
||||||
|
context.params.SetTemperature(t)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set the fallback temperature incrementation
|
||||||
|
// Pass -1.0 to disable this feature
|
||||||
|
func (context *context) SetTemperatureFallback(t float32) {
|
||||||
|
context.params.SetTemperatureFallback(t)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set initial prompt
|
||||||
|
func (context *context) SetInitialPrompt(prompt string) {
|
||||||
|
context.params.SetInitialPrompt(prompt)
|
||||||
|
}
|
||||||
|
|
||||||
// ResetTimings resets the mode timings. Should be called before processing
|
// ResetTimings resets the mode timings. Should be called before processing
|
||||||
func (context *context) ResetTimings() {
|
func (context *context) ResetTimings() {
|
||||||
context.model.ctx.Whisper_reset_timings()
|
context.model.ctx.Whisper_reset_timings()
|
||||||
@ -147,12 +187,16 @@ func (context *context) WhisperLangAutoDetect(offset_ms int, n_threads int) ([]f
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Process new sample data and return any errors
|
// Process new sample data and return any errors
|
||||||
func (context *context) Process(data []float32, cb SegmentCallback) error {
|
func (context *context) Process(
|
||||||
|
data []float32,
|
||||||
|
callNewSegment SegmentCallback,
|
||||||
|
callProgress ProgressCallback,
|
||||||
|
) error {
|
||||||
if context.model.ctx == nil {
|
if context.model.ctx == nil {
|
||||||
return ErrInternalAppError
|
return ErrInternalAppError
|
||||||
}
|
}
|
||||||
// If the callback is defined then we force on single_segment mode
|
// If the callback is defined then we force on single_segment mode
|
||||||
if cb != nil {
|
if callNewSegment != nil {
|
||||||
context.params.SetSingleSegment(true)
|
context.params.SetSingleSegment(true)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -160,24 +204,28 @@ func (context *context) Process(data []float32, cb SegmentCallback) error {
|
|||||||
processors := 0
|
processors := 0
|
||||||
if processors > 1 {
|
if processors > 1 {
|
||||||
if err := context.model.ctx.Whisper_full_parallel(context.params, data, processors, nil, func(new int) {
|
if err := context.model.ctx.Whisper_full_parallel(context.params, data, processors, nil, func(new int) {
|
||||||
if cb != nil {
|
if callNewSegment != nil {
|
||||||
num_segments := context.model.ctx.Whisper_full_n_segments()
|
num_segments := context.model.ctx.Whisper_full_n_segments()
|
||||||
s0 := num_segments - new
|
s0 := num_segments - new
|
||||||
for i := s0; i < num_segments; i++ {
|
for i := s0; i < num_segments; i++ {
|
||||||
cb(toSegment(context.model.ctx, i))
|
callNewSegment(toSegment(context.model.ctx, i))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}); err != nil {
|
}); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
} else if err := context.model.ctx.Whisper_full(context.params, data, nil, func(new int) {
|
} else if err := context.model.ctx.Whisper_full(context.params, data, nil, func(new int) {
|
||||||
if cb != nil {
|
if callNewSegment != nil {
|
||||||
num_segments := context.model.ctx.Whisper_full_n_segments()
|
num_segments := context.model.ctx.Whisper_full_n_segments()
|
||||||
s0 := num_segments - new
|
s0 := num_segments - new
|
||||||
for i := s0; i < num_segments; i++ {
|
for i := s0; i < num_segments; i++ {
|
||||||
cb(toSegment(context.model.ctx, i))
|
callNewSegment(toSegment(context.model.ctx, i))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}, func(progress int) {
|
||||||
|
if callProgress != nil {
|
||||||
|
callProgress(progress)
|
||||||
|
}
|
||||||
}); err != nil {
|
}); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -280,10 +328,14 @@ func toSegment(ctx *whisper.Context, n int) Segment {
|
|||||||
func toTokens(ctx *whisper.Context, n int) []Token {
|
func toTokens(ctx *whisper.Context, n int) []Token {
|
||||||
result := make([]Token, ctx.Whisper_full_n_tokens(n))
|
result := make([]Token, ctx.Whisper_full_n_tokens(n))
|
||||||
for i := 0; i < len(result); i++ {
|
for i := 0; i < len(result); i++ {
|
||||||
|
data := ctx.Whisper_full_get_token_data(n, i)
|
||||||
|
|
||||||
result[i] = Token{
|
result[i] = Token{
|
||||||
Id: int(ctx.Whisper_full_get_token_id(n, i)),
|
Id: int(ctx.Whisper_full_get_token_id(n, i)),
|
||||||
Text: strings.TrimSpace(ctx.Whisper_full_get_token_text(n, i)),
|
Text: ctx.Whisper_full_get_token_text(n, i),
|
||||||
P: ctx.Whisper_full_get_token_p(n, i),
|
P: ctx.Whisper_full_get_token_p(n, i),
|
||||||
|
Start: time.Duration(data.T0()) * time.Millisecond * 10,
|
||||||
|
End: time.Duration(data.T1()) * time.Millisecond * 10,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return result
|
return result
|
||||||
|
@ -4,52 +4,90 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
// Packages
|
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
|
||||||
whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
|
"github.com/go-audio/wav"
|
||||||
assert "github.com/stretchr/testify/assert"
|
assert "github.com/stretchr/testify/assert"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
func TestSetLanguage(t *testing.T) {
|
||||||
ModelPath = "../../models/ggml-tiny.bin"
|
|
||||||
SamplePath = "../../samples/jfk.wav"
|
|
||||||
)
|
|
||||||
|
|
||||||
func Test_Whisper_000(t *testing.T) {
|
|
||||||
assert := assert.New(t)
|
assert := assert.New(t)
|
||||||
if _, err := os.Stat(ModelPath); os.IsNotExist(err) {
|
|
||||||
t.Skip("Skipping test, model not found:", ModelPath)
|
|
||||||
}
|
|
||||||
if _, err := os.Stat(SamplePath); os.IsNotExist(err) {
|
|
||||||
t.Skip("Skipping test, sample not found:", SamplePath)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Load model
|
|
||||||
model, err := whisper.New(ModelPath)
|
|
||||||
assert.NoError(err)
|
|
||||||
assert.NotNil(model)
|
|
||||||
assert.NoError(model.Close())
|
|
||||||
|
|
||||||
t.Log("languages=", model.Languages())
|
|
||||||
}
|
|
||||||
|
|
||||||
func Test_Whisper_001(t *testing.T) {
|
|
||||||
assert := assert.New(t)
|
|
||||||
if _, err := os.Stat(ModelPath); os.IsNotExist(err) {
|
|
||||||
t.Skip("Skipping test, model not found:", ModelPath)
|
|
||||||
}
|
|
||||||
if _, err := os.Stat(SamplePath); os.IsNotExist(err) {
|
|
||||||
t.Skip("Skipping test, sample not found:", SamplePath)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Load model
|
|
||||||
model, err := whisper.New(ModelPath)
|
model, err := whisper.New(ModelPath)
|
||||||
assert.NoError(err)
|
assert.NoError(err)
|
||||||
assert.NotNil(model)
|
assert.NotNil(model)
|
||||||
defer model.Close()
|
defer model.Close()
|
||||||
|
|
||||||
// Get context for decoding
|
context, err := model.NewContext()
|
||||||
ctx, err := model.NewContext()
|
|
||||||
assert.NoError(err)
|
assert.NoError(err)
|
||||||
assert.NotNil(ctx)
|
|
||||||
|
|
||||||
|
// This returns an error since
|
||||||
|
// the model 'models/ggml-small.en.bin'
|
||||||
|
// that is loaded is not multilingual
|
||||||
|
err = context.SetLanguage("en")
|
||||||
|
assert.Error(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestContextModelIsMultilingual(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
|
||||||
|
model, err := whisper.New(ModelPath)
|
||||||
|
assert.NoError(err)
|
||||||
|
assert.NotNil(model)
|
||||||
|
defer model.Close()
|
||||||
|
|
||||||
|
context, err := model.NewContext()
|
||||||
|
assert.NoError(err)
|
||||||
|
|
||||||
|
isMultilingual := context.IsMultilingual()
|
||||||
|
|
||||||
|
// This returns false since
|
||||||
|
// the model 'models/ggml-small.en.bin'
|
||||||
|
// that is loaded is not multilingual
|
||||||
|
assert.False(isMultilingual)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLanguage(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
|
||||||
|
model, err := whisper.New(ModelPath)
|
||||||
|
assert.NoError(err)
|
||||||
|
assert.NotNil(model)
|
||||||
|
defer model.Close()
|
||||||
|
|
||||||
|
context, err := model.NewContext()
|
||||||
|
assert.NoError(err)
|
||||||
|
|
||||||
|
// This always returns en since
|
||||||
|
// the model 'models/ggml-small.en.bin'
|
||||||
|
// that is loaded is not multilingual
|
||||||
|
expectedLanguage := "en"
|
||||||
|
actualLanguage := context.Language()
|
||||||
|
assert.Equal(expectedLanguage, actualLanguage)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestProcess(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
|
||||||
|
fh, err := os.Open(SamplePath)
|
||||||
|
assert.NoError(err)
|
||||||
|
defer fh.Close()
|
||||||
|
|
||||||
|
// Decode the WAV file - load the full buffer
|
||||||
|
dec := wav.NewDecoder(fh)
|
||||||
|
buf, err := dec.FullPCMBuffer()
|
||||||
|
assert.NoError(err)
|
||||||
|
assert.Equal(uint16(1), dec.NumChans)
|
||||||
|
|
||||||
|
data := buf.AsFloat32Buffer().Data
|
||||||
|
|
||||||
|
model, err := whisper.New(ModelPath)
|
||||||
|
assert.NoError(err)
|
||||||
|
assert.NotNil(model)
|
||||||
|
defer model.Close()
|
||||||
|
|
||||||
|
context, err := model.NewContext()
|
||||||
|
assert.NoError(err)
|
||||||
|
|
||||||
|
err = context.Process(data, nil, nil)
|
||||||
|
assert.NoError(err)
|
||||||
}
|
}
|
||||||
|
@ -12,6 +12,10 @@ import (
|
|||||||
// time. It is called during the Process function
|
// time. It is called during the Process function
|
||||||
type SegmentCallback func(Segment)
|
type SegmentCallback func(Segment)
|
||||||
|
|
||||||
|
// ProgressCallback is the callback function for reporting progress during
|
||||||
|
// processing. It is called during the Process function
|
||||||
|
type ProgressCallback func(int)
|
||||||
|
|
||||||
// Model is the interface to a whisper model. Create a new model with the
|
// Model is the interface to a whisper model. Create a new model with the
|
||||||
// function whisper.New(string)
|
// function whisper.New(string)
|
||||||
type Model interface {
|
type Model interface {
|
||||||
@ -34,19 +38,27 @@ type Context interface {
|
|||||||
IsMultilingual() bool // Return true if the model is multilingual.
|
IsMultilingual() bool // Return true if the model is multilingual.
|
||||||
Language() string // Get language
|
Language() string // Get language
|
||||||
|
|
||||||
SetOffset(time.Duration) // Set offset
|
SetOffset(time.Duration) // Set offset
|
||||||
SetDuration(time.Duration) // Set duration
|
SetDuration(time.Duration) // Set duration
|
||||||
SetThreads(uint) // Set number of threads to use
|
SetThreads(uint) // Set number of threads to use
|
||||||
SetSpeedup(bool) // Set speedup flag
|
SetSplitOnWord(bool) // Set split on word flag
|
||||||
SetTokenThreshold(float32) // Set timestamp token probability threshold
|
SetTokenThreshold(float32) // Set timestamp token probability threshold
|
||||||
SetTokenSumThreshold(float32) // Set timestamp token sum probability threshold
|
SetTokenSumThreshold(float32) // Set timestamp token sum probability threshold
|
||||||
SetMaxSegmentLength(uint) // Set max segment length in characters
|
SetMaxSegmentLength(uint) // Set max segment length in characters
|
||||||
SetMaxTokensPerSegment(uint) // Set max tokens per segment (0 = no limit)
|
SetTokenTimestamps(bool) // Set token timestamps flag
|
||||||
|
SetMaxTokensPerSegment(uint) // Set max tokens per segment (0 = no limit)
|
||||||
|
SetAudioCtx(uint) // Set audio encoder context
|
||||||
|
SetMaxContext(n int) // Set maximum number of text context tokens to store
|
||||||
|
SetBeamSize(n int) // Set Beam Size
|
||||||
|
SetEntropyThold(t float32) // Set Entropy threshold
|
||||||
|
SetInitialPrompt(prompt string) // Set initial prompt
|
||||||
|
SetTemperature(t float32) // Set temperature
|
||||||
|
SetTemperatureFallback(t float32) // Set temperature incrementation
|
||||||
|
|
||||||
// Process mono audio data and return any errors.
|
// Process mono audio data and return any errors.
|
||||||
// If defined, newly generated segments are passed to the
|
// If defined, newly generated segments are passed to the
|
||||||
// callback function during processing.
|
// callback function during processing.
|
||||||
Process([]float32, SegmentCallback) error
|
Process([]float32, SegmentCallback, ProgressCallback) error
|
||||||
|
|
||||||
// After process is called, return segments until the end of the stream
|
// After process is called, return segments until the end of the stream
|
||||||
// is reached, when io.EOF is returned.
|
// is reached, when io.EOF is returned.
|
||||||
@ -85,7 +97,8 @@ type Segment struct {
|
|||||||
|
|
||||||
// Token is a text or special token
|
// Token is a text or special token
|
||||||
type Token struct {
|
type Token struct {
|
||||||
Id int
|
Id int
|
||||||
Text string
|
Text string
|
||||||
P float32
|
P float32
|
||||||
|
Start, End time.Duration
|
||||||
}
|
}
|
||||||
|
91
bindings/go/pkg/whisper/model_test.go
Normal file
91
bindings/go/pkg/whisper/model_test.go
Normal file
@ -0,0 +1,91 @@
|
|||||||
|
package whisper_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
|
||||||
|
assert "github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestNew(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
t.Run("valid model path", func(t *testing.T) {
|
||||||
|
model, err := whisper.New(ModelPath)
|
||||||
|
assert.NoError(err)
|
||||||
|
assert.NotNil(model)
|
||||||
|
defer model.Close()
|
||||||
|
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("invalid model path", func(t *testing.T) {
|
||||||
|
invalidModelPath := "invalid-model-path.bin"
|
||||||
|
model, err := whisper.New(invalidModelPath)
|
||||||
|
assert.Error(err)
|
||||||
|
assert.Nil(model)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestClose(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
|
||||||
|
model, err := whisper.New(ModelPath)
|
||||||
|
assert.NoError(err)
|
||||||
|
assert.NotNil(model)
|
||||||
|
|
||||||
|
err = model.Close()
|
||||||
|
assert.NoError(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNewContext(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
|
||||||
|
model, err := whisper.New(ModelPath)
|
||||||
|
assert.NoError(err)
|
||||||
|
assert.NotNil(model)
|
||||||
|
defer model.Close()
|
||||||
|
|
||||||
|
context, err := model.NewContext()
|
||||||
|
assert.NoError(err)
|
||||||
|
assert.NotNil(context)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestIsMultilingual(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
|
||||||
|
model, err := whisper.New(ModelPath)
|
||||||
|
assert.NoError(err)
|
||||||
|
assert.NotNil(model)
|
||||||
|
defer model.Close()
|
||||||
|
|
||||||
|
isMultilingual := model.IsMultilingual()
|
||||||
|
|
||||||
|
// This returns false since
|
||||||
|
// the model 'models/ggml-small.en.bin'
|
||||||
|
// that is loaded is not multilingual
|
||||||
|
assert.False(isMultilingual)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLanguages(t *testing.T) {
|
||||||
|
assert := assert.New(t)
|
||||||
|
|
||||||
|
model, err := whisper.New(ModelPath)
|
||||||
|
assert.NoError(err)
|
||||||
|
assert.NotNil(model)
|
||||||
|
defer model.Close()
|
||||||
|
|
||||||
|
expectedLanguages := []string{
|
||||||
|
"en", "zh", "de", "es", "ru", "ko", "fr", "ja", "pt", "tr", "pl",
|
||||||
|
"ca", "nl", "ar", "sv", "it", "id", "hi", "fi", "vi", "he", "uk",
|
||||||
|
"el", "ms", "cs", "ro", "da", "hu", "ta", "no", "th", "ur", "hr",
|
||||||
|
"bg", "lt", "la", "mi", "ml", "cy", "sk", "te", "fa", "lv", "bn",
|
||||||
|
"sr", "az", "sl", "kn", "et", "mk", "br", "eu", "is", "hy", "ne",
|
||||||
|
"mn", "bs", "kk", "sq", "sw", "gl", "mr", "pa", "si", "km", "sn",
|
||||||
|
"yo", "so", "af", "oc", "ka", "be", "tg", "sd", "gu", "am", "yi",
|
||||||
|
"lo", "uz", "fo", "ht", "ps", "tk", "nn", "mt", "sa", "lb", "my",
|
||||||
|
"bo", "tl", "mg", "as", "tt", "haw", "ln", "ha", "ba", "jw", "su",
|
||||||
|
}
|
||||||
|
|
||||||
|
actualLanguages := model.Languages()
|
||||||
|
|
||||||
|
assert.Equal(expectedLanguages, actualLanguages)
|
||||||
|
}
|
6
bindings/go/pkg/whisper/util_test.go
Normal file
6
bindings/go/pkg/whisper/util_test.go
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
package whisper_test
|
||||||
|
|
||||||
|
const (
|
||||||
|
ModelPath = "../../models/ggml-small.en.bin"
|
||||||
|
SamplePath = "../../samples/jfk.wav"
|
||||||
|
)
|
@ -9,12 +9,13 @@ import (
|
|||||||
// CGO
|
// CGO
|
||||||
|
|
||||||
/*
|
/*
|
||||||
#cgo LDFLAGS: -lwhisper -lm -lstdc++
|
#cgo LDFLAGS: -lwhisper -lm -lstdc++ -fopenmp
|
||||||
#cgo darwin LDFLAGS: -framework Accelerate
|
#cgo darwin LDFLAGS: -framework Accelerate -framework Metal -framework Foundation -framework CoreGraphics
|
||||||
#include <whisper.h>
|
#include <whisper.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
extern void callNewSegment(void* user_data, int new);
|
extern void callNewSegment(void* user_data, int new);
|
||||||
|
extern void callProgress(void* user_data, int progress);
|
||||||
extern bool callEncoderBegin(void* user_data);
|
extern bool callEncoderBegin(void* user_data);
|
||||||
|
|
||||||
// Text segment callback
|
// Text segment callback
|
||||||
@ -26,6 +27,15 @@ static void whisper_new_segment_cb(struct whisper_context* ctx, struct whisper_s
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Progress callback
|
||||||
|
// Called as processing advances, with the current progress value
|
||||||
|
// The value is forwarded unchanged to the Go-side callProgress handler
|
||||||
|
static void whisper_progress_cb(struct whisper_context* ctx, struct whisper_state* state, int progress, void* user_data) {
|
||||||
|
if(user_data != NULL && ctx != NULL) {
|
||||||
|
callProgress(user_data, progress);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Encoder begin callback
|
// Encoder begin callback
|
||||||
// If not NULL, called before the encoder starts
|
// If not NULL, called before the encoder starts
|
||||||
// If it returns false, the computation is aborted
|
// If it returns false, the computation is aborted
|
||||||
@ -43,6 +53,8 @@ static struct whisper_full_params whisper_full_default_params_cb(struct whisper_
|
|||||||
params.new_segment_callback_user_data = (void*)(ctx);
|
params.new_segment_callback_user_data = (void*)(ctx);
|
||||||
params.encoder_begin_callback = whisper_encoder_begin_cb;
|
params.encoder_begin_callback = whisper_encoder_begin_cb;
|
||||||
params.encoder_begin_callback_user_data = (void*)(ctx);
|
params.encoder_begin_callback_user_data = (void*)(ctx);
|
||||||
|
params.progress_callback = whisper_progress_cb;
|
||||||
|
params.progress_callback_user_data = (void*)(ctx);
|
||||||
return params;
|
return params;
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
@ -71,7 +83,6 @@ const (
|
|||||||
SampleRate = C.WHISPER_SAMPLE_RATE // Expected sample rate, samples per second
|
SampleRate = C.WHISPER_SAMPLE_RATE // Expected sample rate, samples per second
|
||||||
SampleBits = uint16(unsafe.Sizeof(C.float(0))) * 8 // Sample size in bits
|
SampleBits = uint16(unsafe.Sizeof(C.float(0))) * 8 // Sample size in bits
|
||||||
NumFFT = C.WHISPER_N_FFT
|
NumFFT = C.WHISPER_N_FFT
|
||||||
NumMEL = C.WHISPER_N_MEL
|
|
||||||
HopLength = C.WHISPER_HOP_LENGTH
|
HopLength = C.WHISPER_HOP_LENGTH
|
||||||
ChunkSize = C.WHISPER_CHUNK_SIZE
|
ChunkSize = C.WHISPER_CHUNK_SIZE
|
||||||
)
|
)
|
||||||
@ -91,7 +102,7 @@ var (
|
|||||||
func Whisper_init(path string) *Context {
|
func Whisper_init(path string) *Context {
|
||||||
cPath := C.CString(path)
|
cPath := C.CString(path)
|
||||||
defer C.free(unsafe.Pointer(cPath))
|
defer C.free(unsafe.Pointer(cPath))
|
||||||
if ctx := C.whisper_init_from_file(cPath); ctx != nil {
|
if ctx := C.whisper_init_from_file_with_params(cPath, C.whisper_context_default_params()); ctx != nil {
|
||||||
return (*Context)(ctx)
|
return (*Context)(ctx)
|
||||||
} else {
|
} else {
|
||||||
return nil
|
return nil
|
||||||
@ -258,13 +269,13 @@ func (ctx *Context) Whisper_token_lang(lang_id int) Token {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Task tokens
|
// Task tokens
|
||||||
func Whisper_token_translate() Token {
|
func (ctx *Context) Whisper_token_translate() Token {
|
||||||
return Token(C.whisper_token_translate())
|
return Token(C.whisper_token_translate((*C.struct_whisper_context)(ctx)))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Task tokens
|
// Task tokens
|
||||||
func Whisper_token_transcribe() Token {
|
func (ctx *Context) Whisper_token_transcribe() Token {
|
||||||
return Token(C.whisper_token_transcribe())
|
return Token(C.whisper_token_transcribe((*C.struct_whisper_context)(ctx)))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Performance information
|
// Performance information
|
||||||
@ -290,11 +301,19 @@ func (ctx *Context) Whisper_full_default_params(strategy SamplingStrategy) Param
|
|||||||
|
|
||||||
// Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
|
// Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
|
||||||
// Uses the specified decoding strategy to obtain the text.
|
// Uses the specified decoding strategy to obtain the text.
|
||||||
func (ctx *Context) Whisper_full(params Params, samples []float32, encoderBeginCallback func() bool, newSegmentCallback func(int)) error {
|
func (ctx *Context) Whisper_full(
|
||||||
|
params Params,
|
||||||
|
samples []float32,
|
||||||
|
encoderBeginCallback func() bool,
|
||||||
|
newSegmentCallback func(int),
|
||||||
|
progressCallback func(int),
|
||||||
|
) error {
|
||||||
registerEncoderBeginCallback(ctx, encoderBeginCallback)
|
registerEncoderBeginCallback(ctx, encoderBeginCallback)
|
||||||
registerNewSegmentCallback(ctx, newSegmentCallback)
|
registerNewSegmentCallback(ctx, newSegmentCallback)
|
||||||
|
registerProgressCallback(ctx, progressCallback)
|
||||||
defer registerEncoderBeginCallback(ctx, nil)
|
defer registerEncoderBeginCallback(ctx, nil)
|
||||||
defer registerNewSegmentCallback(ctx, nil)
|
defer registerNewSegmentCallback(ctx, nil)
|
||||||
|
defer registerProgressCallback(ctx, nil)
|
||||||
if C.whisper_full((*C.struct_whisper_context)(ctx), (C.struct_whisper_full_params)(params), (*C.float)(&samples[0]), C.int(len(samples))) == 0 {
|
if C.whisper_full((*C.struct_whisper_context)(ctx), (C.struct_whisper_full_params)(params), (*C.float)(&samples[0]), C.int(len(samples))) == 0 {
|
||||||
return nil
|
return nil
|
||||||
} else {
|
} else {
|
||||||
@ -318,6 +337,18 @@ func (ctx *Context) Whisper_full_parallel(params Params, samples []float32, proc
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Return the id of the autodetected language, returns -1 if not found
|
||||||
|
// Added to whisper.cpp in
|
||||||
|
// https://github.com/ggerganov/whisper.cpp/commit/a1c1583cc7cd8b75222857afc936f0638c5683d6
|
||||||
|
//
|
||||||
|
// Example language ids (language -> id):
|
||||||
|
//
|
||||||
|
// "de" -> 2
|
||||||
|
// "german" -> 2
|
||||||
|
func (ctx *Context) Whisper_full_lang_id() int {
|
||||||
|
return int(C.whisper_full_lang_id((*C.struct_whisper_context)(ctx)))
|
||||||
|
}
|
||||||
|
|
||||||
// Number of generated text segments.
|
// Number of generated text segments.
|
||||||
// A segment can be a few words, a sentence, or even a paragraph.
|
// A segment can be a few words, a sentence, or even a paragraph.
|
||||||
func (ctx *Context) Whisper_full_n_segments() int {
|
func (ctx *Context) Whisper_full_n_segments() int {
|
||||||
@ -356,7 +387,7 @@ func (ctx *Context) Whisper_full_get_token_id(segment int, token int) Token {
|
|||||||
|
|
||||||
// Get token data for the specified token in the specified segment.
|
// Get token data for the specified token in the specified segment.
|
||||||
// This contains probabilities, timestamps, etc.
|
// This contains probabilities, timestamps, etc.
|
||||||
func (ctx *Context) whisper_full_get_token_data(segment int, token int) TokenData {
|
func (ctx *Context) Whisper_full_get_token_data(segment int, token int) TokenData {
|
||||||
return TokenData(C.whisper_full_get_token_data((*C.struct_whisper_context)(ctx), C.int(segment), C.int(token)))
|
return TokenData(C.whisper_full_get_token_data((*C.struct_whisper_context)(ctx), C.int(segment), C.int(token)))
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -370,6 +401,7 @@ func (ctx *Context) Whisper_full_get_token_p(segment int, token int) float32 {
|
|||||||
|
|
||||||
var (
|
var (
|
||||||
cbNewSegment = make(map[unsafe.Pointer]func(int))
|
cbNewSegment = make(map[unsafe.Pointer]func(int))
|
||||||
|
cbProgress = make(map[unsafe.Pointer]func(int))
|
||||||
cbEncoderBegin = make(map[unsafe.Pointer]func() bool)
|
cbEncoderBegin = make(map[unsafe.Pointer]func() bool)
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -381,6 +413,14 @@ func registerNewSegmentCallback(ctx *Context, fn func(int)) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func registerProgressCallback(ctx *Context, fn func(int)) {
|
||||||
|
if fn == nil {
|
||||||
|
delete(cbProgress, unsafe.Pointer(ctx))
|
||||||
|
} else {
|
||||||
|
cbProgress[unsafe.Pointer(ctx)] = fn
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func registerEncoderBeginCallback(ctx *Context, fn func() bool) {
|
func registerEncoderBeginCallback(ctx *Context, fn func() bool) {
|
||||||
if fn == nil {
|
if fn == nil {
|
||||||
delete(cbEncoderBegin, unsafe.Pointer(ctx))
|
delete(cbEncoderBegin, unsafe.Pointer(ctx))
|
||||||
@ -396,6 +436,13 @@ func callNewSegment(user_data unsafe.Pointer, new C.int) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//export callProgress
|
||||||
|
func callProgress(user_data unsafe.Pointer, progress C.int) {
|
||||||
|
if fn, ok := cbProgress[user_data]; ok {
|
||||||
|
fn(int(progress))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
//export callEncoderBegin
|
//export callEncoderBegin
|
||||||
func callEncoderBegin(user_data unsafe.Pointer) C.bool {
|
func callEncoderBegin(user_data unsafe.Pointer) C.bool {
|
||||||
if fn, ok := cbEncoderBegin[user_data]; ok {
|
if fn, ok := cbEncoderBegin[user_data]; ok {
|
||||||
@ -407,3 +454,15 @@ func callEncoderBegin(user_data unsafe.Pointer) C.bool {
|
|||||||
}
|
}
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (t TokenData) T0() int64 {
|
||||||
|
return int64(t.t0)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t TokenData) T1() int64 {
|
||||||
|
return int64(t.t1)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t TokenData) Id() Token {
|
||||||
|
return Token(t.id)
|
||||||
|
}
|
||||||
|
@ -52,7 +52,7 @@ func Test_Whisper_001(t *testing.T) {
|
|||||||
defer ctx.Whisper_free()
|
defer ctx.Whisper_free()
|
||||||
params := ctx.Whisper_full_default_params(whisper.SAMPLING_GREEDY)
|
params := ctx.Whisper_full_default_params(whisper.SAMPLING_GREEDY)
|
||||||
data := buf.AsFloat32Buffer().Data
|
data := buf.AsFloat32Buffer().Data
|
||||||
err = ctx.Whisper_full(params, data, nil, nil)
|
err = ctx.Whisper_full(params, data, nil, nil, nil)
|
||||||
assert.NoError(err)
|
assert.NoError(err)
|
||||||
|
|
||||||
// Print out tokens
|
// Print out tokens
|
||||||
|
Submodule bindings/ios deleted from 92d4c5c9a0
124
bindings/java/.idea/uiDesigner.xml
generated
Normal file
124
bindings/java/.idea/uiDesigner.xml
generated
Normal file
@ -0,0 +1,124 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="Palette2">
|
||||||
|
<group name="Swing">
|
||||||
|
<item class="com.intellij.uiDesigner.HSpacer" tooltip-text="Horizontal Spacer" icon="/com/intellij/uiDesigner/icons/hspacer.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
||||||
|
<default-constraints vsize-policy="1" hsize-policy="6" anchor="0" fill="1" />
|
||||||
|
</item>
|
||||||
|
<item class="com.intellij.uiDesigner.VSpacer" tooltip-text="Vertical Spacer" icon="/com/intellij/uiDesigner/icons/vspacer.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
||||||
|
<default-constraints vsize-policy="6" hsize-policy="1" anchor="0" fill="2" />
|
||||||
|
</item>
|
||||||
|
<item class="javax.swing.JPanel" icon="/com/intellij/uiDesigner/icons/panel.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
||||||
|
<default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3" />
|
||||||
|
</item>
|
||||||
|
<item class="javax.swing.JScrollPane" icon="/com/intellij/uiDesigner/icons/scrollPane.svg" removable="false" auto-create-binding="false" can-attach-label="true">
|
||||||
|
<default-constraints vsize-policy="7" hsize-policy="7" anchor="0" fill="3" />
|
||||||
|
</item>
|
||||||
|
<item class="javax.swing.JButton" icon="/com/intellij/uiDesigner/icons/button.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
||||||
|
<default-constraints vsize-policy="0" hsize-policy="3" anchor="0" fill="1" />
|
||||||
|
<initial-values>
|
||||||
|
<property name="text" value="Button" />
|
||||||
|
</initial-values>
|
||||||
|
</item>
|
||||||
|
<item class="javax.swing.JRadioButton" icon="/com/intellij/uiDesigner/icons/radioButton.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
||||||
|
<default-constraints vsize-policy="0" hsize-policy="3" anchor="8" fill="0" />
|
||||||
|
<initial-values>
|
||||||
|
<property name="text" value="RadioButton" />
|
||||||
|
</initial-values>
|
||||||
|
</item>
|
||||||
|
<item class="javax.swing.JCheckBox" icon="/com/intellij/uiDesigner/icons/checkBox.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
||||||
|
<default-constraints vsize-policy="0" hsize-policy="3" anchor="8" fill="0" />
|
||||||
|
<initial-values>
|
||||||
|
<property name="text" value="CheckBox" />
|
||||||
|
</initial-values>
|
||||||
|
</item>
|
||||||
|
<item class="javax.swing.JLabel" icon="/com/intellij/uiDesigner/icons/label.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
||||||
|
<default-constraints vsize-policy="0" hsize-policy="0" anchor="8" fill="0" />
|
||||||
|
<initial-values>
|
||||||
|
<property name="text" value="Label" />
|
||||||
|
</initial-values>
|
||||||
|
</item>
|
||||||
|
<item class="javax.swing.JTextField" icon="/com/intellij/uiDesigner/icons/textField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
||||||
|
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
|
||||||
|
<preferred-size width="150" height="-1" />
|
||||||
|
</default-constraints>
|
||||||
|
</item>
|
||||||
|
<item class="javax.swing.JPasswordField" icon="/com/intellij/uiDesigner/icons/passwordField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
||||||
|
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
|
||||||
|
<preferred-size width="150" height="-1" />
|
||||||
|
</default-constraints>
|
||||||
|
</item>
|
||||||
|
<item class="javax.swing.JFormattedTextField" icon="/com/intellij/uiDesigner/icons/formattedTextField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
||||||
|
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
|
||||||
|
<preferred-size width="150" height="-1" />
|
||||||
|
</default-constraints>
|
||||||
|
</item>
|
||||||
|
<item class="javax.swing.JTextArea" icon="/com/intellij/uiDesigner/icons/textArea.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
||||||
|
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
|
||||||
|
<preferred-size width="150" height="50" />
|
||||||
|
</default-constraints>
|
||||||
|
</item>
|
||||||
|
<item class="javax.swing.JTextPane" icon="/com/intellij/uiDesigner/icons/textPane.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
||||||
|
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
|
||||||
|
<preferred-size width="150" height="50" />
|
||||||
|
</default-constraints>
|
||||||
|
</item>
|
||||||
|
<item class="javax.swing.JEditorPane" icon="/com/intellij/uiDesigner/icons/editorPane.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
||||||
|
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
|
||||||
|
<preferred-size width="150" height="50" />
|
||||||
|
</default-constraints>
|
||||||
|
</item>
|
||||||
|
<item class="javax.swing.JComboBox" icon="/com/intellij/uiDesigner/icons/comboBox.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
||||||
|
<default-constraints vsize-policy="0" hsize-policy="2" anchor="8" fill="1" />
|
||||||
|
</item>
|
||||||
|
<item class="javax.swing.JTable" icon="/com/intellij/uiDesigner/icons/table.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
||||||
|
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
|
||||||
|
<preferred-size width="150" height="50" />
|
||||||
|
</default-constraints>
|
||||||
|
</item>
|
||||||
|
<item class="javax.swing.JList" icon="/com/intellij/uiDesigner/icons/list.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
||||||
|
<default-constraints vsize-policy="6" hsize-policy="2" anchor="0" fill="3">
|
||||||
|
<preferred-size width="150" height="50" />
|
||||||
|
</default-constraints>
|
||||||
|
</item>
|
||||||
|
<item class="javax.swing.JTree" icon="/com/intellij/uiDesigner/icons/tree.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
||||||
|
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
|
||||||
|
<preferred-size width="150" height="50" />
|
||||||
|
</default-constraints>
|
||||||
|
</item>
|
||||||
|
<item class="javax.swing.JTabbedPane" icon="/com/intellij/uiDesigner/icons/tabbedPane.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
||||||
|
<default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3">
|
||||||
|
<preferred-size width="200" height="200" />
|
||||||
|
</default-constraints>
|
||||||
|
</item>
|
||||||
|
<item class="javax.swing.JSplitPane" icon="/com/intellij/uiDesigner/icons/splitPane.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
||||||
|
<default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3">
|
||||||
|
<preferred-size width="200" height="200" />
|
||||||
|
</default-constraints>
|
||||||
|
</item>
|
||||||
|
<item class="javax.swing.JSpinner" icon="/com/intellij/uiDesigner/icons/spinner.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
||||||
|
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1" />
|
||||||
|
</item>
|
||||||
|
<item class="javax.swing.JSlider" icon="/com/intellij/uiDesigner/icons/slider.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
||||||
|
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1" />
|
||||||
|
</item>
|
||||||
|
<item class="javax.swing.JSeparator" icon="/com/intellij/uiDesigner/icons/separator.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
||||||
|
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3" />
|
||||||
|
</item>
|
||||||
|
<item class="javax.swing.JProgressBar" icon="/com/intellij/uiDesigner/icons/progressbar.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
||||||
|
<default-constraints vsize-policy="0" hsize-policy="6" anchor="0" fill="1" />
|
||||||
|
</item>
|
||||||
|
<item class="javax.swing.JToolBar" icon="/com/intellij/uiDesigner/icons/toolbar.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
||||||
|
<default-constraints vsize-policy="0" hsize-policy="6" anchor="0" fill="1">
|
||||||
|
<preferred-size width="-1" height="20" />
|
||||||
|
</default-constraints>
|
||||||
|
</item>
|
||||||
|
<item class="javax.swing.JToolBar$Separator" icon="/com/intellij/uiDesigner/icons/toolbarSeparator.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
||||||
|
<default-constraints vsize-policy="0" hsize-policy="0" anchor="0" fill="1" />
|
||||||
|
</item>
|
||||||
|
<item class="javax.swing.JScrollBar" icon="/com/intellij/uiDesigner/icons/scrollbar.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
||||||
|
<default-constraints vsize-policy="6" hsize-policy="0" anchor="0" fill="2" />
|
||||||
|
</item>
|
||||||
|
</group>
|
||||||
|
</component>
|
||||||
|
</project>
|
71
bindings/java/README.md
Normal file
71
bindings/java/README.md
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
# Java JNI bindings for Whisper
|
||||||
|
|
||||||
|
This package provides Java JNI bindings for whisper.cpp. They have been tested on:
|
||||||
|
|
||||||
|
* <strike>Darwin (OS X) 12.6 on x86_64</strike>
|
||||||
|
* Ubuntu on x86_64
|
||||||
|
* Windows on x86_64
|
||||||
|
|
||||||
|
The "low level" bindings are in `WhisperCppJnaLibrary`. The simplest usage is shown in the example below.
|
||||||
|
|
||||||
|
JNA will attempt to load the `whispercpp` shared library from:
|
||||||
|
|
||||||
|
- jna.library.path
|
||||||
|
- jna.platform.library
|
||||||
|
- ~/Library/Frameworks
|
||||||
|
- /Library/Frameworks
|
||||||
|
- /System/Library/Frameworks
|
||||||
|
- classpath
|
||||||
|
|
||||||
|
```java
|
||||||
|
import io.github.ggerganov.whispercpp.WhisperCpp;
|
||||||
|
|
||||||
|
public class Example {
|
||||||
|
|
||||||
|
public static void main(String[] args) {
|
||||||
|
WhisperCpp whisper = new WhisperCpp();
|
||||||
|
// By default, models are loaded from ~/.cache/whisper/ and are usually named "ggml-${name}.bin"
|
||||||
|
// or you can provide the absolute path to the model file.
|
||||||
|
long context = whisper.initContext("base.en");
|
||||||
|
try {
|
||||||
|
var whisperParams = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY);
|
||||||
|
// custom configuration if required
|
||||||
|
whisperParams.temperature_inc = 0f;
|
||||||
|
|
||||||
|
var samples = readAudio(); // divide each value by 32767.0f
|
||||||
|
whisper.fullTranscribe(whisperParams, samples);
|
||||||
|
|
||||||
|
int segmentCount = whisper.getTextSegmentCount(context);
|
||||||
|
for (int i = 0; i < segmentCount; i++) {
|
||||||
|
String text = whisper.getTextSegment(context, i);
|
||||||
|
System.out.println(text);
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
whisper.freeContext(context);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Building & Testing
|
||||||
|
|
||||||
|
To build, you need JDK 8 or higher installed. Build and run the tests with:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git clone https://github.com/ggerganov/whisper.cpp.git
|
||||||
|
cd whisper.cpp/bindings/java
|
||||||
|
|
||||||
|
./gradlew build
|
||||||
|
```
|
||||||
|
|
||||||
|
You need to have the `whisper` library in your [JNA library path](https://java-native-access.github.io/jna/4.2.1/com/sun/jna/NativeLibrary.html). On Windows the dll is included in the jar and you can update it:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
copy /y ..\..\build\bin\Release\whisper.dll build\generated\resources\main\win32-x86-64\whisper.dll
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
The license for the Java bindings is the same as the license for the rest of the whisper.cpp project, which is the MIT License. See the `LICENSE` file for more details.
|
||||||
|
|
133
bindings/java/build.gradle
Normal file
133
bindings/java/build.gradle
Normal file
@ -0,0 +1,133 @@
|
|||||||
|
plugins {
|
||||||
|
id 'java'
|
||||||
|
id 'java-library'
|
||||||
|
id 'maven-publish'
|
||||||
|
id 'signing'
|
||||||
|
}
|
||||||
|
|
||||||
|
archivesBaseName = 'whispercpp'
|
||||||
|
group = 'io.github.ggerganov'
|
||||||
|
version = '1.4.0'
|
||||||
|
|
||||||
|
|
||||||
|
sourceCompatibility = 1.8
|
||||||
|
targetCompatibility = 1.8
|
||||||
|
|
||||||
|
sourceSets {
|
||||||
|
main {
|
||||||
|
resources {
|
||||||
|
srcDirs = ['src/main/resources', 'build/generated/resources/main']
|
||||||
|
}
|
||||||
|
}
|
||||||
|
test {
|
||||||
|
runtimeClasspath += files('build/generated/resources/main')
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
tasks.register('copyLibwhisperDynlib', Copy) {
|
||||||
|
from '../../build'
|
||||||
|
include 'libwhisper.dynlib'
|
||||||
|
into 'build/generated/resources/main/darwin'
|
||||||
|
}
|
||||||
|
|
||||||
|
tasks.register('copyLibwhisperSo', Copy) {
|
||||||
|
from '../../build'
|
||||||
|
include 'libwhisper.so'
|
||||||
|
into 'build/generated/resources/main/linux-x86-64'
|
||||||
|
}
|
||||||
|
|
||||||
|
tasks.register('copyWhisperDll', Copy) {
|
||||||
|
from '../../build/Release'
|
||||||
|
include 'whisper.dll'
|
||||||
|
into 'build/generated/resources/main/windows-x86-64'
|
||||||
|
}
|
||||||
|
|
||||||
|
tasks.register('copyLibs') {
|
||||||
|
dependsOn copyLibwhisperDynlib, copyLibwhisperSo, copyWhisperDll
|
||||||
|
}
|
||||||
|
|
||||||
|
test {
|
||||||
|
systemProperty 'jna.library.path', project.file('build/generated/resources/main').absolutePath
|
||||||
|
}
|
||||||
|
|
||||||
|
java {
|
||||||
|
withSourcesJar()
|
||||||
|
withJavadocJar()
|
||||||
|
}
|
||||||
|
|
||||||
|
jar {
|
||||||
|
exclude '**/whisper_java.exp', '**/whisper_java.lib'
|
||||||
|
}
|
||||||
|
|
||||||
|
javadoc {
|
||||||
|
options.addStringOption('Xdoclint:none', '-quiet')
|
||||||
|
}
|
||||||
|
|
||||||
|
tasks.withType(Test) {
|
||||||
|
useJUnitPlatform()
|
||||||
|
}
|
||||||
|
|
||||||
|
dependencies {
|
||||||
|
implementation "net.java.dev.jna:jna:5.13.0"
|
||||||
|
testImplementation "org.junit.jupiter:junit-jupiter:5.9.2"
|
||||||
|
testImplementation "org.assertj:assertj-core:3.24.2"
|
||||||
|
}
|
||||||
|
|
||||||
|
repositories {
|
||||||
|
mavenCentral()
|
||||||
|
}
|
||||||
|
|
||||||
|
publishing {
|
||||||
|
publications {
|
||||||
|
mavenJava(MavenPublication) {
|
||||||
|
artifactId = 'whispercpp'
|
||||||
|
from components.java
|
||||||
|
pom {
|
||||||
|
name = 'whispercpp'
|
||||||
|
description = "Java JNA bindings for OpenAI's Whisper model, implemented in C/C++"
|
||||||
|
url = 'https://github.com/ggerganov/whisper.cpp'
|
||||||
|
licenses {
|
||||||
|
license {
|
||||||
|
name = 'MIT licence'
|
||||||
|
url = 'https://raw.githubusercontent.com/ggerganov/whisper.cpp/master/LICENSE'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
developers {
|
||||||
|
developer {
|
||||||
|
id = 'ggerganov'
|
||||||
|
name = 'Georgi Gerganov'
|
||||||
|
email = 'ggerganov@gmail.com'
|
||||||
|
}
|
||||||
|
developer {
|
||||||
|
id = 'nalbion'
|
||||||
|
name = 'Nicholas Albion'
|
||||||
|
email = 'nalbion@yahoo.com'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
scm {
|
||||||
|
connection = 'scm:git:git://github.com/ggerganov/whisper.cpp.git'
|
||||||
|
url = 'https://github.com/ggerganov/whisper.cpp'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
repositories {
|
||||||
|
maven {
|
||||||
|
def releasesRepoUrl = 'https://s01.oss.sonatype.org/service/local/staging/deploy/maven2/'
|
||||||
|
def snapshotsRepoUrl = 'https://s01.oss.sonatype.org/content/repositories/snapshots/'
|
||||||
|
url = version.endsWith('-SNAPSHOT') ? snapshotsRepoUrl : releasesRepoUrl
|
||||||
|
credentials {
|
||||||
|
username = System.getenv("MAVEN_USERNAME")
|
||||||
|
password = System.getenv("MAVEN_PASSWORD")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
signing {
|
||||||
|
def signingKey = System.getenv("PGP_SECRET")
|
||||||
|
def signingPassword = System.getenv("PGP_PASSPHRASE")
|
||||||
|
useInMemoryPgpKeys(signingKey, signingPassword)
|
||||||
|
sign publishing.publications.mavenJava
|
||||||
|
}
|
6
bindings/java/gradle.properties
Normal file
6
bindings/java/gradle.properties
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
org.gradle.jvmargs=-Xms256m -Xmx1024m
|
||||||
|
system.include.dir=/usr/include
|
||||||
|
#system.local.include.dir=../../include
|
||||||
|
system.local.include.dir=./build/generated/sources/headers/java/main
|
||||||
|
# JNI header location; this value and jni.lib.dir both point into the
# java-8-openjdk-amd64 directory under /usr/lib/jvm.
# NOTE(review): presumably these must be overridden for other JDK installs - confirm how the build reads them.
jni.include.dir=/usr/lib/jvm/java-8-openjdk-amd64/include/
|
||||||
|
jni.lib.dir=/usr/lib/jvm/java-8-openjdk-amd64/lib/
|
BIN
bindings/java/gradle/wrapper/gradle-wrapper.jar
vendored
Normal file
BIN
bindings/java/gradle/wrapper/gradle-wrapper.jar
vendored
Normal file
Binary file not shown.
6
bindings/java/gradle/wrapper/gradle-wrapper.properties
vendored
Normal file
6
bindings/java/gradle/wrapper/gradle-wrapper.properties
vendored
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
distributionBase=GRADLE_USER_HOME
|
||||||
|
distributionPath=wrapper/dists
|
||||||
|
distributionUrl=https\://services.gradle.org/distributions/gradle-8.1-bin.zip
|
||||||
|
networkTimeout=10000
|
||||||
|
zipStoreBase=GRADLE_USER_HOME
|
||||||
|
zipStorePath=wrapper/dists
|
244
bindings/java/gradlew
vendored
Normal file
244
bindings/java/gradlew
vendored
Normal file
@ -0,0 +1,244 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
#
|
||||||
|
# Copyright © 2015-2021 the original authors.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# https://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
##############################################################################
|
||||||
|
#
|
||||||
|
# Gradle start up script for POSIX generated by Gradle.
|
||||||
|
#
|
||||||
|
# Important for running:
|
||||||
|
#
|
||||||
|
# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is
|
||||||
|
# noncompliant, but you have some other compliant shell such as ksh or
|
||||||
|
# bash, then to run this script, type that shell name before the whole
|
||||||
|
# command line, like:
|
||||||
|
#
|
||||||
|
# ksh Gradle
|
||||||
|
#
|
||||||
|
# Busybox and similar reduced shells will NOT work, because this script
|
||||||
|
# requires all of these POSIX shell features:
|
||||||
|
# * functions;
|
||||||
|
# * expansions «$var», «${var}», «${var:-default}», «${var+SET}»,
|
||||||
|
# «${var#prefix}», «${var%suffix}», and «$( cmd )»;
|
||||||
|
# * compound commands having a testable exit status, especially «case»;
|
||||||
|
# * various built-in commands including «command», «set», and «ulimit».
|
||||||
|
#
|
||||||
|
# Important for patching:
|
||||||
|
#
|
||||||
|
# (2) This script targets any POSIX shell, so it avoids extensions provided
|
||||||
|
# by Bash, Ksh, etc; in particular arrays are avoided.
|
||||||
|
#
|
||||||
|
# The "traditional" practice of packing multiple parameters into a
|
||||||
|
# space-separated string is a well documented source of bugs and security
|
||||||
|
# problems, so this is (mostly) avoided, by progressively accumulating
|
||||||
|
# options in "$@", and eventually passing that to Java.
|
||||||
|
#
|
||||||
|
# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS,
|
||||||
|
# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly;
|
||||||
|
# see the in-line comments for details.
|
||||||
|
#
|
||||||
|
# There are tweaks for specific operating systems such as AIX, CygWin,
|
||||||
|
# Darwin, MinGW, and NonStop.
|
||||||
|
#
|
||||||
|
# (3) This script is generated from the Groovy template
|
||||||
|
# https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
|
||||||
|
# within the Gradle project.
|
||||||
|
#
|
||||||
|
# You can find Gradle at https://github.com/gradle/gradle/.
|
||||||
|
#
|
||||||
|
##############################################################################
|
||||||
|
|
||||||
|
# Attempt to set APP_HOME
|
||||||
|
|
||||||
|
# Resolve links: $0 may be a link
|
||||||
|
app_path=$0
|
||||||
|
|
||||||
|
# Need this for daisy-chained symlinks.
|
||||||
|
while
|
||||||
|
APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path
|
||||||
|
[ -h "$app_path" ]
|
||||||
|
do
|
||||||
|
ls=$( ls -ld "$app_path" )
|
||||||
|
link=${ls#*' -> '}
|
||||||
|
case $link in #(
|
||||||
|
/*) app_path=$link ;; #(
|
||||||
|
*) app_path=$APP_HOME$link ;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
|
||||||
|
# This is normally unused
|
||||||
|
# shellcheck disable=SC2034
|
||||||
|
APP_BASE_NAME=${0##*/}
|
||||||
|
APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit
|
||||||
|
|
||||||
|
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
||||||
|
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
|
||||||
|
|
||||||
|
# Use the maximum available, or set MAX_FD != -1 to use that value.
|
||||||
|
MAX_FD=maximum
|
||||||
|
|
||||||
|
# Print all arguments, joined into one line, on standard error.
warn () {
    echo "$*" >&2
}
|
||||||
|
|
||||||
|
# Print the arguments on standard error, framed by blank lines, then exit with status 1.
die () {
    {
        echo
        echo "$*"
        echo
    } >&2
    exit 1
}
|
||||||
|
|
||||||
|
# OS specific support (must be 'true' or 'false').
|
||||||
|
cygwin=false
|
||||||
|
msys=false
|
||||||
|
darwin=false
|
||||||
|
nonstop=false
|
||||||
|
case "$( uname )" in #(
|
||||||
|
CYGWIN* ) cygwin=true ;; #(
|
||||||
|
Darwin* ) darwin=true ;; #(
|
||||||
|
MSYS* | MINGW* ) msys=true ;; #(
|
||||||
|
NONSTOP* ) nonstop=true ;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
|
||||||
|
|
||||||
|
|
||||||
|
# Determine the Java command to use to start the JVM.
|
||||||
|
# Choose the Java launcher and store it in JAVACMD.
#   * JAVA_HOME set:   use the executable inside it and abort if it is not executable.
#   * JAVA_HOME unset: fall back to "java" from PATH and abort if none is found.
if [ -n "$JAVA_HOME" ] ; then
    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
        # IBM's JDK on AIX uses strange locations for the executables
        JAVACMD=$JAVA_HOME/jre/sh/java
    else
        JAVACMD=$JAVA_HOME/bin/java
    fi
    if [ ! -x "$JAVACMD" ] ; then
        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
    fi
else
    JAVACMD=java
    # "command -v" is the POSIX way to test for an executable; "which" is not
    # specified by POSIX. This also matches the xargs check later in this script.
    command -v java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
|
||||||
|
|
||||||
|
# Increase the maximum file descriptors if we can.
|
||||||
|
if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
|
||||||
|
case $MAX_FD in #(
|
||||||
|
max*)
|
||||||
|
# In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked.
|
||||||
|
# shellcheck disable=SC3045
|
||||||
|
MAX_FD=$( ulimit -H -n ) ||
|
||||||
|
warn "Could not query maximum file descriptor limit"
|
||||||
|
esac
|
||||||
|
case $MAX_FD in #(
|
||||||
|
'' | soft) :;; #(
|
||||||
|
*)
|
||||||
|
# In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked.
|
||||||
|
# shellcheck disable=SC3045
|
||||||
|
ulimit -n "$MAX_FD" ||
|
||||||
|
warn "Could not set maximum file descriptor limit to $MAX_FD"
|
||||||
|
esac
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Collect all arguments for the java command, stacking in reverse order:
|
||||||
|
# * args from the command line
|
||||||
|
# * the main class name
|
||||||
|
# * -classpath
|
||||||
|
# * -D...appname settings
|
||||||
|
# * --module-path (only if needed)
|
||||||
|
# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables.
|
||||||
|
|
||||||
|
# For Cygwin or MSYS, switch paths to Windows format before running java
|
||||||
|
if "$cygwin" || "$msys" ; then
|
||||||
|
APP_HOME=$( cygpath --path --mixed "$APP_HOME" )
|
||||||
|
CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" )
|
||||||
|
|
||||||
|
JAVACMD=$( cygpath --unix "$JAVACMD" )
|
||||||
|
|
||||||
|
# Now convert the arguments - kludge to limit ourselves to /bin/sh
|
||||||
|
for arg do
|
||||||
|
if
|
||||||
|
case $arg in #(
|
||||||
|
-*) false ;; # don't mess with options #(
|
||||||
|
/?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath
|
||||||
|
[ -e "$t" ] ;; #(
|
||||||
|
*) false ;;
|
||||||
|
esac
|
||||||
|
then
|
||||||
|
arg=$( cygpath --path --ignore --mixed "$arg" )
|
||||||
|
fi
|
||||||
|
# Roll the args list around exactly as many times as the number of
|
||||||
|
# args, so each arg winds up back in the position where it started, but
|
||||||
|
# possibly modified.
|
||||||
|
#
|
||||||
|
# NB: a `for` loop captures its iteration list before it begins, so
|
||||||
|
# changing the positional parameters here affects neither the number of
|
||||||
|
# iterations, nor the values presented in `arg`.
|
||||||
|
shift # remove old arg
|
||||||
|
set -- "$@" "$arg" # push replacement arg
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Collect all arguments for the java command;
|
||||||
|
# * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of
|
||||||
|
# shell script including quotes and variable substitutions, so put them in
|
||||||
|
# double quotes to make sure that they get re-expanded; and
|
||||||
|
# * put everything else in single quotes, so that it's not re-expanded.
|
||||||
|
|
||||||
|
set -- \
|
||||||
|
"-Dorg.gradle.appname=$APP_BASE_NAME" \
|
||||||
|
-classpath "$CLASSPATH" \
|
||||||
|
org.gradle.wrapper.GradleWrapperMain \
|
||||||
|
"$@"
|
||||||
|
|
||||||
|
# Stop when "xargs" is not available.
|
||||||
|
if ! command -v xargs >/dev/null 2>&1
|
||||||
|
then
|
||||||
|
die "xargs is not available"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Use "xargs" to parse quoted args.
|
||||||
|
#
|
||||||
|
# With -n1 it outputs one arg per line, with the quotes and backslashes removed.
|
||||||
|
#
|
||||||
|
# In Bash we could simply go:
|
||||||
|
#
|
||||||
|
# readarray ARGS < <( xargs -n1 <<<"$var" ) &&
|
||||||
|
# set -- "${ARGS[@]}" "$@"
|
||||||
|
#
|
||||||
|
# but POSIX shell has neither arrays nor process substitution, so instead we
|
||||||
|
# post-process each arg (as a line of input to sed) to backslash-escape any
|
||||||
|
# character that might be a shell metacharacter, then use eval to reverse
|
||||||
|
# that process (while maintaining the separation between arguments), and wrap
|
||||||
|
# the whole thing up as a single "set" statement.
|
||||||
|
#
|
||||||
|
# This will of course break if any of these variables contains a newline or
|
||||||
|
# an unmatched quote.
|
||||||
|
#
|
||||||
|
|
||||||
|
eval "set -- $(
|
||||||
|
printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
|
||||||
|
xargs -n1 |
|
||||||
|
sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
|
||||||
|
tr '\n' ' '
|
||||||
|
)" '"$@"'
|
||||||
|
|
||||||
|
exec "$JAVACMD" "$@"
|
92
bindings/java/gradlew.bat
vendored
Normal file
92
bindings/java/gradlew.bat
vendored
Normal file
@ -0,0 +1,92 @@
|
|||||||
|
@rem
|
||||||
|
@rem Copyright 2015 the original author or authors.
|
||||||
|
@rem
|
||||||
|
@rem Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
@rem you may not use this file except in compliance with the License.
|
||||||
|
@rem You may obtain a copy of the License at
|
||||||
|
@rem
|
||||||
|
@rem https://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
@rem
|
||||||
|
@rem Unless required by applicable law or agreed to in writing, software
|
||||||
|
@rem distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
@rem See the License for the specific language governing permissions and
|
||||||
|
@rem limitations under the License.
|
||||||
|
@rem
|
||||||
|
|
||||||
|
@if "%DEBUG%"=="" @echo off
|
||||||
|
@rem ##########################################################################
|
||||||
|
@rem
|
||||||
|
@rem Gradle startup script for Windows
|
||||||
|
@rem
|
||||||
|
@rem ##########################################################################
|
||||||
|
|
||||||
|
@rem Set local scope for the variables with windows NT shell
|
||||||
|
if "%OS%"=="Windows_NT" setlocal
|
||||||
|
|
||||||
|
set DIRNAME=%~dp0
|
||||||
|
if "%DIRNAME%"=="" set DIRNAME=.
|
||||||
|
@rem This is normally unused
|
||||||
|
set APP_BASE_NAME=%~n0
|
||||||
|
set APP_HOME=%DIRNAME%
|
||||||
|
|
||||||
|
@rem Resolve any "." and ".." in APP_HOME to make it shorter.
|
||||||
|
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
|
||||||
|
|
||||||
|
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
||||||
|
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
|
||||||
|
|
||||||
|
@rem Find java.exe
|
||||||
|
if defined JAVA_HOME goto findJavaFromJavaHome
|
||||||
|
|
||||||
|
set JAVA_EXE=java.exe
|
||||||
|
%JAVA_EXE% -version >NUL 2>&1
|
||||||
|
if %ERRORLEVEL% equ 0 goto execute
|
||||||
|
|
||||||
|
echo.
|
||||||
|
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
||||||
|
echo.
|
||||||
|
echo Please set the JAVA_HOME variable in your environment to match the
|
||||||
|
echo location of your Java installation.
|
||||||
|
|
||||||
|
goto fail
|
||||||
|
|
||||||
|
:findJavaFromJavaHome
|
||||||
|
set JAVA_HOME=%JAVA_HOME:"=%
|
||||||
|
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
|
||||||
|
|
||||||
|
if exist "%JAVA_EXE%" goto execute
|
||||||
|
|
||||||
|
echo.
|
||||||
|
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
|
||||||
|
echo.
|
||||||
|
echo Please set the JAVA_HOME variable in your environment to match the
|
||||||
|
echo location of your Java installation.
|
||||||
|
|
||||||
|
goto fail
|
||||||
|
|
||||||
|
:execute
|
||||||
|
@rem Setup the command line
|
||||||
|
|
||||||
|
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
|
||||||
|
|
||||||
|
|
||||||
|
@rem Execute Gradle
|
||||||
|
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
|
||||||
|
|
||||||
|
:end
|
||||||
|
@rem End local scope for the variables with windows NT shell
|
||||||
|
if %ERRORLEVEL% equ 0 goto mainEnd
|
||||||
|
|
||||||
|
:fail
|
||||||
|
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
|
||||||
|
rem the _cmd.exe /c_ return code!
|
||||||
|
set EXIT_CODE=%ERRORLEVEL%
|
||||||
|
if %EXIT_CODE% equ 0 set EXIT_CODE=1
|
||||||
|
if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE%
|
||||||
|
exit /b %EXIT_CODE%
|
||||||
|
|
||||||
|
:mainEnd
|
||||||
|
if "%OS%"=="Windows_NT" endlocal
|
||||||
|
|
||||||
|
:omega
|
1
bindings/java/settings.gradle
Normal file
1
bindings/java/settings.gradle
Normal file
@ -0,0 +1 @@
|
|||||||
|
rootProject.name = "whispercpp"
|
@ -0,0 +1,41 @@
|
|||||||
|
package io.github.ggerganov.whispercpp;
|
||||||
|
|
||||||
|
import com.sun.jna.Structure;
|
||||||
|
import com.sun.jna.ptr.PointerByReference;
|
||||||
|
import io.github.ggerganov.whispercpp.ggml.GgmlType;
|
||||||
|
import io.github.ggerganov.whispercpp.WhisperModel;
|
||||||
|
import io.github.ggerganov.whispercpp.params.WhisperContextParams;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class WhisperContext extends Structure {
|
||||||
|
int t_load_us = 0;
|
||||||
|
int t_start_us = 0;
|
||||||
|
|
||||||
|
/** weight type (FP32 / FP16 / QX) */
|
||||||
|
GgmlType wtype = GgmlType.GGML_TYPE_F16;
|
||||||
|
/** intermediate type (FP32 or FP16) */
|
||||||
|
GgmlType itype = GgmlType.GGML_TYPE_F16;
|
||||||
|
|
||||||
|
// WhisperModel model;
|
||||||
|
public PointerByReference model;
|
||||||
|
// whisper_vocab vocab;
|
||||||
|
// whisper_state * state = nullptr;
|
||||||
|
public PointerByReference vocab;
|
||||||
|
public PointerByReference state;
|
||||||
|
|
||||||
|
/** populated by whisper_init_from_file_with_params() */
|
||||||
|
String path_model;
|
||||||
|
WhisperContextParams params;
|
||||||
|
|
||||||
|
// public static class ByReference extends WhisperContext implements Structure.ByReference {
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// public static class ByValue extends WhisperContext implements Structure.ByValue {
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// @Override
|
||||||
|
// protected List<String> getFieldOrder() {
|
||||||
|
// return List.of("t_load_us", "t_start_us", "wtype", "itype", "model", "vocab", "state", "path_model");
|
||||||
|
// }
|
||||||
|
}
|
@ -0,0 +1,207 @@
|
|||||||
|
package io.github.ggerganov.whispercpp;
|
||||||
|
|
||||||
|
import com.sun.jna.Native;
|
||||||
|
import com.sun.jna.Pointer;
|
||||||
|
import io.github.ggerganov.whispercpp.bean.WhisperSegment;
|
||||||
|
import io.github.ggerganov.whispercpp.params.WhisperContextParams;
|
||||||
|
import io.github.ggerganov.whispercpp.params.WhisperFullParams;
|
||||||
|
import io.github.ggerganov.whispercpp.params.WhisperSamplingStrategy;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Before calling most methods, you must call `initContext(modelPath)` to initialise the `ctx` Pointer.
|
||||||
|
*/
|
||||||
|
public class WhisperCpp implements AutoCloseable {
|
||||||
|
private WhisperCppJnaLibrary lib = WhisperCppJnaLibrary.instance;
|
||||||
|
private Pointer ctx = null;
|
||||||
|
private Pointer paramsPointer = null;
|
||||||
|
private Pointer greedyParamsPointer = null;
|
||||||
|
private Pointer beamParamsPointer = null;
|
||||||
|
|
||||||
|
public File modelDir() {
|
||||||
|
String modelDirPath = System.getenv("XDG_CACHE_HOME");
|
||||||
|
if (modelDirPath == null) {
|
||||||
|
modelDirPath = System.getProperty("user.home") + "/.cache";
|
||||||
|
}
|
||||||
|
|
||||||
|
return new File(modelDirPath, "whisper");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param modelPath - absolute path, or just the name (eg: "base", "base-en" or "base.en")
|
||||||
|
*/
|
||||||
|
public void initContext(String modelPath) throws FileNotFoundException {
|
||||||
|
initContextImpl(modelPath, getContextDefaultParams());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param modelPath - absolute path, or just the name (eg: "base", "base-en" or "base.en")
|
||||||
|
* @param params - params to use when initialising the context
|
||||||
|
*/
|
||||||
|
public void initContext(String modelPath, WhisperContextParams params) throws FileNotFoundException {
|
||||||
|
initContextImpl(modelPath, params);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void initContextImpl(String modelPath, WhisperContextParams params) throws FileNotFoundException {
|
||||||
|
if (ctx != null) {
|
||||||
|
lib.whisper_free(ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!modelPath.contains("/") && !modelPath.contains("\\")) {
|
||||||
|
if (!modelPath.endsWith(".bin")) {
|
||||||
|
modelPath = "ggml-" + modelPath.replace("-", ".") + ".bin";
|
||||||
|
}
|
||||||
|
|
||||||
|
modelPath = new File(modelDir(), modelPath).getAbsolutePath();
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx = lib.whisper_init_from_file_with_params(modelPath, params);
|
||||||
|
|
||||||
|
if (ctx == null) {
|
||||||
|
throw new FileNotFoundException(modelPath);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Provides default params which can be used with `whisper_init_from_file_with_params()` etc.
|
||||||
|
* Because this function allocates memory for the params, the caller must call either:
|
||||||
|
* - call `whisper_free_context_params()`
|
||||||
|
* - `Native.free(Pointer.nativeValue(pointer));`
|
||||||
|
*/
|
||||||
|
public WhisperContextParams getContextDefaultParams() {
|
||||||
|
paramsPointer = lib.whisper_context_default_params_by_ref();
|
||||||
|
WhisperContextParams params = new WhisperContextParams(paramsPointer);
|
||||||
|
params.read();
|
||||||
|
return params;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Provides default params which can be used with `whisper_full()` etc.
|
||||||
|
* Because this function allocates memory for the params, the caller must call either:
|
||||||
|
* - call `whisper_free_params()`
|
||||||
|
* - `Native.free(Pointer.nativeValue(pointer));`
|
||||||
|
*
|
||||||
|
* @param strategy - GREEDY
|
||||||
|
*/
|
||||||
|
public WhisperFullParams getFullDefaultParams(WhisperSamplingStrategy strategy) {
|
||||||
|
Pointer pointer;
|
||||||
|
|
||||||
|
// whisper_full_default_params_by_ref allocates memory which we need to delete, so only create max 1 pointer for each strategy.
|
||||||
|
if (strategy == WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY) {
|
||||||
|
if (greedyParamsPointer == null) {
|
||||||
|
greedyParamsPointer = lib.whisper_full_default_params_by_ref(strategy.ordinal());
|
||||||
|
}
|
||||||
|
pointer = greedyParamsPointer;
|
||||||
|
} else {
|
||||||
|
if (beamParamsPointer == null) {
|
||||||
|
beamParamsPointer = lib.whisper_full_default_params_by_ref(strategy.ordinal());
|
||||||
|
}
|
||||||
|
pointer = beamParamsPointer;
|
||||||
|
}
|
||||||
|
|
||||||
|
WhisperFullParams params = new WhisperFullParams(pointer);
|
||||||
|
params.read();
|
||||||
|
return params;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void close() {
|
||||||
|
freeContext();
|
||||||
|
freeParams();
|
||||||
|
System.out.println("Whisper closed");
|
||||||
|
}
|
||||||
|
|
||||||
|
private void freeContext() {
|
||||||
|
if (ctx != null) {
|
||||||
|
lib.whisper_free(ctx);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void freeParams() {
|
||||||
|
if (paramsPointer != null) {
|
||||||
|
Native.free(Pointer.nativeValue(paramsPointer));
|
||||||
|
paramsPointer = null;
|
||||||
|
}
|
||||||
|
if (greedyParamsPointer != null) {
|
||||||
|
Native.free(Pointer.nativeValue(greedyParamsPointer));
|
||||||
|
greedyParamsPointer = null;
|
||||||
|
}
|
||||||
|
if (beamParamsPointer != null) {
|
||||||
|
Native.free(Pointer.nativeValue(beamParamsPointer));
|
||||||
|
beamParamsPointer = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text.
|
||||||
|
* Not thread safe for same context
|
||||||
|
* Uses the specified decoding strategy to obtain the text.
|
||||||
|
*/
|
||||||
|
public String fullTranscribe(WhisperFullParams whisperParams, float[] audioData) throws IOException {
|
||||||
|
if (ctx == null) {
|
||||||
|
throw new IllegalStateException("Model not initialised");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lib.whisper_full(ctx, whisperParams, audioData, audioData.length) != 0) {
|
||||||
|
throw new IOException("Failed to process audio");
|
||||||
|
}
|
||||||
|
|
||||||
|
int nSegments = lib.whisper_full_n_segments(ctx);
|
||||||
|
|
||||||
|
StringBuilder str = new StringBuilder();
|
||||||
|
|
||||||
|
for (int i = 0; i < nSegments; i++) {
|
||||||
|
String text = lib.whisper_full_get_segment_text(ctx, i);
|
||||||
|
System.out.println("Segment:" + text);
|
||||||
|
str.append(text);
|
||||||
|
}
|
||||||
|
|
||||||
|
return str.toString().trim();
|
||||||
|
}
|
||||||
|
public List<WhisperSegment> fullTranscribeWithTime(WhisperFullParams whisperParams, float[] audioData) throws IOException {
|
||||||
|
if (ctx == null) {
|
||||||
|
throw new IllegalStateException("Model not initialised");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lib.whisper_full(ctx, whisperParams, audioData, audioData.length) != 0) {
|
||||||
|
throw new IOException("Failed to process audio");
|
||||||
|
}
|
||||||
|
|
||||||
|
int nSegments = lib.whisper_full_n_segments(ctx);
|
||||||
|
List<WhisperSegment> segments= new ArrayList<>(nSegments);
|
||||||
|
|
||||||
|
|
||||||
|
for (int i = 0; i < nSegments; i++) {
|
||||||
|
long t0 = lib.whisper_full_get_segment_t0(ctx, i);
|
||||||
|
String text = lib.whisper_full_get_segment_text(ctx, i);
|
||||||
|
long t1 = lib.whisper_full_get_segment_t1(ctx, i);
|
||||||
|
segments.add(new WhisperSegment(t0,t1,text));
|
||||||
|
}
|
||||||
|
|
||||||
|
return segments;
|
||||||
|
}
|
||||||
|
|
||||||
|
// public int getTextSegmentCount(Pointer ctx) {
|
||||||
|
// return lib.whisper_full_n_segments(ctx);
|
||||||
|
// }
|
||||||
|
// public String getTextSegment(Pointer ctx, int index) {
|
||||||
|
// return lib.whisper_full_get_segment_text(ctx, index);
|
||||||
|
// }
|
||||||
|
|
||||||
|
public String getSystemInfo() {
|
||||||
|
return lib.whisper_print_system_info();
|
||||||
|
}
|
||||||
|
|
||||||
|
public int benchMemcpy(int nthread) {
|
||||||
|
return lib.whisper_bench_memcpy(nthread);
|
||||||
|
}
|
||||||
|
|
||||||
|
public int benchGgmlMulMat(int nthread) {
|
||||||
|
return lib.whisper_bench_ggml_mul_mat(nthread);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,388 @@
|
|||||||
|
package io.github.ggerganov.whispercpp;
|
||||||
|
|
||||||
|
import com.sun.jna.Library;
|
||||||
|
import com.sun.jna.Native;
|
||||||
|
import com.sun.jna.Pointer;
|
||||||
|
import io.github.ggerganov.whispercpp.model.WhisperModelLoader;
|
||||||
|
import io.github.ggerganov.whispercpp.model.WhisperTokenData;
|
||||||
|
import io.github.ggerganov.whispercpp.params.WhisperContextParams;
|
||||||
|
import io.github.ggerganov.whispercpp.params.WhisperFullParams;
|
||||||
|
|
||||||
|
public interface WhisperCppJnaLibrary extends Library {
|
||||||
|
WhisperCppJnaLibrary instance = Native.load("whisper", WhisperCppJnaLibrary.class);
|
||||||
|
|
||||||
|
String whisper_print_system_info();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* DEPRECATED. Allocate (almost) all memory needed for the model by loading from a file.
|
||||||
|
*
|
||||||
|
* @param path_model Path to the model file
|
||||||
|
* @return Whisper context on success, null on failure
|
||||||
|
*/
|
||||||
|
Pointer whisper_init_from_file(String path_model);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Provides default params which can be used with `whisper_init_from_file_with_params()` etc.
|
||||||
|
* Because this function allocates memory for the params, the caller must call either:
|
||||||
|
* - call `whisper_free_context_params()`
|
||||||
|
* - `Native.free(Pointer.nativeValue(pointer));`
|
||||||
|
*/
|
||||||
|
Pointer whisper_context_default_params_by_ref();
|
||||||
|
|
||||||
|
void whisper_free_context_params(Pointer params);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Allocate (almost) all memory needed for the model by loading from a file.
|
||||||
|
*
|
||||||
|
* @param path_model Path to the model file
|
||||||
|
* @param params Pointer to whisper_context_params
|
||||||
|
* @return Whisper context on success, null on failure
|
||||||
|
*/
|
||||||
|
Pointer whisper_init_from_file_with_params(String path_model, WhisperContextParams params);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Allocate (almost) all memory needed for the model by loading from a buffer.
|
||||||
|
*
|
||||||
|
* @param buffer Model buffer
|
||||||
|
* @param buffer_size Size of the model buffer
|
||||||
|
* @return Whisper context on success, null on failure
|
||||||
|
*/
|
||||||
|
Pointer whisper_init_from_buffer(Pointer buffer, int buffer_size);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Allocate (almost) all memory needed for the model using a model loader.
|
||||||
|
*
|
||||||
|
* @param loader Model loader
|
||||||
|
* @return Whisper context on success, null on failure
|
||||||
|
*/
|
||||||
|
Pointer whisper_init(WhisperModelLoader loader);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Allocate (almost) all memory needed for the model by loading from a file without allocating the state.
|
||||||
|
*
|
||||||
|
* @param path_model Path to the model file
|
||||||
|
* @return Whisper context on success, null on failure
|
||||||
|
*/
|
||||||
|
Pointer whisper_init_from_file_no_state(String path_model);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Allocate (almost) all memory needed for the model by loading from a buffer without allocating the state.
|
||||||
|
*
|
||||||
|
* @param buffer Model buffer
|
||||||
|
* @param buffer_size Size of the model buffer
|
||||||
|
* @return Whisper context on success, null on failure
|
||||||
|
*/
|
||||||
|
Pointer whisper_init_from_buffer_no_state(Pointer buffer, int buffer_size);
|
||||||
|
|
||||||
|
// Pointer whisper_init_from_buffer_no_state(Pointer buffer, long buffer_size);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Allocate (almost) all memory needed for the model using a model loader without allocating the state.
|
||||||
|
*
|
||||||
|
* @param loader Model loader
|
||||||
|
* @return Whisper context on success, null on failure
|
||||||
|
*/
|
||||||
|
Pointer whisper_init_no_state(WhisperModelLoader loader);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Allocate memory for the Whisper state.
|
||||||
|
*
|
||||||
|
* @param ctx Whisper context
|
||||||
|
* @return Whisper state on success, null on failure
|
||||||
|
*/
|
||||||
|
Pointer whisper_init_state(Pointer ctx);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Free all allocated memory associated with the Whisper context.
|
||||||
|
*
|
||||||
|
* @param ctx Whisper context
|
||||||
|
*/
|
||||||
|
void whisper_free(Pointer ctx);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Free all allocated memory associated with the Whisper state.
|
||||||
|
*
|
||||||
|
* @param state Whisper state
|
||||||
|
*/
|
||||||
|
void whisper_free_state(Pointer state);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert RAW PCM audio to log mel spectrogram.
|
||||||
|
* The resulting spectrogram is stored inside the default state of the provided whisper context.
|
||||||
|
*
|
||||||
|
* @param ctx - Pointer to a WhisperContext
|
||||||
|
* @return 0 on success
|
||||||
|
*/
|
||||||
|
int whisper_pcm_to_mel(Pointer ctx, final float[] samples, int n_samples, int n_threads);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param ctx Pointer to a WhisperContext
|
||||||
|
* @param state Pointer to WhisperState
|
||||||
|
* @param n_samples
|
||||||
|
* @param n_threads
|
||||||
|
* @return 0 on success
|
||||||
|
*/
|
||||||
|
int whisper_pcm_to_mel_with_state(Pointer ctx, Pointer state, final float[] samples, int n_samples, int n_threads);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This can be used to set a custom log mel spectrogram inside the default state of the provided whisper context.
|
||||||
|
* Use this instead of whisper_pcm_to_mel() if you want to provide your own log mel spectrogram.
|
||||||
|
* n_mel must be 80
|
||||||
|
* @return 0 on success
|
||||||
|
*/
|
||||||
|
int whisper_set_mel(Pointer ctx, final float[] data, int n_len, int n_mel);
|
||||||
|
int whisper_set_mel_with_state(Pointer ctx, Pointer state, final float[] data, int n_len, int n_mel);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Run the Whisper encoder on the log mel spectrogram stored inside the default state in the provided whisper context.
|
||||||
|
* Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first.
|
||||||
|
* Offset can be used to specify the offset of the first frame in the spectrogram.
|
||||||
|
* @return 0 on success
|
||||||
|
*/
|
||||||
|
int whisper_encode(Pointer ctx, int offset, int n_threads);
|
||||||
|
|
||||||
|
int whisper_encode_with_state(Pointer ctx, Pointer state, int offset, int n_threads);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Run the Whisper decoder to obtain the logits and probabilities for the next token.
|
||||||
|
* Make sure to call whisper_encode() first.
|
||||||
|
* tokens + n_tokens is the provided context for the decoder.
|
||||||
|
* n_past is the number of tokens to use from previous decoder calls.
|
||||||
|
* Returns 0 on success
|
||||||
|
* TODO: add support for multiple decoders
|
||||||
|
*/
|
||||||
|
int whisper_decode(Pointer ctx, Pointer tokens, int n_tokens, int n_past, int n_threads);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param ctx
|
||||||
|
* @param state
|
||||||
|
* @param tokens Pointer to int tokens
|
||||||
|
* @param n_tokens
|
||||||
|
* @param n_past
|
||||||
|
* @param n_threads
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
int whisper_decode_with_state(Pointer ctx, Pointer state, Pointer tokens, int n_tokens, int n_past, int n_threads);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert the provided text into tokens.
|
||||||
|
* The tokens pointer must be large enough to hold the resulting tokens.
|
||||||
|
* Returns the number of tokens on success, no more than n_max_tokens
|
||||||
|
* Returns -1 on failure
|
||||||
|
* TODO: not sure if correct
|
||||||
|
*/
|
||||||
|
int whisper_tokenize(Pointer ctx, String text, Pointer tokens, int n_max_tokens);
|
||||||
|
|
||||||
|
/** Largest language id (i.e. number of available languages - 1) */
|
||||||
|
int whisper_lang_max_id();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the id of the specified language, returns -1 if not found.
|
||||||
|
* Examples:
|
||||||
|
* "de" -> 2
|
||||||
|
* "german" -> 2
|
||||||
|
*/
|
||||||
|
int whisper_lang_id(String lang);
|
||||||
|
|
||||||
|
/** @return the short string of the specified language id (e.g. 2 -> "de"), returns null if not found */
|
||||||
|
String whisper_lang_str(int id);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Use mel data at offset_ms to try and auto-detect the spoken language.
|
||||||
|
* Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first
|
||||||
|
* Returns the top language id or negative on failure
|
||||||
|
* If not null, fills the lang_probs array with the probabilities of all languages
|
||||||
|
* The array must be whisper_lang_max_id() + 1 in size
|
||||||
|
*
|
||||||
|
* ref: https://github.com/openai/whisper/blob/main/whisper/decoding.py#L18-L69
|
||||||
|
*/
|
||||||
|
int whisper_lang_auto_detect(Pointer ctx, int offset_ms, int n_threads, float[] lang_probs);
|
||||||
|
|
||||||
|
int whisper_lang_auto_detect_with_state(Pointer ctx, Pointer state, int offset_ms, int n_threads, float[] lang_probs);
|
||||||
|
|
||||||
|
int whisper_n_len (Pointer ctx); // mel length
|
||||||
|
int whisper_n_len_from_state(Pointer state); // mel length
|
||||||
|
int whisper_n_vocab (Pointer ctx);
|
||||||
|
int whisper_n_text_ctx (Pointer ctx);
|
||||||
|
int whisper_n_audio_ctx (Pointer ctx);
|
||||||
|
int whisper_is_multilingual (Pointer ctx);
|
||||||
|
|
||||||
|
int whisper_model_n_vocab (Pointer ctx);
|
||||||
|
int whisper_model_n_audio_ctx (Pointer ctx);
|
||||||
|
int whisper_model_n_audio_state(Pointer ctx);
|
||||||
|
int whisper_model_n_audio_head (Pointer ctx);
|
||||||
|
int whisper_model_n_audio_layer(Pointer ctx);
|
||||||
|
int whisper_model_n_text_ctx (Pointer ctx);
|
||||||
|
int whisper_model_n_text_state (Pointer ctx);
|
||||||
|
int whisper_model_n_text_head (Pointer ctx);
|
||||||
|
int whisper_model_n_text_layer (Pointer ctx);
|
||||||
|
int whisper_model_n_mels (Pointer ctx);
|
||||||
|
int whisper_model_ftype (Pointer ctx);
|
||||||
|
int whisper_model_type (Pointer ctx);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Token logits obtained from the last call to whisper_decode().
|
||||||
|
* The logits for the last token are stored in the last row
|
||||||
|
* Rows: n_tokens
|
||||||
|
* Cols: n_vocab
|
||||||
|
*/
|
||||||
|
float[] whisper_get_logits (Pointer ctx);
|
||||||
|
float[] whisper_get_logits_from_state(Pointer state);
|
||||||
|
|
||||||
|
// Token Id -> String. Uses the vocabulary in the provided context
|
||||||
|
String whisper_token_to_str(Pointer ctx, int token);
|
||||||
|
String whisper_model_type_readable(Pointer ctx);
|
||||||
|
|
||||||
|
// Special tokens
|
||||||
|
int whisper_token_eot (Pointer ctx);
|
||||||
|
int whisper_token_sot (Pointer ctx);
|
||||||
|
int whisper_token_prev(Pointer ctx);
|
||||||
|
int whisper_token_solm(Pointer ctx);
|
||||||
|
int whisper_token_not (Pointer ctx);
|
||||||
|
int whisper_token_beg (Pointer ctx);
|
||||||
|
int whisper_token_lang(Pointer ctx, int lang_id);
|
||||||
|
|
||||||
|
// Task tokens
|
||||||
|
int whisper_token_translate (Pointer ctx);
|
||||||
|
int whisper_token_transcribe(Pointer ctx);
|
||||||
|
|
||||||
|
// Performance information from the default state.
|
||||||
|
void whisper_print_timings(Pointer ctx);
|
||||||
|
void whisper_reset_timings(Pointer ctx);
|
||||||
|
|
||||||
|
// Note: Even if `whisper_full_params` is stripped back to just 4 ints, JNA throws "Invalid memory access"
|
||||||
|
// when `whisper_full_default_params()` tries to return a struct.
|
||||||
|
// WhisperFullParams whisper_full_default_params(int strategy);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Provides default params which can be used with `whisper_full()` etc.
|
||||||
|
* Because this function allocates memory for the params, the caller must call either:
|
||||||
|
* - `whisper_free_params()`, or
|
||||||
|
* - `Native.free(Pointer.nativeValue(pointer));`
|
||||||
|
*
|
||||||
|
* @param strategy - WhisperSamplingStrategy.value
|
||||||
|
*/
|
||||||
|
Pointer whisper_full_default_params_by_ref(int strategy);
|
||||||
|
|
||||||
|
void whisper_free_params(Pointer params);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
|
||||||
|
* Not thread safe for same context
|
||||||
|
* Uses the specified decoding strategy to obtain the text.
|
||||||
|
*/
|
||||||
|
int whisper_full(Pointer ctx, WhisperFullParams params, final float[] samples, int n_samples);
|
||||||
|
|
||||||
|
int whisper_full_with_state(Pointer ctx, Pointer state, WhisperFullParams params, final float[] samples, int n_samples);
|
||||||
|
|
||||||
|
// Split the input audio into chunks and process each chunk separately using whisper_full_with_state()
|
||||||
|
// Result is stored in the default state of the context
|
||||||
|
// Not thread safe if executed in parallel on the same context.
|
||||||
|
// It seems this approach can offer some speedup in some cases.
|
||||||
|
// However, the transcription accuracy can be worse at the beginning and end of each chunk.
|
||||||
|
int whisper_full_parallel(Pointer ctx, WhisperFullParams params, final float[] samples, int n_samples, int n_processors);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Number of generated text segments.
|
||||||
|
* A segment can be a few words, a sentence, or even a paragraph.
|
||||||
|
* @param ctx Pointer to WhisperContext
|
||||||
|
*/
|
||||||
|
int whisper_full_n_segments (Pointer ctx);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param state Pointer to WhisperState
|
||||||
|
*/
|
||||||
|
int whisper_full_n_segments_from_state(Pointer state);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Language id associated with the context's default state.
|
||||||
|
* @param ctx Pointer to WhisperContext
|
||||||
|
*/
|
||||||
|
int whisper_full_lang_id(Pointer ctx);
|
||||||
|
|
||||||
|
/** Language id associated with the provided state */
|
||||||
|
int whisper_full_lang_id_from_state(Pointer state);
|
||||||
|
|
||||||
|
|
||||||
|
/** Get the start time of the specified segment. */
|
||||||
|
long whisper_full_get_segment_t0(Pointer ctx, int i_segment);
|
||||||
|
|
||||||
|
/** Get the start time of the specified segment from the state. */
|
||||||
|
long whisper_full_get_segment_t0_from_state(Pointer state, int i_segment);
|
||||||
|
|
||||||
|
/** Get the end time of the specified segment. */
|
||||||
|
long whisper_full_get_segment_t1(Pointer ctx, int i_segment);
|
||||||
|
|
||||||
|
/** Get the end time of the specified segment from the state. */
|
||||||
|
long whisper_full_get_segment_t1_from_state(Pointer state, int i_segment);
|
||||||
|
|
||||||
|
/** Get the text of the specified segment. */
|
||||||
|
String whisper_full_get_segment_text(Pointer ctx, int i_segment);
|
||||||
|
|
||||||
|
/** Get the text of the specified segment from the state. */
|
||||||
|
String whisper_full_get_segment_text_from_state(Pointer state, int i_segment);
|
||||||
|
|
||||||
|
/** Get the number of tokens in the specified segment. */
|
||||||
|
int whisper_full_n_tokens(Pointer ctx, int i_segment);
|
||||||
|
|
||||||
|
/** Get the number of tokens in the specified segment from the state. */
|
||||||
|
int whisper_full_n_tokens_from_state(Pointer state, int i_segment);
|
||||||
|
|
||||||
|
/** Get the token text of the specified token in the specified segment. */
|
||||||
|
String whisper_full_get_token_text(Pointer ctx, int i_segment, int i_token);
|
||||||
|
|
||||||
|
|
||||||
|
/** Get the token text of the specified token in the specified segment from the state. */
|
||||||
|
String whisper_full_get_token_text_from_state(Pointer ctx, Pointer state, int i_segment, int i_token);
|
||||||
|
|
||||||
|
/** Get the token ID of the specified token in the specified segment. */
|
||||||
|
int whisper_full_get_token_id(Pointer ctx, int i_segment, int i_token);
|
||||||
|
|
||||||
|
/** Get the token ID of the specified token in the specified segment from the state. */
|
||||||
|
int whisper_full_get_token_id_from_state(Pointer state, int i_segment, int i_token);
|
||||||
|
|
||||||
|
/** Get token data for the specified token in the specified segment. */
|
||||||
|
WhisperTokenData whisper_full_get_token_data(Pointer ctx, int i_segment, int i_token);
|
||||||
|
|
||||||
|
/** Get token data for the specified token in the specified segment from the state. */
|
||||||
|
WhisperTokenData whisper_full_get_token_data_from_state(Pointer state, int i_segment, int i_token);
|
||||||
|
|
||||||
|
/** Get the probability of the specified token in the specified segment. */
|
||||||
|
float whisper_full_get_token_p(Pointer ctx, int i_segment, int i_token);
|
||||||
|
|
||||||
|
/** Get the probability of the specified token in the specified segment from the state. */
|
||||||
|
float whisper_full_get_token_p_from_state(Pointer state, int i_segment, int i_token);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Benchmark function for memcpy.
|
||||||
|
*
|
||||||
|
* @param nThreads Number of threads to use for the benchmark.
|
||||||
|
* @return The result of the benchmark.
|
||||||
|
*/
|
||||||
|
int whisper_bench_memcpy(int nThreads);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Benchmark function for memcpy as a string.
|
||||||
|
*
|
||||||
|
* @param nThreads Number of threads to use for the benchmark.
|
||||||
|
* @return The result of the benchmark as a string.
|
||||||
|
*/
|
||||||
|
String whisper_bench_memcpy_str(int nThreads);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Benchmark function for ggml_mul_mat.
|
||||||
|
*
|
||||||
|
* @param nThreads Number of threads to use for the benchmark.
|
||||||
|
* @return The result of the benchmark.
|
||||||
|
*/
|
||||||
|
int whisper_bench_ggml_mul_mat(int nThreads);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Benchmark function for ggml_mul_mat as a string.
|
||||||
|
*
|
||||||
|
* @param nThreads Number of threads to use for the benchmark.
|
||||||
|
* @return The result of the benchmark as a string.
|
||||||
|
*/
|
||||||
|
String whisper_bench_ggml_mul_mat_str(int nThreads);
|
||||||
|
}
|
@ -0,0 +1,47 @@
|
|||||||
|
package io.github.ggerganov.whispercpp.bean;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by litonglinux@qq.com on 10/21/2023_7:48 AM
|
||||||
|
*/
|
||||||
|
public class WhisperSegment {
|
||||||
|
private long start, end;
|
||||||
|
private String sentence;
|
||||||
|
|
||||||
|
public WhisperSegment() {
|
||||||
|
}
|
||||||
|
|
||||||
|
public WhisperSegment(long start, long end, String sentence) {
|
||||||
|
this.start = start;
|
||||||
|
this.end = end;
|
||||||
|
this.sentence = sentence;
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getStart() {
|
||||||
|
return start;
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getEnd() {
|
||||||
|
return end;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getSentence() {
|
||||||
|
return sentence;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setStart(long start) {
|
||||||
|
this.start = start;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setEnd(long end) {
|
||||||
|
this.end = end;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSentence(String sentence) {
|
||||||
|
this.sentence = sentence;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "[" + start + " --> " + end + "]:" + sentence;
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,24 @@
|
|||||||
|
package io.github.ggerganov.whispercpp.callbacks;
|
||||||
|
|
||||||
|
import com.sun.jna.Callback;
|
||||||
|
import com.sun.jna.Pointer;
|
||||||
|
import io.github.ggerganov.whispercpp.WhisperContext;
|
||||||
|
import io.github.ggerganov.whispercpp.model.WhisperState;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Callback before the encoder starts.
|
||||||
|
* If not null, called before the encoder starts.
|
||||||
|
* If it returns false, the computation is aborted.
|
||||||
|
*/
|
||||||
|
public interface WhisperEncoderBeginCallback extends Callback {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Callback method before the encoder starts.
|
||||||
|
*
|
||||||
|
* @param ctx The whisper context.
|
||||||
|
* @param state The whisper state.
|
||||||
|
* @param user_data User data.
|
||||||
|
* @return True if the computation should proceed, false otherwise.
|
||||||
|
*/
|
||||||
|
boolean callback(Pointer ctx, Pointer state, Pointer user_data);
|
||||||
|
}
|
@ -0,0 +1,25 @@
|
|||||||
|
package io.github.ggerganov.whispercpp.callbacks;
|
||||||
|
|
||||||
|
import com.sun.jna.Callback;
|
||||||
|
import com.sun.jna.Pointer;
|
||||||
|
import io.github.ggerganov.whispercpp.model.WhisperTokenData;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Callback to filter logits.
|
||||||
|
* Can be used to modify the logits before sampling.
|
||||||
|
* If not null, called after applying temperature to logits.
|
||||||
|
*/
|
||||||
|
public interface WhisperLogitsFilterCallback extends Callback {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Callback method to filter logits.
|
||||||
|
*
|
||||||
|
* @param ctx The whisper context.
|
||||||
|
* @param state The whisper state.
|
||||||
|
* @param tokens The array of whisper_token_data.
|
||||||
|
* @param n_tokens The number of tokens.
|
||||||
|
* @param logits The array of logits.
|
||||||
|
* @param user_data User data.
|
||||||
|
*/
|
||||||
|
void callback(Pointer ctx, Pointer state, WhisperTokenData[] tokens, int n_tokens, float[] logits, Pointer user_data);
|
||||||
|
}
|
@ -0,0 +1,24 @@
|
|||||||
|
package io.github.ggerganov.whispercpp.callbacks;
|
||||||
|
|
||||||
|
import com.sun.jna.Callback;
|
||||||
|
import com.sun.jna.Pointer;
|
||||||
|
import io.github.ggerganov.whispercpp.WhisperContext;
|
||||||
|
import io.github.ggerganov.whispercpp.model.WhisperState;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Callback for the text segment.
|
||||||
|
* Called on every newly generated text segment.
|
||||||
|
* Use the whisper_full_...() functions to obtain the text segments.
|
||||||
|
*/
|
||||||
|
public interface WhisperNewSegmentCallback extends Callback {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Callback method for the text segment.
|
||||||
|
*
|
||||||
|
* @param ctx The whisper context.
|
||||||
|
* @param state The whisper state.
|
||||||
|
* @param n_new The number of newly generated text segments.
|
||||||
|
* @param user_data User data.
|
||||||
|
*/
|
||||||
|
void callback(Pointer ctx, Pointer state, int n_new, Pointer user_data);
|
||||||
|
}
|
@ -0,0 +1,22 @@
|
|||||||
|
package io.github.ggerganov.whispercpp.callbacks;
|
||||||
|
|
||||||
|
import com.sun.jna.Callback;
|
||||||
|
import com.sun.jna.Pointer;
|
||||||
|
import io.github.ggerganov.whispercpp.WhisperContext;
|
||||||
|
import io.github.ggerganov.whispercpp.model.WhisperState;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Callback for progress updates.
|
||||||
|
*/
|
||||||
|
public interface WhisperProgressCallback extends Callback {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Callback method for progress updates.
|
||||||
|
*
|
||||||
|
* @param ctx The whisper context.
|
||||||
|
* @param state The whisper state.
|
||||||
|
* @param progress The progress value.
|
||||||
|
* @param user_data User data.
|
||||||
|
*/
|
||||||
|
void callback(Pointer ctx, Pointer state, int progress, Pointer user_data);
|
||||||
|
}
|
@ -0,0 +1,4 @@
|
|||||||
|
package io.github.ggerganov.whispercpp.ggml;
|
||||||
|
|
||||||
|
public class GgmlTensor {
|
||||||
|
}
|
@ -0,0 +1,18 @@
|
|||||||
|
package io.github.ggerganov.whispercpp.ggml;
|
||||||
|
|
||||||
|
public enum GgmlType {
|
||||||
|
GGML_TYPE_F32,
|
||||||
|
GGML_TYPE_F16,
|
||||||
|
GGML_TYPE_Q4_0,
|
||||||
|
GGML_TYPE_Q4_1,
|
||||||
|
REMOVED_GGML_TYPE_Q4_2, // support has been removed
|
||||||
|
REMOVED_GGML_TYPE_Q4_3, // support has been removed
|
||||||
|
GGML_TYPE_Q5_0,
|
||||||
|
GGML_TYPE_Q5_1,
|
||||||
|
GGML_TYPE_Q8_0,
|
||||||
|
GGML_TYPE_Q8_1,
|
||||||
|
GGML_TYPE_I8,
|
||||||
|
GGML_TYPE_I16,
|
||||||
|
GGML_TYPE_I32,
|
||||||
|
GGML_TYPE_COUNT,
|
||||||
|
}
|
@ -0,0 +1,10 @@
|
|||||||
|
package io.github.ggerganov.whispercpp.model;
|
||||||
|
|
||||||
|
public enum EModel {
|
||||||
|
MODEL_UNKNOWN,
|
||||||
|
MODEL_TINY,
|
||||||
|
MODEL_BASE,
|
||||||
|
MODEL_SMALL,
|
||||||
|
MODEL_MEDIUM,
|
||||||
|
MODEL_LARGE,
|
||||||
|
}
|
@ -0,0 +1,49 @@
|
|||||||
|
package io.github.ggerganov.whispercpp;
|
||||||
|
|
||||||
|
import io.github.ggerganov.whispercpp.ggml.GgmlTensor;
|
||||||
|
import io.github.ggerganov.whispercpp.model.EModel;
|
||||||
|
|
||||||
|
public class WhisperModel {
|
||||||
|
// EModel type = EModel.MODEL_UNKNOWN;
|
||||||
|
//
|
||||||
|
// WhisperHParams hparams;
|
||||||
|
// WhisperFilters filters;
|
||||||
|
//
|
||||||
|
// // encoder.positional_embedding
|
||||||
|
// GgmlTensor e_pe;
|
||||||
|
//
|
||||||
|
// // encoder.conv1
|
||||||
|
// GgmlTensor e_conv_1_w;
|
||||||
|
// GgmlTensor e_conv_1_b;
|
||||||
|
//
|
||||||
|
// // encoder.conv2
|
||||||
|
// GgmlTensor e_conv_2_w;
|
||||||
|
// GgmlTensor e_conv_2_b;
|
||||||
|
//
|
||||||
|
// // encoder.ln_post
|
||||||
|
// GgmlTensor e_ln_w;
|
||||||
|
// GgmlTensor e_ln_b;
|
||||||
|
//
|
||||||
|
// // decoder.positional_embedding
|
||||||
|
// GgmlTensor d_pe;
|
||||||
|
//
|
||||||
|
// // decoder.token_embedding
|
||||||
|
// GgmlTensor d_te;
|
||||||
|
//
|
||||||
|
// // decoder.ln
|
||||||
|
// GgmlTensor d_ln_w;
|
||||||
|
// GgmlTensor d_ln_b;
|
||||||
|
//
|
||||||
|
// std::vector<whisper_layer_encoder> layers_encoder;
|
||||||
|
// std::vector<whisper_layer_decoder> layers_decoder;
|
||||||
|
//
|
||||||
|
// // context
|
||||||
|
// struct ggml_context * ctx;
|
||||||
|
//
|
||||||
|
// // the model memory buffer is read-only and can be shared between processors
|
||||||
|
// std::vector<uint8_t> * buf;
|
||||||
|
//
|
||||||
|
// // tensors
|
||||||
|
// int n_loaded;
|
||||||
|
// Map<String, GgmlTensor> tensors;
|
||||||
|
}
|
@ -0,0 +1,62 @@
|
|||||||
|
package io.github.ggerganov.whispercpp.model;
|
||||||
|
|
||||||
|
import com.sun.jna.Callback;
|
||||||
|
import com.sun.jna.Pointer;
|
||||||
|
import com.sun.jna.Structure;
|
||||||
|
|
||||||
|
|
||||||
|
public class WhisperModelLoader extends Structure {
|
||||||
|
public Pointer context;
|
||||||
|
public ReadFunction read;
|
||||||
|
public EOFFunction eof;
|
||||||
|
public CloseFunction close;
|
||||||
|
|
||||||
|
public static class ReadFunction implements Callback {
|
||||||
|
public Pointer invoke(Pointer ctx, Pointer output, int readSize) {
|
||||||
|
// TODO
|
||||||
|
return ctx;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class EOFFunction implements Callback {
|
||||||
|
public boolean invoke(Pointer ctx) {
|
||||||
|
// TODO
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class CloseFunction implements Callback {
|
||||||
|
public void invoke(Pointer ctx) {
|
||||||
|
// TODO
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// public WhisperModelLoader(Pointer p) {
|
||||||
|
// super(p);
|
||||||
|
// read = new ReadFunction();
|
||||||
|
// eof = new EOFFunction();
|
||||||
|
// close = new CloseFunction();
|
||||||
|
// read.setCallback(this);
|
||||||
|
// eof.setCallback(this);
|
||||||
|
// close.setCallback(this);
|
||||||
|
// read.write();
|
||||||
|
// eof.write();
|
||||||
|
// close.write();
|
||||||
|
// }
|
||||||
|
|
||||||
|
public WhisperModelLoader() {
|
||||||
|
super();
|
||||||
|
}
|
||||||
|
|
||||||
|
public interface ReadCallback extends Callback {
|
||||||
|
Pointer invoke(Pointer ctx, Pointer output, int readSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
public interface EOFCallback extends Callback {
|
||||||
|
boolean invoke(Pointer ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
public interface CloseCallback extends Callback {
|
||||||
|
void invoke(Pointer ctx);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,4 @@
|
|||||||
|
package io.github.ggerganov.whispercpp.model;
|
||||||
|
|
||||||
|
public class WhisperState {
|
||||||
|
}
|
@ -0,0 +1,50 @@
|
|||||||
|
package io.github.ggerganov.whispercpp.model;
|
||||||
|
|
||||||
|
import com.sun.jna.Structure;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Structure representing token data.
|
||||||
|
*/
|
||||||
|
public class WhisperTokenData extends Structure {
|
||||||
|
|
||||||
|
/** Token ID. */
|
||||||
|
public int id;
|
||||||
|
|
||||||
|
/** Forced timestamp token ID. */
|
||||||
|
public int tid;
|
||||||
|
|
||||||
|
/** Probability of the token. */
|
||||||
|
public float p;
|
||||||
|
|
||||||
|
/** Log probability of the token. */
|
||||||
|
public float plog;
|
||||||
|
|
||||||
|
/** Probability of the timestamp token. */
|
||||||
|
public float pt;
|
||||||
|
|
||||||
|
/** Sum of probabilities of all timestamp tokens. */
|
||||||
|
public float ptsum;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Start time of the token (token-level timestamp data).
|
||||||
|
* Do not use if you haven't computed token-level timestamps.
|
||||||
|
*/
|
||||||
|
public long t0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* End time of the token (token-level timestamp data).
|
||||||
|
* Do not use if you haven't computed token-level timestamps.
|
||||||
|
*/
|
||||||
|
public long t1;
|
||||||
|
|
||||||
|
/** Voice length of the token. */
|
||||||
|
public float vlen;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected List<String> getFieldOrder() {
|
||||||
|
return Arrays.asList("id", "tid", "p", "plog", "pt", "ptsum", "t0", "t1", "vlen");
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,19 @@
|
|||||||
|
package io.github.ggerganov.whispercpp.params;
|
||||||
|
|
||||||
|
import com.sun.jna.Structure;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class BeamSearchParams extends Structure {
|
||||||
|
/** ref: <a href="https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/transcribe.py#L265">...</a> */
|
||||||
|
public int beam_size;
|
||||||
|
|
||||||
|
/** ref: <a href="https://arxiv.org/pdf/2204.05424.pdf">...</a> */
|
||||||
|
public float patience;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected List<String> getFieldOrder() {
|
||||||
|
return Arrays.asList("beam_size", "patience");
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,30 @@
|
|||||||
|
package io.github.ggerganov.whispercpp.params;
|
||||||
|
|
||||||
|
import com.sun.jna.IntegerType;
|
||||||
|
|
||||||
|
import java.util.function.BooleanSupplier;
|
||||||
|
|
||||||
|
public class CBool extends IntegerType implements BooleanSupplier {
|
||||||
|
public static final int SIZE = 1;
|
||||||
|
public static final CBool FALSE = new CBool(0);
|
||||||
|
public static final CBool TRUE = new CBool(1);
|
||||||
|
|
||||||
|
|
||||||
|
public CBool() {
|
||||||
|
this(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
public CBool(long value) {
|
||||||
|
super(SIZE, value, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean getAsBoolean() {
|
||||||
|
return intValue() == 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return intValue() == 1 ? "true" : "false";
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,16 @@
|
|||||||
|
package io.github.ggerganov.whispercpp.params;
|
||||||
|
|
||||||
|
import com.sun.jna.Structure;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class GreedyParams extends Structure {
|
||||||
|
/** <a href="https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/transcribe.py#L264">...</a> */
|
||||||
|
public int best_of;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected List<String> getFieldOrder() {
|
||||||
|
return Collections.singletonList("best_of");
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,31 @@
|
|||||||
|
package io.github.ggerganov.whispercpp.params;
|
||||||
|
|
||||||
|
import com.sun.jna.*;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parameters for the whisper_init_from_file_with_params() function.
|
||||||
|
* If you change the order or add new parameters, make sure to update the default values in whisper.cpp:
|
||||||
|
* whisper_context_default_params()
|
||||||
|
*/
|
||||||
|
public class WhisperContextParams extends Structure {
|
||||||
|
|
||||||
|
public WhisperContextParams(Pointer p) {
|
||||||
|
super(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Use the GPU for inference. (default = true) */
|
||||||
|
public CBool use_gpu;
|
||||||
|
|
||||||
|
/** Enable or disable use of the GPU for inference. (default = true) */
|
||||||
|
public void useGpu(boolean enable) {
|
||||||
|
use_gpu = enable ? CBool.TRUE : CBool.FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected List<String> getFieldOrder() {
|
||||||
|
return Arrays.asList("use_gpu");
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,10 @@
|
|||||||
|
package io.github.ggerganov.whispercpp.params;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class WhisperFilters {
|
||||||
|
int n_mel;
|
||||||
|
int n_fft;
|
||||||
|
|
||||||
|
List<Float> data;
|
||||||
|
}
|
@ -0,0 +1,326 @@
|
|||||||
|
package io.github.ggerganov.whispercpp.params;
|
||||||
|
|
||||||
|
import com.sun.jna.*;
|
||||||
|
import io.github.ggerganov.whispercpp.callbacks.WhisperEncoderBeginCallback;
|
||||||
|
import io.github.ggerganov.whispercpp.callbacks.WhisperLogitsFilterCallback;
|
||||||
|
import io.github.ggerganov.whispercpp.callbacks.WhisperNewSegmentCallback;
|
||||||
|
import io.github.ggerganov.whispercpp.callbacks.WhisperProgressCallback;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parameters for the whisper_full() function.
|
||||||
|
* If you change the order or add new parameters, make sure to update the default values in whisper.cpp:
|
||||||
|
* whisper_full_default_params()
|
||||||
|
*/
|
||||||
|
public class WhisperFullParams extends Structure {
|
||||||
|
|
||||||
|
public WhisperFullParams(Pointer p) {
|
||||||
|
super(p);
|
||||||
|
// super(p, ALIGN_MSVC);
|
||||||
|
// super(p, ALIGN_GNUC);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Sampling strategy for whisper_full() function. */
|
||||||
|
public int strategy;
|
||||||
|
|
||||||
|
/** Number of threads. (default = 4) */
|
||||||
|
public int n_threads;
|
||||||
|
|
||||||
|
/** Maximum tokens to use from past text as a prompt for the decoder. (default = 16384) */
|
||||||
|
public int n_max_text_ctx;
|
||||||
|
|
||||||
|
/** Start offset in milliseconds. (default = 0) */
|
||||||
|
public int offset_ms;
|
||||||
|
|
||||||
|
/** Audio duration to process in milliseconds. (default = 0) */
|
||||||
|
public int duration_ms;
|
||||||
|
|
||||||
|
/** Translate flag. (default = false) */
|
||||||
|
public CBool translate;
|
||||||
|
|
||||||
|
/** The complement of translateMode(): clears the translate flag. */
|
||||||
|
public void transcribeMode() {
|
||||||
|
translate = CBool.FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** The complement of transcribeMode(): sets the translate flag. */
|
||||||
|
public void translateMode() {
|
||||||
|
translate = CBool.TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Flag to NOT use past transcription (if any) as an initial prompt for the decoder. (default = true) */
|
||||||
|
public CBool no_context;
|
||||||
|
|
||||||
|
/** Enable or disable use of past transcription (if any) as an initial prompt for the decoder; sets no_context to the inverse of {@code enable}. (no_context default = true) */
|
||||||
|
public void enableContext(boolean enable) {
|
||||||
|
no_context = enable ? CBool.FALSE : CBool.TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Generate timestamps or not? */
|
||||||
|
public CBool no_timestamps;
|
||||||
|
|
||||||
|
/** Flag to force single segment output (useful for streaming). (default = false) */
|
||||||
|
public CBool single_segment;
|
||||||
|
|
||||||
|
/** Flag to force single segment output (useful for streaming). (default = false) */
|
||||||
|
public void singleSegment(boolean single) {
|
||||||
|
single_segment = single ? CBool.TRUE : CBool.FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Flag to print special tokens (e.g., <SOT>, <EOT>, <BEG>, etc.). (default = false) */
|
||||||
|
public CBool print_special;
|
||||||
|
|
||||||
|
/** Flag to print special tokens (e.g., <SOT>, <EOT>, <BEG>, etc.). (default = false) */
|
||||||
|
public void printSpecial(boolean enable) {
|
||||||
|
print_special = enable ? CBool.TRUE : CBool.FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Flag to print progress information. (default = true) */
|
||||||
|
public CBool print_progress;
|
||||||
|
|
||||||
|
/** Flag to print progress information. (default = true) */
|
||||||
|
public void printProgress(boolean enable) {
|
||||||
|
print_progress = enable ? CBool.TRUE : CBool.FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Flag to print results from within whisper.cpp (avoid it, use callback instead). (default = true) */
|
||||||
|
public CBool print_realtime;
|
||||||
|
|
||||||
|
/** Flag to print results from within whisper.cpp (avoid it, use callback instead). (default = true) */
|
||||||
|
public void printRealtime(boolean enable) {
|
||||||
|
print_realtime = enable ? CBool.TRUE : CBool.FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Flag to print timestamps for each text segment when printing realtime. (default = true) */
|
||||||
|
public CBool print_timestamps;
|
||||||
|
|
||||||
|
/** Flag to print timestamps for each text segment when printing realtime. (default = true) */
|
||||||
|
public void printTimestamps(boolean enable) {
|
||||||
|
print_timestamps = enable ? CBool.TRUE : CBool.FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** [EXPERIMENTAL] Flag to enable token-level timestamps. (default = false) */
|
||||||
|
public CBool token_timestamps;
|
||||||
|
|
||||||
|
/** [EXPERIMENTAL] Flag to enable token-level timestamps. (default = false) */
|
||||||
|
public void tokenTimestamps(boolean enable) {
|
||||||
|
token_timestamps = enable ? CBool.TRUE : CBool.FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** [EXPERIMENTAL] Timestamp token probability threshold (~0.01). (default = 0.01) */
|
||||||
|
public float thold_pt;
|
||||||
|
|
||||||
|
/** [EXPERIMENTAL] Timestamp token sum probability threshold (~0.01). */
|
||||||
|
public float thold_ptsum;
|
||||||
|
|
||||||
|
/** Maximum segment length in characters. (default = 0) */
|
||||||
|
public int max_len;
|
||||||
|
|
||||||
|
/** Flag to split on word rather than on token (when used with max_len). (default = false) */
|
||||||
|
public CBool split_on_word;
|
||||||
|
|
||||||
|
/** Flag to split on word rather than on token (when used with max_len). (default = false) */
|
||||||
|
public void splitOnWord(boolean enable) {
|
||||||
|
split_on_word = enable ? CBool.TRUE : CBool.FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Maximum tokens per segment (0, default = no limit) */
|
||||||
|
public int max_tokens;
|
||||||
|
|
||||||
|
/** Overwrite the audio context size (0 = use default). */
|
||||||
|
public int audio_ctx;
|
||||||
|
|
||||||
|
/** Enable tinydiarize (default = false) */
|
||||||
|
public CBool tdrz_enable;
|
||||||
|
|
||||||
|
/** Enable tinydiarize (default = false) */
|
||||||
|
public void tdrzEnable(boolean enable) {
|
||||||
|
tdrz_enable = enable ? CBool.TRUE : CBool.FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Regular expression matching tokens to suppress. */
|
||||||
|
public String suppress_regex;
|
||||||
|
|
||||||
|
/** Tokens to provide to the whisper decoder as an initial prompt.
|
||||||
|
* These are prepended to any existing text context from a previous call. */
|
||||||
|
public String initial_prompt;
|
||||||
|
|
||||||
|
/** Prompt tokens. (int*) */
|
||||||
|
public Pointer prompt_tokens;
|
||||||
|
|
||||||
|
public void setPromptTokens(int[] tokens) {
|
||||||
|
Memory mem = new Memory(tokens.length * 4L);
|
||||||
|
mem.write(0, tokens, 0, tokens.length);
|
||||||
|
prompt_tokens = mem;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Number of prompt tokens. */
|
||||||
|
public int prompt_n_tokens;
|
||||||
|
|
||||||
|
/** Language for auto-detection.
|
||||||
|
* For auto-detection, set to `null`, `""`, or "auto". */
|
||||||
|
public String language;
|
||||||
|
|
||||||
|
/** Flag to indicate whether to detect language automatically. */
|
||||||
|
public CBool detect_language;
|
||||||
|
|
||||||
|
/** Flag to indicate whether to detect language automatically. */
|
||||||
|
public void detectLanguage(boolean enable) {
|
||||||
|
detect_language = enable ? CBool.TRUE : CBool.FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Common decoding parameters.
|
||||||
|
|
||||||
|
/** Flag to suppress blank tokens. */
|
||||||
|
public CBool suppress_blank;
|
||||||
|
|
||||||
|
public void suppressBlanks(boolean enable) {
|
||||||
|
suppress_blank = enable ? CBool.TRUE : CBool.FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Flag to suppress non-speech tokens. */
|
||||||
|
public CBool suppress_nst;
|
||||||
|
|
||||||
|
/** Flag to suppress non-speech tokens. */
|
||||||
|
public void suppressNonSpeechTokens(boolean enable) {
|
||||||
|
suppress_nst = enable ? CBool.TRUE : CBool.FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Initial decoding temperature. */
|
||||||
|
public float temperature;
|
||||||
|
|
||||||
|
/** Maximum initial timestamp. */
|
||||||
|
public float max_initial_ts;
|
||||||
|
|
||||||
|
/** Length penalty. */
|
||||||
|
public float length_penalty;
|
||||||
|
|
||||||
|
// Fallback parameters.
|
||||||
|
|
||||||
|
/** Temperature increment. */
|
||||||
|
public float temperature_inc;
|
||||||
|
|
||||||
|
/** Entropy threshold (similar to OpenAI's "compression_ratio_threshold"). */
|
||||||
|
public float entropy_thold;
|
||||||
|
|
||||||
|
/** Log probability threshold. */
|
||||||
|
public float logprob_thold;
|
||||||
|
|
||||||
|
/** No speech threshold. */
|
||||||
|
public float no_speech_thold;
|
||||||
|
|
||||||
|
/** Greedy decoding parameters. */
|
||||||
|
public GreedyParams greedy;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Beam search decoding parameters.
|
||||||
|
*/
|
||||||
|
public BeamSearchParams beam_search;
|
||||||
|
|
||||||
|
public void setBestOf(int bestOf) {
|
||||||
|
if (greedy == null) {
|
||||||
|
greedy = new GreedyParams();
|
||||||
|
}
|
||||||
|
greedy.best_of = bestOf;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setBeamSize(int beamSize) {
|
||||||
|
if (beam_search == null) {
|
||||||
|
beam_search = new BeamSearchParams();
|
||||||
|
}
|
||||||
|
beam_search.beam_size = beamSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setBeamSizeAndPatience(int beamSize, float patience) {
|
||||||
|
if (beam_search == null) {
|
||||||
|
beam_search = new BeamSearchParams();
|
||||||
|
}
|
||||||
|
beam_search.beam_size = beamSize;
|
||||||
|
beam_search.patience = patience;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Callback for every newly generated text segment.
|
||||||
|
* WhisperNewSegmentCallback
|
||||||
|
*/
|
||||||
|
public Pointer new_segment_callback;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* User data for the new_segment_callback.
|
||||||
|
*/
|
||||||
|
public Pointer new_segment_callback_user_data;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Callback on each progress update.
|
||||||
|
* WhisperProgressCallback
|
||||||
|
*/
|
||||||
|
public Pointer progress_callback;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* User data for the progress_callback.
|
||||||
|
*/
|
||||||
|
public Pointer progress_callback_user_data;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Callback each time before the encoder starts.
|
||||||
|
* WhisperEncoderBeginCallback
|
||||||
|
*/
|
||||||
|
public Pointer encoder_begin_callback;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* User data for the encoder_begin_callback.
|
||||||
|
*/
|
||||||
|
public Pointer encoder_begin_callback_user_data;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Callback by each decoder to filter obtained logits.
|
||||||
|
* WhisperLogitsFilterCallback
|
||||||
|
*/
|
||||||
|
public Pointer logits_filter_callback;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* User data for the logits_filter_callback.
|
||||||
|
*/
|
||||||
|
public Pointer logits_filter_callback_user_data;
|
||||||
|
|
||||||
|
|
||||||
|
public void setNewSegmentCallback(WhisperNewSegmentCallback callback) {
|
||||||
|
new_segment_callback = CallbackReference.getFunctionPointer(callback);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setProgressCallback(WhisperProgressCallback callback) {
|
||||||
|
progress_callback = CallbackReference.getFunctionPointer(callback);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setEncoderBeginCallbackeginCallbackCallback(WhisperEncoderBeginCallback callback) {
|
||||||
|
encoder_begin_callback = CallbackReference.getFunctionPointer(callback);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setLogitsFilterCallback(WhisperLogitsFilterCallback callback) {
|
||||||
|
logits_filter_callback = CallbackReference.getFunctionPointer(callback);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Grammar stuff */
|
||||||
|
public Pointer grammar_rules;
|
||||||
|
public long n_grammar_rules;
|
||||||
|
public long i_start_rule;
|
||||||
|
public float grammar_penalty;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected List<String> getFieldOrder() {
|
||||||
|
return Arrays.asList("strategy", "n_threads", "n_max_text_ctx", "offset_ms", "duration_ms", "translate",
|
||||||
|
"no_context", "single_segment", "no_timestamps",
|
||||||
|
"print_special", "print_progress", "print_realtime", "print_timestamps", "token_timestamps",
|
||||||
|
"thold_pt", "thold_ptsum", "max_len", "split_on_word", "max_tokens", "audio_ctx",
|
||||||
|
"tdrz_enable", "suppress_regex", "initial_prompt", "prompt_tokens", "prompt_n_tokens", "language", "detect_language",
|
||||||
|
"suppress_blank", "suppress_nst", "temperature", "max_initial_ts", "length_penalty",
|
||||||
|
"temperature_inc", "entropy_thold", "logprob_thold", "no_speech_thold", "greedy", "beam_search",
|
||||||
|
"new_segment_callback", "new_segment_callback_user_data",
|
||||||
|
"progress_callback", "progress_callback_user_data",
|
||||||
|
"encoder_begin_callback", "encoder_begin_callback_user_data",
|
||||||
|
"logits_filter_callback", "logits_filter_callback_user_data",
|
||||||
|
"grammar_rules", "n_grammar_rules", "i_start_rule", "grammar_penalty");
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,15 @@
|
|||||||
|
package io.github.ggerganov.whispercpp.params;
|
||||||
|
|
||||||
|
public class WhisperHParams {
|
||||||
|
int n_vocab = 51864;
|
||||||
|
int n_audio_ctx = 1500;
|
||||||
|
int n_audio_state = 384;
|
||||||
|
int n_audio_head = 6;
|
||||||
|
int n_audio_layer = 4;
|
||||||
|
int n_text_ctx = 448;
|
||||||
|
int n_text_state = 384;
|
||||||
|
int n_text_head = 6;
|
||||||
|
int n_text_layer = 4;
|
||||||
|
int n_mels = 80;
|
||||||
|
int ftype = 1;
|
||||||
|
}
|
@ -0,0 +1,10 @@
|
|||||||
|
package io.github.ggerganov.whispercpp.params;
|
||||||
|
|
||||||
|
/** Available sampling strategies */
|
||||||
|
public enum WhisperSamplingStrategy {
|
||||||
|
/** similar to OpenAI's GreedyDecoder */
|
||||||
|
WHISPER_SAMPLING_GREEDY,
|
||||||
|
|
||||||
|
/** similar to OpenAI's BeamSearchDecoder */
|
||||||
|
WHISPER_SAMPLING_BEAM_SEARCH
|
||||||
|
}
|
@ -0,0 +1,144 @@
|
|||||||
|
package io.github.ggerganov.whispercpp;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.*;
|
||||||
|
|
||||||
|
import io.github.ggerganov.whispercpp.bean.WhisperSegment;
|
||||||
|
import io.github.ggerganov.whispercpp.params.CBool;
|
||||||
|
import io.github.ggerganov.whispercpp.params.WhisperFullParams;
|
||||||
|
import io.github.ggerganov.whispercpp.params.WhisperSamplingStrategy;
|
||||||
|
import org.junit.jupiter.api.BeforeAll;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import javax.sound.sampled.AudioInputStream;
|
||||||
|
import javax.sound.sampled.AudioSystem;
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
class WhisperCppTest {
|
||||||
|
private static WhisperCpp whisper = new WhisperCpp();
|
||||||
|
private static boolean modelInitialised = false;
|
||||||
|
|
||||||
|
@BeforeAll
|
||||||
|
static void init() throws FileNotFoundException {
|
||||||
|
// By default, models are loaded from ~/.cache/whisper/ and are usually named "ggml-${name}.bin"
|
||||||
|
// or you can provide the absolute path to the model file.
|
||||||
|
//String modelName = "../../models/ggml-tiny.bin";
|
||||||
|
String modelName = "../../models/ggml-tiny.en.bin";
|
||||||
|
try {
|
||||||
|
whisper.initContext(modelName);
|
||||||
|
//whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY);
|
||||||
|
//whisper.getJavaDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH);
|
||||||
|
modelInitialised = true;
|
||||||
|
} catch (FileNotFoundException ex) {
|
||||||
|
System.out.println("Model " + modelName + " not found");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testGetDefaultFullParams_BeamSearch() {
|
||||||
|
// When
|
||||||
|
WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH);
|
||||||
|
|
||||||
|
// Then
|
||||||
|
assertEquals(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH.ordinal(), params.strategy);
|
||||||
|
assertNotEquals(0, params.n_threads);
|
||||||
|
assertEquals(16384, params.n_max_text_ctx);
|
||||||
|
assertFalse(params.translate);
|
||||||
|
assertEquals(0.01f, params.thold_pt);
|
||||||
|
assertEquals(5, params.beam_search.beam_size);
|
||||||
|
assertEquals(-1.0f, params.beam_search.patience);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testGetDefaultFullParams_Greedy() {
|
||||||
|
// When
|
||||||
|
WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY);
|
||||||
|
|
||||||
|
// Then
|
||||||
|
assertEquals(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY.ordinal(), params.strategy);
|
||||||
|
assertNotEquals(0, params.n_threads);
|
||||||
|
assertEquals(16384, params.n_max_text_ctx);
|
||||||
|
assertEquals(5, params.greedy.best_of);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testFullTranscribe() throws Exception {
|
||||||
|
if (!modelInitialised) {
|
||||||
|
System.out.println("Model not initialised, skipping test");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Given
|
||||||
|
File file = new File(System.getProperty("user.dir"), "../../samples/jfk.wav");
|
||||||
|
AudioInputStream audioInputStream = AudioSystem.getAudioInputStream(file);
|
||||||
|
|
||||||
|
byte[] b = new byte[audioInputStream.available()];
|
||||||
|
float[] floats = new float[b.length / 2];
|
||||||
|
|
||||||
|
//WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY);
|
||||||
|
WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH);
|
||||||
|
params.setProgressCallback((ctx, state, progress, user_data) -> System.out.println("progress: " + progress));
|
||||||
|
params.print_progress = CBool.FALSE;
|
||||||
|
//params.initial_prompt = "and so my fellow Americans um, like";
|
||||||
|
|
||||||
|
|
||||||
|
try {
|
||||||
|
audioInputStream.read(b);
|
||||||
|
|
||||||
|
for (int i = 0, j = 0; i < b.length; i += 2, j++) {
|
||||||
|
int intSample = (int) (b[i + 1]) << 8 | (int) (b[i]) & 0xFF;
|
||||||
|
floats[j] = intSample / 32767.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
// When
|
||||||
|
String result = whisper.fullTranscribe(params, floats);
|
||||||
|
|
||||||
|
// Then
|
||||||
|
System.err.println(result);
|
||||||
|
assertEquals("And so my fellow Americans ask not what your country can do for you " +
|
||||||
|
"ask what you can do for your country.",
|
||||||
|
result.replace(",", ""));
|
||||||
|
} finally {
|
||||||
|
audioInputStream.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testFullTranscribeWithTime() throws Exception {
|
||||||
|
if (!modelInitialised) {
|
||||||
|
System.out.println("Model not initialised, skipping test");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Given
|
||||||
|
File file = new File(System.getProperty("user.dir"), "../../samples/jfk.wav");
|
||||||
|
AudioInputStream audioInputStream = AudioSystem.getAudioInputStream(file);
|
||||||
|
|
||||||
|
byte[] b = new byte[audioInputStream.available()];
|
||||||
|
float[] floats = new float[b.length / 2];
|
||||||
|
|
||||||
|
//WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY);
|
||||||
|
WhisperFullParams params = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH);
|
||||||
|
params.setProgressCallback((ctx, state, progress, user_data) -> System.out.println("progress: " + progress));
|
||||||
|
params.print_progress = CBool.FALSE;
|
||||||
|
//params.initial_prompt = "and so my fellow Americans um, like";
|
||||||
|
|
||||||
|
try {
|
||||||
|
audioInputStream.read(b);
|
||||||
|
|
||||||
|
for (int i = 0, j = 0; i < b.length; i += 2, j++) {
|
||||||
|
int intSample = (int) (b[i + 1]) << 8 | (int) (b[i]) & 0xFF;
|
||||||
|
floats[j] = intSample / 32767.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
List<WhisperSegment> segments = whisper.fullTranscribeWithTime(params, floats);
|
||||||
|
assertTrue(segments.size() > 0, "The size of segments should be greater than 0");
|
||||||
|
for (WhisperSegment segment : segments) {
|
||||||
|
System.out.println(segment);
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
audioInputStream.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,17 @@
|
|||||||
|
package io.github.ggerganov.whispercpp;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.*;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
class WhisperJnaLibraryTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testWhisperPrint_system_info() {
|
||||||
|
String systemInfo = WhisperCppJnaLibrary.instance.whisper_print_system_info();
|
||||||
|
// eg: "AVX = 1 | AVX2 = 1 | AVX512 = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0
|
||||||
|
// | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | COREML = 0 | "
|
||||||
|
System.out.println("System info: " + systemInfo);
|
||||||
|
assertTrue(systemInfo.length() > 10);
|
||||||
|
}
|
||||||
|
}
|
@ -41,7 +41,7 @@ make publish-npm
|
|||||||
|
|
||||||
## Sample run
|
## Sample run
|
||||||
|
|
||||||
```java
|
```text
|
||||||
$ node --experimental-wasm-threads --experimental-wasm-simd ../tests/test-whisper.js
|
$ node --experimental-wasm-threads --experimental-wasm-simd ../tests/test-whisper.js
|
||||||
|
|
||||||
whisper_model_load: loading model from 'whisper.bin'
|
whisper_model_load: loading model from 'whisper.bin'
|
||||||
@ -63,7 +63,7 @@ whisper_model_load: ggml ctx size = 140.60 MB
|
|||||||
whisper_model_load: memory size = 22.83 MB
|
whisper_model_load: memory size = 22.83 MB
|
||||||
whisper_model_load: model size = 140.54 MB
|
whisper_model_load: model size = 140.54 MB
|
||||||
|
|
||||||
system_info: n_threads = 8 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | NEON = 0 | F16C = 0 | FP16_VA = 0 | WASM_SIMD = 1 | BLAS = 0 |
|
system_info: n_threads = 8 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | NEON = 0 | F16C = 0 | FP16_VA = 0 | WASM_SIMD = 1 | BLAS = 0 |
|
||||||
|
|
||||||
operator(): processing 176000 samples, 11.0 sec, 8 threads, 1 processors, lang = en, task = transcribe ...
|
operator(): processing 176000 samples, 11.0 sec, 8 threads, 1 processors, lang = en, task = transcribe ...
|
||||||
|
|
||||||
|
@ -20,7 +20,7 @@ struct whisper_context * g_context;
|
|||||||
EMSCRIPTEN_BINDINGS(whisper) {
|
EMSCRIPTEN_BINDINGS(whisper) {
|
||||||
emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
|
emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
|
||||||
if (g_context == nullptr) {
|
if (g_context == nullptr) {
|
||||||
g_context = whisper_init_from_file(path_model.c_str());
|
g_context = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
|
||||||
if (g_context != nullptr) {
|
if (g_context != nullptr) {
|
||||||
return true;
|
return true;
|
||||||
} else {
|
} else {
|
||||||
|
@ -1 +1 @@
|
|||||||
"use strict";var Module={};var ENVIRONMENT_IS_NODE=typeof process=="object"&&typeof process.versions=="object"&&typeof process.versions.node=="string";if(ENVIRONMENT_IS_NODE){var nodeWorkerThreads=require("worker_threads");var parentPort=nodeWorkerThreads.parentPort;parentPort.on("message",data=>onmessage({data:data}));var fs=require("fs");Object.assign(global,{self:global,require:require,Module:Module,location:{href:__filename},Worker:nodeWorkerThreads.Worker,importScripts:function(f){(0,eval)(fs.readFileSync(f,"utf8")+"//# sourceURL="+f)},postMessage:function(msg){parentPort.postMessage(msg)},performance:global.performance||{now:function(){return Date.now()}}})}var initializedJS=false;var pendingNotifiedProxyingQueues=[];function threadPrintErr(){var text=Array.prototype.slice.call(arguments).join(" ");if(ENVIRONMENT_IS_NODE){fs.writeSync(2,text+"\n");return}console.error(text)}function threadAlert(){var text=Array.prototype.slice.call(arguments).join(" ");postMessage({cmd:"alert",text:text,threadId:Module["_pthread_self"]()})}var err=threadPrintErr;self.alert=threadAlert;Module["instantiateWasm"]=(info,receiveInstance)=>{var instance=new WebAssembly.Instance(Module["wasmModule"],info);receiveInstance(instance);Module["wasmModule"]=null;return instance.exports};self.onunhandledrejection=e=>{throw e.reason??e};self.onmessage=e=>{try{if(e.data.cmd==="load"){Module["wasmModule"]=e.data.wasmModule;for(const handler of e.data.handlers){Module[handler]=function(){postMessage({cmd:"callHandler",handler:handler,args:[...arguments]})}}Module["wasmMemory"]=e.data.wasmMemory;Module["buffer"]=Module["wasmMemory"].buffer;Module["ENVIRONMENT_IS_PTHREAD"]=true;if(typeof e.data.urlOrBlob=="string"){importScripts(e.data.urlOrBlob)}else{var objectUrl=URL.createObjectURL(e.data.urlOrBlob);importScripts(objectUrl);URL.revokeObjectURL(objectUrl)}whisper_factory(Module).then(function(instance){Module=instance})}else 
if(e.data.cmd==="run"){Module["__performance_now_clock_drift"]=performance.now()-e.data.time;Module["__emscripten_thread_init"](e.data.pthread_ptr,0,0,1);Module["establishStackSpace"]();Module["PThread"].receiveObjectTransfer(e.data);Module["PThread"].threadInitTLS();if(!initializedJS){Module["__embind_initialize_bindings"]();pendingNotifiedProxyingQueues.forEach(queue=>{Module["executeNotifiedProxyingQueue"](queue)});pendingNotifiedProxyingQueues=[];initializedJS=true}try{Module["invokeEntryPoint"](e.data.start_routine,e.data.arg)}catch(ex){if(ex!="unwind"){if(ex instanceof Module["ExitStatus"]){if(Module["keepRuntimeAlive"]()){}else{Module["__emscripten_thread_exit"](ex.status)}}else{throw ex}}}}else if(e.data.cmd==="cancel"){if(Module["_pthread_self"]()){Module["__emscripten_thread_exit"](-1)}}else if(e.data.target==="setimmediate"){}else if(e.data.cmd==="processProxyingQueue"){if(initializedJS){Module["executeNotifiedProxyingQueue"](e.data.queue)}else{pendingNotifiedProxyingQueues.push(e.data.queue)}}else if(e.data.cmd){err("worker.js received unknown command "+e.data.cmd);err(e.data)}}catch(ex){if(Module["__emscripten_thread_crashed"]){Module["__emscripten_thread_crashed"]()}throw ex}};
|
"use strict";var Module={};var ENVIRONMENT_IS_NODE=typeof process=="object"&&typeof process.versions=="object"&&typeof process.versions.node=="string";if(ENVIRONMENT_IS_NODE){var nodeWorkerThreads=require("worker_threads");var parentPort=nodeWorkerThreads.parentPort;parentPort.on("message",data=>onmessage({data:data}));var fs=require("fs");Object.assign(global,{self:global,require:require,Module:Module,location:{href:__filename},Worker:nodeWorkerThreads.Worker,importScripts:f=>(0,eval)(fs.readFileSync(f,"utf8")+"//# sourceURL="+f),postMessage:msg=>parentPort.postMessage(msg),performance:global.performance||{now:Date.now}})}var initializedJS=false;function threadPrintErr(){var text=Array.prototype.slice.call(arguments).join(" ");if(ENVIRONMENT_IS_NODE){fs.writeSync(2,text+"\n");return}console.error(text)}function threadAlert(){var text=Array.prototype.slice.call(arguments).join(" ");postMessage({cmd:"alert",text:text,threadId:Module["_pthread_self"]()})}var err=threadPrintErr;self.alert=threadAlert;Module["instantiateWasm"]=(info,receiveInstance)=>{var module=Module["wasmModule"];Module["wasmModule"]=null;var instance=new WebAssembly.Instance(module,info);return receiveInstance(instance)};self.onunhandledrejection=e=>{throw e.reason||e};function handleMessage(e){try{if(e.data.cmd==="load"){let messageQueue=[];self.onmessage=e=>messageQueue.push(e);self.startWorker=instance=>{Module=instance;postMessage({"cmd":"loaded"});for(let msg of messageQueue){handleMessage(msg)}self.onmessage=handleMessage};Module["wasmModule"]=e.data.wasmModule;for(const handler of e.data.handlers){Module[handler]=(...args)=>{postMessage({cmd:"callHandler",handler:handler,args:args})}}Module["wasmMemory"]=e.data.wasmMemory;Module["buffer"]=Module["wasmMemory"].buffer;Module["ENVIRONMENT_IS_PTHREAD"]=true;if(typeof e.data.urlOrBlob=="string"){importScripts(e.data.urlOrBlob)}else{var 
objectUrl=URL.createObjectURL(e.data.urlOrBlob);importScripts(objectUrl);URL.revokeObjectURL(objectUrl)}whisper_factory(Module)}else if(e.data.cmd==="run"){Module["__emscripten_thread_init"](e.data.pthread_ptr,0,0,1);Module["__emscripten_thread_mailbox_await"](e.data.pthread_ptr);Module["establishStackSpace"]();Module["PThread"].receiveObjectTransfer(e.data);Module["PThread"].threadInitTLS();if(!initializedJS){Module["__embind_initialize_bindings"]();initializedJS=true}try{Module["invokeEntryPoint"](e.data.start_routine,e.data.arg)}catch(ex){if(ex!="unwind"){throw ex}}}else if(e.data.cmd==="cancel"){if(Module["_pthread_self"]()){Module["__emscripten_thread_exit"](-1)}}else if(e.data.target==="setimmediate"){}else if(e.data.cmd==="checkMailbox"){if(initializedJS){Module["checkMailbox"]()}}else if(e.data.cmd){err(`worker.js received unknown command ${e.data.cmd}`);err(e.data)}}catch(ex){if(Module["__emscripten_thread_crashed"]){Module["__emscripten_thread_crashed"]()}throw ex}}self.onmessage=handleMessage;
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "whisper.cpp",
|
"name": "whisper.cpp",
|
||||||
"version": "1.2.1",
|
"version": "1.7.4",
|
||||||
"description": "Whisper speech recognition",
|
"description": "Whisper speech recognition",
|
||||||
"main": "whisper.js",
|
"main": "whisper.js",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
|
File diff suppressed because one or more lines are too long
3
bindings/ruby/.gitignore
vendored
Normal file
3
bindings/ruby/.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
LICENSE
|
||||||
|
pkg/
|
||||||
|
lib/whisper.*
|
245
bindings/ruby/README.md
Normal file
245
bindings/ruby/README.md
Normal file
@ -0,0 +1,245 @@
|
|||||||
|
whispercpp
|
||||||
|
==========
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
Ruby bindings for [whisper.cpp][], an interface to an automatic speech recognition model.
|
||||||
|
|
||||||
|
Installation
|
||||||
|
------------
|
||||||
|
|
||||||
|
Install the gem and add to the application's Gemfile by executing:
|
||||||
|
|
||||||
|
$ bundle add whispercpp
|
||||||
|
|
||||||
|
If bundler is not being used to manage dependencies, install the gem by executing:
|
||||||
|
|
||||||
|
$ gem install whispercpp
|
||||||
|
|
||||||
|
Usage
|
||||||
|
-----
|
||||||
|
|
||||||
|
```ruby
|
||||||
|
require "whisper"
|
||||||
|
|
||||||
|
whisper = Whisper::Context.new("base")
|
||||||
|
|
||||||
|
params = Whisper::Params.new(
|
||||||
|
language: "en",
|
||||||
|
offset: 10_000,
|
||||||
|
duration: 60_000,
|
||||||
|
max_text_tokens: 300,
|
||||||
|
translate: true,
|
||||||
|
print_timestamps: false,
|
||||||
|
initial_prompt: "Initial prompt here."
|
||||||
|
)
|
||||||
|
|
||||||
|
whisper.transcribe("path/to/audio.wav", params) do |whole_text|
|
||||||
|
puts whole_text
|
||||||
|
end
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
### Preparing model ###
|
||||||
|
|
||||||
|
Some models are prepared up-front:
|
||||||
|
|
||||||
|
```ruby
|
||||||
|
base_en = Whisper::Model.pre_converted_models["base.en"]
|
||||||
|
whisper = Whisper::Context.new(base_en)
|
||||||
|
```
|
||||||
|
|
||||||
|
The first time you use a model, it is downloaded automatically. After that, the cached file is used. To clear the cache, call `#clear_cache`:
|
||||||
|
|
||||||
|
```ruby
|
||||||
|
Whisper::Model.pre_converted_models["base"].clear_cache
|
||||||
|
```
|
||||||
|
|
||||||
|
You can also use a shorthand for pre-converted models:
|
||||||
|
|
||||||
|
```ruby
|
||||||
|
whisper = Whisper::Context.new("base.en")
|
||||||
|
```
|
||||||
|
|
||||||
|
You can see the list of prepared model names with `Whisper::Model.pre_converted_models.keys`:
|
||||||
|
|
||||||
|
```ruby
|
||||||
|
puts Whisper::Model.pre_converted_models.keys
|
||||||
|
# tiny
|
||||||
|
# tiny.en
|
||||||
|
# tiny-q5_1
|
||||||
|
# tiny.en-q5_1
|
||||||
|
# tiny-q8_0
|
||||||
|
# base
|
||||||
|
# base.en
|
||||||
|
# base-q5_1
|
||||||
|
# base.en-q5_1
|
||||||
|
# base-q8_0
|
||||||
|
# :
|
||||||
|
# :
|
||||||
|
```
|
||||||
|
|
||||||
|
You can also use local model files you prepared:
|
||||||
|
|
||||||
|
```ruby
|
||||||
|
whisper = Whisper::Context.new("path/to/your/model.bin")
|
||||||
|
```
|
||||||
|
|
||||||
|
Or, you can download model files:
|
||||||
|
|
||||||
|
```ruby
|
||||||
|
whisper = Whisper::Context.new("https://example.net/uri/of/your/model.bin")
|
||||||
|
# Or
|
||||||
|
whisper = Whisper::Context.new(URI("https://example.net/uri/of/your/model.bin"))
|
||||||
|
```
|
||||||
|
|
||||||
|
See [models][] page for details.
|
||||||
|
|
||||||
|
### Preparing audio file ###
|
||||||
|
|
||||||
|
Currently, whisper.cpp accepts only 16-bit WAV files.
|
||||||
|
|
||||||
|
API
|
||||||
|
---
|
||||||
|
|
||||||
|
### Segments ###
|
||||||
|
|
||||||
|
Once `Whisper::Context#transcribe` has been called, you can retrieve segments with `#each_segment`:
|
||||||
|
|
||||||
|
```ruby
|
||||||
|
def format_time(time_ms)
|
||||||
|
sec, decimal_part = time_ms.divmod(1000)
|
||||||
|
min, sec = sec.divmod(60)
|
||||||
|
hour, min = min.divmod(60)
|
||||||
|
"%02d:%02d:%02d.%03d" % [hour, min, sec, decimal_part]
|
||||||
|
end
|
||||||
|
|
||||||
|
whisper
|
||||||
|
.transcribe("path/to/audio.wav", params)
|
||||||
|
.each_segment.with_index do |segment, index|
|
||||||
|
line = "[%{nth}: %{st} --> %{ed}] %{text}" % {
|
||||||
|
nth: index + 1,
|
||||||
|
st: format_time(segment.start_time),
|
||||||
|
ed: format_time(segment.end_time),
|
||||||
|
text: segment.text
|
||||||
|
}
|
||||||
|
line << " (speaker turned)" if segment.speaker_next_turn?
|
||||||
|
puts line
|
||||||
|
end
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
You can also add a hook to params that is called on each new segment:
|
||||||
|
|
||||||
|
```ruby
|
||||||
|
# Add hook before calling #transcribe
|
||||||
|
params.on_new_segment do |segment|
|
||||||
|
line = "[%{st} --> %{ed}] %{text}" % {
|
||||||
|
st: format_time(segment.start_time),
|
||||||
|
ed: format_time(segment.end_time),
|
||||||
|
text: segment.text
|
||||||
|
}
|
||||||
|
line << " (speaker turned)" if segment.speaker_next_turn?
|
||||||
|
puts line
|
||||||
|
end
|
||||||
|
|
||||||
|
whisper.transcribe("path/to/audio.wav", params)
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
### Models ###
|
||||||
|
|
||||||
|
You can see model information:
|
||||||
|
|
||||||
|
```ruby
|
||||||
|
whisper = Whisper::Context.new("base")
|
||||||
|
model = whisper.model
|
||||||
|
|
||||||
|
model.n_vocab # => 51864
|
||||||
|
model.n_audio_ctx # => 1500
|
||||||
|
model.n_audio_state # => 512
|
||||||
|
model.n_audio_head # => 8
|
||||||
|
model.n_audio_layer # => 6
|
||||||
|
model.n_text_ctx # => 448
|
||||||
|
model.n_text_state # => 512
|
||||||
|
model.n_text_head # => 8
|
||||||
|
model.n_text_layer # => 6
|
||||||
|
model.n_mels # => 80
|
||||||
|
model.ftype # => 1
|
||||||
|
model.type # => "base"
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
### Logging ###
|
||||||
|
|
||||||
|
You can set a log callback:
|
||||||
|
|
||||||
|
```ruby
|
||||||
|
prefix = "[MyApp] "
|
||||||
|
log_callback = ->(level, buffer, user_data) {
|
||||||
|
case level
|
||||||
|
when Whisper::LOG_LEVEL_NONE
|
||||||
|
puts "#{user_data}none: #{buffer}"
|
||||||
|
when Whisper::LOG_LEVEL_INFO
|
||||||
|
puts "#{user_data}info: #{buffer}"
|
||||||
|
when Whisper::LOG_LEVEL_WARN
|
||||||
|
puts "#{user_data}warn: #{buffer}"
|
||||||
|
when Whisper::LOG_LEVEL_ERROR
|
||||||
|
puts "#{user_data}error: #{buffer}"
|
||||||
|
when Whisper::LOG_LEVEL_DEBUG
|
||||||
|
puts "#{user_data}debug: #{buffer}"
|
||||||
|
when Whisper::LOG_LEVEL_CONT
|
||||||
|
puts "#{user_data}same to previous: #{buffer}"
|
||||||
|
end
|
||||||
|
}
|
||||||
|
Whisper.log_set log_callback, prefix
|
||||||
|
```
|
||||||
|
|
||||||
|
Using this feature, you can also suppress logging:
|
||||||
|
|
||||||
|
```ruby
|
||||||
|
Whisper.log_set ->(level, buffer, user_data) {
|
||||||
|
# do nothing
|
||||||
|
}, nil
|
||||||
|
Whisper::Context.new("base")
|
||||||
|
```
|
||||||
|
|
||||||
|
### Low-level API to transcribe ###
|
||||||
|
|
||||||
|
You can also call `Whisper::Context#full` and `#full_parallel` with a Ruby array as samples. Although `#transcribe` with an audio file path is recommended because it extracts PCM samples in C++ and is fast, `#full` and `#full_parallel` give you flexibility.
|
||||||
|
|
||||||
|
```ruby
|
||||||
|
require "whisper"
|
||||||
|
require "wavefile"
|
||||||
|
|
||||||
|
reader = WaveFile::Reader.new("path/to/audio.wav", WaveFile::Format.new(:mono, :float, 16000))
|
||||||
|
samples = reader.enum_for(:each_buffer).map(&:samples).flatten
|
||||||
|
|
||||||
|
whisper = Whisper::Context.new("base")
|
||||||
|
whisper
|
||||||
|
.full(Whisper::Params.new, samples)
|
||||||
|
.each_segment do |segment|
|
||||||
|
puts segment.text
|
||||||
|
end
|
||||||
|
```
|
||||||
|
|
||||||
|
The second argument `samples` may be an array, an object with `length` and `each` methods, or a MemoryView. If you can prepare audio data as a C array and export it as a MemoryView, whispercpp accepts it and works with it with zero copy.
|
||||||
|
|
||||||
|
Development
|
||||||
|
-----------
|
||||||
|
|
||||||
|
% git clone https://github.com/ggerganov/whisper.cpp.git
|
||||||
|
% cd whisper.cpp/bindings/ruby
|
||||||
|
% rake test
|
||||||
|
|
||||||
|
The first call of `rake test` builds the extension and downloads a model for testing. After that, you can add tests in the `tests` directory and modify `ext/ruby_whisper.cpp`.
|
||||||
|
|
||||||
|
If something seems wrong during the build, running `rake clean` solves the problem in some cases.
|
||||||
|
|
||||||
|
License
|
||||||
|
-------
|
||||||
|
|
||||||
|
The same as [whisper.cpp][].
|
||||||
|
|
||||||
|
[whisper.cpp]: https://github.com/ggerganov/whisper.cpp
|
||||||
|
[models]: https://github.com/ggerganov/whisper.cpp/tree/master/models
|
66
bindings/ruby/Rakefile
Normal file
66
bindings/ruby/Rakefile
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
require 'rake/clean'
|
||||||
|
require "bundler/gem_tasks"
|
||||||
|
require "rake/testtask"
|
||||||
|
require_relative "extsources"
|
||||||
|
|
||||||
|
SOURCES = FileList[]
|
||||||
|
|
||||||
|
EXTSOURCES.each do |src|
|
||||||
|
basename = src.pathmap("%f")
|
||||||
|
dest = basename == "LICENSE" ? basename : src.pathmap("%{../..,ext}p")
|
||||||
|
dir = dest.pathmap("%d")
|
||||||
|
file src
|
||||||
|
directory dir
|
||||||
|
file dest => [src, dir] do |t|
|
||||||
|
cp t.source, t.name
|
||||||
|
end
|
||||||
|
SOURCES.include dest
|
||||||
|
end
|
||||||
|
|
||||||
|
CLEAN.include SOURCES
|
||||||
|
CLEAN.include FileList["ext/**/*.o", "ext/**/*.metal", "ext/**/*.tmp", "ext/whisper.{so,bundle,dll}"]
|
||||||
|
|
||||||
|
SRC = FileList["ext/*.{c,cpp,h}"]
|
||||||
|
|
||||||
|
task build: SOURCES
|
||||||
|
|
||||||
|
directory "pkg"
|
||||||
|
CLOBBER.include "pkg"
|
||||||
|
|
||||||
|
LIB_NAME = "whisper".ext(RbConfig::CONFIG["DLEXT"])
|
||||||
|
SO_FILE = File.join("ext", LIB_NAME)
|
||||||
|
LIB_FILE = File.join("lib", LIB_NAME)
|
||||||
|
|
||||||
|
file "ext/Makefile" => SRC + ["ext/extconf.rb"] + SOURCES do |t|
|
||||||
|
chdir "ext" do
|
||||||
|
ruby "extconf.rb"
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
file SO_FILE => "ext/Makefile" do |t|
|
||||||
|
chdir "ext" do
|
||||||
|
sh "make"
|
||||||
|
end
|
||||||
|
end
|
||||||
|
CLEAN.include SO_FILE
|
||||||
|
|
||||||
|
directory "lib"
|
||||||
|
file LIB_FILE => [SO_FILE, "lib"] do |t|
|
||||||
|
copy t.source, t.name
|
||||||
|
end
|
||||||
|
CLEAN.include LIB_FILE
|
||||||
|
|
||||||
|
Rake::TestTask.new do |t|
|
||||||
|
t.test_files = FileList["tests/test_*.rb"]
|
||||||
|
end
|
||||||
|
|
||||||
|
TEST_MEMORY_VIEW = "tests/jfk_reader/jfk_reader.#{RbConfig::CONFIG['DLEXT']}"
|
||||||
|
file TEST_MEMORY_VIEW => "tests/jfk_reader/jfk_reader.c" do |t|
|
||||||
|
chdir "tests/jfk_reader" do
|
||||||
|
ruby "extconf.rb"
|
||||||
|
sh "make"
|
||||||
|
end
|
||||||
|
end
|
||||||
|
CLEAN.include "tests/jfk_reader/jfk_reader.{o,#{RbConfig::CONFIG['DLEXT']}}"
|
||||||
|
|
||||||
|
task test: [LIB_FILE, TEST_MEMORY_VIEW]
|
14
bindings/ruby/ext/.gitignore
vendored
14
bindings/ruby/ext/.gitignore
vendored
@ -1,7 +1,11 @@
|
|||||||
Makefile
|
Makefile
|
||||||
ggml.c
|
whisper.so
|
||||||
ggml.h
|
|
||||||
whisper.bundle
|
whisper.bundle
|
||||||
whisper.cpp
|
whisper.dll
|
||||||
whisper.h
|
scripts/get-flags.mk
|
||||||
dr_wav.h
|
*.o
|
||||||
|
/*/**/*.c
|
||||||
|
/*/**/*.cpp
|
||||||
|
/*/**/*.h
|
||||||
|
/*/**/*.m
|
||||||
|
/*/**/*.metal
|
||||||
|
9
bindings/ruby/ext/cpu.mk
Normal file
9
bindings/ruby/ext/cpu.mk
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
ggml/src/ggml-cpu/ggml-cpu-cpp.o: \
|
||||||
|
ggml/src/ggml-cpu/ggml-cpu.cpp \
|
||||||
|
ggml/include/ggml-backend.h \
|
||||||
|
ggml/include/ggml.h \
|
||||||
|
ggml/include/ggml-alloc.h \
|
||||||
|
ggml/src/ggml-backend-impl.h \
|
||||||
|
ggml/include/ggml-cpu.h \
|
||||||
|
ggml/src/ggml-impl.h
|
||||||
|
$(CXX) $(CXXFLAGS) -c $< -o $@
|
@ -1,13 +1,10 @@
|
|||||||
require 'mkmf'
|
require 'mkmf'
|
||||||
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','whisper.cpp')} .")
|
|
||||||
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','whisper.h')} .")
|
|
||||||
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml.h')} .")
|
|
||||||
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml.c')} .")
|
|
||||||
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','examples','dr_wav.h')} .")
|
|
||||||
|
|
||||||
|
|
||||||
# need to use c++ compiler flags
|
# need to use c++ compiler flags
|
||||||
$CXXFLAGS << ' -std=c++11'
|
$CXXFLAGS << ' -std=c++17'
|
||||||
|
|
||||||
|
$LDFLAGS << ' -lstdc++'
|
||||||
|
|
||||||
# Set to true when building binary gems
|
# Set to true when building binary gems
|
||||||
if enable_config('static-stdlib', false)
|
if enable_config('static-stdlib', false)
|
||||||
$LDFLAGS << ' -static-libgcc -static-libstdc++'
|
$LDFLAGS << ' -static-libgcc -static-libstdc++'
|
||||||
@ -18,4 +15,194 @@ if enable_config('march-tune-native', false)
|
|||||||
$CXXFLAGS << ' -march=native -mtune=native'
|
$CXXFLAGS << ' -march=native -mtune=native'
|
||||||
end
|
end
|
||||||
|
|
||||||
|
if ENV['WHISPER_METAL']
|
||||||
|
$GGML_METAL ||= true
|
||||||
|
$DEPRECATE_WARNING ||= true
|
||||||
|
end
|
||||||
|
|
||||||
|
$UNAME_S = `uname -s`.chomp
|
||||||
|
$UNAME_P = `uname -p`.chomp
|
||||||
|
$UNAME_M = `uname -m`.chomp
|
||||||
|
|
||||||
|
if $UNAME_S == 'Darwin'
|
||||||
|
unless ENV['GGML_NO_METAL']
|
||||||
|
$GGML_METAL ||= true
|
||||||
|
end
|
||||||
|
$GGML_NO_OPENMP ||= true
|
||||||
|
end
|
||||||
|
|
||||||
|
if $GGML_METAL
|
||||||
|
$GGML_METAL_EMBED_LIBRARY = true
|
||||||
|
end
|
||||||
|
|
||||||
|
$MK_CPPFLAGS = '-Iggml/include -Iggml/src -Iggml/src/ggml-cpu -Iinclude -Isrc -Iexamples -DGGML_USE_CPU'
|
||||||
|
$MK_CFLAGS = '-std=c11 -fPIC'
|
||||||
|
$MK_CXXFLAGS = '-std=c++17 -fPIC'
|
||||||
|
$MK_NVCCFLAGS = '-std=c++17'
|
||||||
|
$MK_LDFLAGS = ''
|
||||||
|
|
||||||
|
$OBJ_GGML = []
|
||||||
|
$OBJ_WHISPER = []
|
||||||
|
$OBJ_COMMON = []
|
||||||
|
$OBJ_SDL = []
|
||||||
|
|
||||||
|
$MK_CPPFLAGS << ' -D_XOPEN_SOURCE=600'
|
||||||
|
|
||||||
|
if $UNAME_S == 'Linux'
|
||||||
|
$MK_CPPFLAGS << ' -D_GNU_SOURCE'
|
||||||
|
end
|
||||||
|
|
||||||
|
if $UNAME_S == 'Darwin'
|
||||||
|
$MK_CPPFLAGS << ' -D_DARWIN_C_SOURCE'
|
||||||
|
end
|
||||||
|
|
||||||
|
if ENV['WHISPER_DEBUG']
|
||||||
|
$MK_CFLAGS << ' -O0 -g'
|
||||||
|
$MK_CXXFLAGS << ' -O0 -g'
|
||||||
|
$MK_LDFLAGS << ' -g'
|
||||||
|
$MK_NVCCFLAGS << ' -O0 -g'
|
||||||
|
else
|
||||||
|
$MK_CPPFLAGS << ' -DNDEBUG'
|
||||||
|
$MK_CFLAGS << ' -O3'
|
||||||
|
$MK_CXXFLAGS << ' -O3'
|
||||||
|
$MK_NVCCFLAGS << ' -O3'
|
||||||
|
end
|
||||||
|
|
||||||
|
$WARN_FLAGS =
|
||||||
|
' -Wall' <<
|
||||||
|
' -Wextra' <<
|
||||||
|
' -Wpedantic' <<
|
||||||
|
' -Wcast-qual' <<
|
||||||
|
' -Wno-unused-function'
|
||||||
|
|
||||||
|
$MK_CFLAGS <<
|
||||||
|
$WARN_FLAGS <<
|
||||||
|
' -Wshadow' <<
|
||||||
|
' -Wstrict-prototypes' <<
|
||||||
|
' -Wpointer-arith' <<
|
||||||
|
' -Wmissing-prototypes' <<
|
||||||
|
' -Werror=implicit-int' <<
|
||||||
|
' -Werror=implicit-function-declaration'
|
||||||
|
|
||||||
|
$MK_CXXFLAGS <<
|
||||||
|
$WARN_FLAGS <<
|
||||||
|
' -Wmissing-declarations' <<
|
||||||
|
' -Wmissing-noreturn'
|
||||||
|
|
||||||
|
# Ask the linker for its version banner (`-Wl,-v`) and define
# HAVE_BUGGY_APPLE_LINKER only when it reports dyld-1015.7.
# The check previously used `unless`, which set the macro for every linker
# EXCEPT dyld-1015.7.
# NOTE(review): polarity chosen to match the upstream Makefile rule
# (`ifneq '' '$(findstring dyld-1015.7,...)'`) — confirm against that file.
if `#{cc_command} #{$LDFLAGS} -Wl,-v 2>&1`.chomp.include? 'dyld-1015.7'
  $MK_CPPFLAGS << ' -DHAVE_BUGGY_APPLE_LINKER'
end
|
||||||
|
|
||||||
|
if %w[Linux Darwin FreeBSD NetBSD OpenBSD Haiku].include? $UNAME_S
|
||||||
|
$MK_CFLAGS << ' -pthread'
|
||||||
|
$MK_CXXFLAGS << ' -pthread'
|
||||||
|
end
|
||||||
|
|
||||||
|
unless $_WIN32
|
||||||
|
$DSO_EXT = '.so'
|
||||||
|
else
|
||||||
|
$DSO_EXT = '.dll'
|
||||||
|
end
|
||||||
|
|
||||||
|
unless ENV['RISCV']
|
||||||
|
if %w[x86_64 i686 amd64].include? $UNAME_M
|
||||||
|
$HOST_CXXFLAGS ||= ''
|
||||||
|
|
||||||
|
$MK_CFLAGS << ' -march=native -mtune=native'
|
||||||
|
$HOST_CXXFLAGS << ' -march=native -mtune=native'
|
||||||
|
end
|
||||||
|
else
|
||||||
|
$MK_CFLAGS << ' -march=rv64gcv -mabi=lp64d'
|
||||||
|
$MK_CXXFLAGS << ' -march=rv64gcv -mabi=lp64d'
|
||||||
|
end
|
||||||
|
|
||||||
|
unless ENV['GGML_NO_ACCELERATE']
|
||||||
|
if $UNAME_S == 'Darwin'
|
||||||
|
$MK_CPPFLAGS << ' -DGGML_USE_ACCELERATE -DGGML_USE_BLAS -DGGML_BLAS_USE_ACCELERATE'
|
||||||
|
$MK_CPPFLAGS << ' -DACCELERATE_NEW_LAPACK'
|
||||||
|
$MK_CPPFLAGS << ' -DACCELERATE_LAPACK_ILP64'
|
||||||
|
$MK_LDFLAGS << ' -framework Accelerate'
|
||||||
|
$OBJ_GGML << 'ggml/src/ggml-blas/ggml-blas.o'
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
# Optional OpenBLAS backend, enabled by setting GGML_OPENBLAS in the
# environment. Compiler and linker flags are taken from pkg-config.
# Every pkg-config result is chomped so that no trailing newline ends up
# inside the flag strings that are later written into the Makefile.
if ENV['GGML_OPENBLAS']
  $MK_CPPFLAGS << " -DGGML_USE_BLAS #{`pkg-config --cflags-only-I openblas`.chomp}"
  # The command previously ended with a stray ")", which made the shell fail
  # and silently produced no flags.
  $MK_CFLAGS << " #{`pkg-config --cflags-only-other openblas`.chomp}"
  $MK_LDFLAGS << " #{`pkg-config --libs openblas`.chomp}"
  $OBJ_GGML << 'ggml/src/ggml-blas/ggml-blas.o'
end
|
||||||
|
|
||||||
|
# Optional OpenBLAS backend using the `openblas64` pkg-config package,
# enabled by setting GGML_OPENBLAS64 in the environment.
# Every pkg-config result is chomped so that no trailing newline ends up
# inside the flag strings that are later written into the Makefile.
if ENV['GGML_OPENBLAS64']
  $MK_CPPFLAGS << " -DGGML_USE_BLAS #{`pkg-config --cflags-only-I openblas64`.chomp}"
  # The command previously ended with a stray ")", which made the shell fail
  # and silently produced no flags.
  $MK_CFLAGS << " #{`pkg-config --cflags-only-other openblas64`.chomp}"
  $MK_LDFLAGS << " #{`pkg-config --libs openblas64`.chomp}"
  $OBJ_GGML << 'ggml/src/ggml-blas/ggml-blas.o'
end
|
||||||
|
|
||||||
|
if $GGML_METAL
|
||||||
|
$MK_CPPFLAGS << ' -DGGML_USE_METAL'
|
||||||
|
$MK_LDFLAGS << ' -framework Foundation -framework Metal -framework MetalKit'
|
||||||
|
$OBJ_GGML << 'ggml/src/ggml-metal/ggml-metal.o'
|
||||||
|
|
||||||
|
if ENV['GGML_METAL_NDEBUG']
|
||||||
|
$MK_CPPFLAGS << ' -DGGML_METAL_NDEBUG'
|
||||||
|
end
|
||||||
|
|
||||||
|
if $GGML_METAL_EMBED_LIBRARY
|
||||||
|
$MK_CPPFLAGS << ' -DGGML_METAL_EMBED_LIBRARY'
|
||||||
|
$OBJ_GGML << 'ggml/src/ggml-metal/ggml-metal-embed.o'
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
$OBJ_GGML <<
|
||||||
|
'ggml/src/ggml.o' <<
|
||||||
|
'ggml/src/ggml-alloc.o' <<
|
||||||
|
'ggml/src/ggml-backend.o' <<
|
||||||
|
'ggml/src/ggml-backend-reg.o' <<
|
||||||
|
'ggml/src/ggml-opt.o' <<
|
||||||
|
'ggml/src/ggml-quants.o' <<
|
||||||
|
'ggml/src/ggml-threading.o' <<
|
||||||
|
'ggml/src/ggml-cpu/ggml-cpu.o' <<
|
||||||
|
'ggml/src/ggml-cpu/ggml-cpu-cpp.o' <<
|
||||||
|
'ggml/src/ggml-cpu/ggml-cpu-aarch64.o' <<
|
||||||
|
'ggml/src/ggml-cpu/ggml-cpu-hbm.o' <<
|
||||||
|
'ggml/src/ggml-cpu/ggml-cpu-quants.o' <<
|
||||||
|
'ggml/src/ggml-cpu/ggml-cpu-traits.o'
|
||||||
|
|
||||||
|
$OBJ_WHISPER <<
|
||||||
|
'src/whisper.o' <<
|
||||||
|
'examples/common.o' <<
|
||||||
|
'examples/common-whisper.o'
|
||||||
|
|
||||||
|
$objs = $OBJ_GGML + $OBJ_WHISPER + $OBJ_COMMON + $OBJ_SDL
|
||||||
|
$objs <<
|
||||||
|
"ruby_whisper.o" <<
|
||||||
|
"ruby_whisper_context.o" <<
|
||||||
|
"ruby_whisper_transcribe.o" <<
|
||||||
|
"ruby_whisper_params.o" <<
|
||||||
|
"ruby_whisper_error.o" <<
|
||||||
|
"ruby_whisper_segment.o" <<
|
||||||
|
"ruby_whisper_model.o"
|
||||||
|
|
||||||
|
$CPPFLAGS = "#{$MK_CPPFLAGS} #{$CPPFLAGS}"
|
||||||
|
$CFLAGS = "#{$CPPFLAGS} #{$MK_CFLAGS} #{$GF_CFLAGS} #{$CFLAGS}"
|
||||||
|
$BASE_CXXFLAGS = "#{$MK_CXXFLAGS} #{$CXXFLAGS}"
|
||||||
|
$CXXFLAGS = "#{$BASE_CXXFLAGS} #{$HOST_CXXFLAGS} #{$GF_CXXFLAGS} #{$CPPFLAGS}"
|
||||||
|
$NVCCFLAGS = "#{$MK_NVCCFLAGS} #{$NVCCFLAGS}"
|
||||||
|
$LDFLAGS = "#{$MK_LDFLAGS} #{$LDFLAGS}"
|
||||||
|
|
||||||
create_makefile('whisper')
|
create_makefile('whisper')
|
||||||
|
|
||||||
|
File.open 'Makefile', 'a' do |file|
|
||||||
|
file.puts 'include scripts/get-flags.mk'
|
||||||
|
file.puts 'include cpu.mk'
|
||||||
|
|
||||||
|
if $GGML_METAL
|
||||||
|
file.puts 'include metal.mk'
|
||||||
|
|
||||||
|
if $GGML_METAL_EMBED_LIBRARY
|
||||||
|
file.puts 'include metal-embed.mk'
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
17
bindings/ruby/ext/metal-embed.mk
Normal file
17
bindings/ruby/ext/metal-embed.mk
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
ggml/src/ggml-metal/ggml-metal-embed.o: \
|
||||||
|
ggml/src/ggml-metal/ggml-metal.metal \
|
||||||
|
ggml/src/ggml-metal/ggml-metal-impl.h \
|
||||||
|
ggml/src/ggml-common.h
|
||||||
|
@echo "Embedding Metal library"
|
||||||
|
@sed -e '/__embed_ggml-common.h__/r ggml/src/ggml-common.h' -e '/__embed_ggml-common.h__/d' < ggml/src/ggml-metal/ggml-metal.metal > ggml/src/ggml-metal/ggml-metal-embed.metal.tmp
|
||||||
|
@sed -e '/#include "ggml-metal-impl.h"/r ggml/src/ggml-metal/ggml-metal-impl.h' -e '/#include "ggml-metal-impl.h"/d' < ggml/src/ggml-metal/ggml-metal-embed.metal.tmp > ggml/src/ggml-metal/ggml-metal-embed.metal
|
||||||
|
$(eval TEMP_ASSEMBLY=$(shell mktemp -d))
|
||||||
|
@echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)/ggml-metal-embed.s
|
||||||
|
@echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
|
||||||
|
@echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
|
||||||
|
@echo ".incbin \"ggml/src/ggml-metal/ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
|
||||||
|
@echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
|
||||||
|
@echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
|
||||||
|
$(CC) $(CFLAGS) -c $(TEMP_ASSEMBLY)/ggml-metal-embed.s -o $@
|
||||||
|
@rm -f ${TEMP_ASSEMBLY}/ggml-metal-embed.s
|
||||||
|
@rmdir ${TEMP_ASSEMBLY}
|
6
bindings/ruby/ext/metal.mk
Normal file
6
bindings/ruby/ext/metal.mk
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
ggml/src/ggml-metal/ggml-metal.o: \
|
||||||
|
ggml/src/ggml-metal/ggml-metal.m \
|
||||||
|
ggml/src/ggml-metal/ggml-metal-impl.h \
|
||||||
|
ggml/include/ggml-metal.h \
|
||||||
|
ggml/include/ggml.h
|
||||||
|
$(CC) $(CFLAGS) -c $< -o $@
|
164
bindings/ruby/ext/ruby_whisper.c
Normal file
164
bindings/ruby/ext/ruby_whisper.c
Normal file
@ -0,0 +1,164 @@
|
|||||||
|
#include <ruby.h>
|
||||||
|
#include <ruby/memory_view.h>
|
||||||
|
#include "ruby_whisper.h"
|
||||||
|
|
||||||
|
VALUE mWhisper;
|
||||||
|
VALUE cContext;
|
||||||
|
VALUE cParams;
|
||||||
|
VALUE eError;
|
||||||
|
|
||||||
|
VALUE cSegment;
|
||||||
|
VALUE cModel;
|
||||||
|
|
||||||
|
ID id_to_s;
|
||||||
|
ID id_call;
|
||||||
|
ID id___method__;
|
||||||
|
ID id_to_enum;
|
||||||
|
ID id_length;
|
||||||
|
ID id_next;
|
||||||
|
ID id_new;
|
||||||
|
ID id_to_path;
|
||||||
|
ID id_URI;
|
||||||
|
ID id_pre_converted_models;
|
||||||
|
|
||||||
|
static bool is_log_callback_finalized = false;
|
||||||
|
|
||||||
|
// High level API
|
||||||
|
extern VALUE ruby_whisper_segment_allocate(VALUE klass);
|
||||||
|
|
||||||
|
extern void init_ruby_whisper_context(VALUE *mWhisper);
|
||||||
|
extern void init_ruby_whisper_params(VALUE *mWhisper);
|
||||||
|
extern void init_ruby_whisper_error(VALUE *mWhisper);
|
||||||
|
extern void init_ruby_whisper_segment(VALUE *mWhisper, VALUE *cSegment);
|
||||||
|
extern void init_ruby_whisper_model(VALUE *mWhisper);
|
||||||
|
extern void register_callbacks(ruby_whisper_params *rwp, VALUE *context);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* call-seq:
|
||||||
|
* lang_max_id -> Integer
|
||||||
|
*/
|
||||||
|
static VALUE ruby_whisper_s_lang_max_id(VALUE self) {
|
||||||
|
return INT2NUM(whisper_lang_max_id());
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* call-seq:
|
||||||
|
* lang_id(lang_name) -> Integer
|
||||||
|
*/
|
||||||
|
static VALUE ruby_whisper_s_lang_id(VALUE self, VALUE lang) {
|
||||||
|
const char * lang_str = StringValueCStr(lang);
|
||||||
|
const int id = whisper_lang_id(lang_str);
|
||||||
|
if (-1 == id) {
|
||||||
|
rb_raise(rb_eArgError, "language not found: %s", lang_str);
|
||||||
|
}
|
||||||
|
return INT2NUM(id);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* call-seq:
|
||||||
|
* lang_str(lang_id) -> String
|
||||||
|
*/
|
||||||
|
static VALUE ruby_whisper_s_lang_str(VALUE self, VALUE id) {
|
||||||
|
const int lang_id = NUM2INT(id);
|
||||||
|
const char * str = whisper_lang_str(lang_id);
|
||||||
|
if (NULL == str) {
|
||||||
|
rb_raise(rb_eIndexError, "id %d outside of language id", lang_id);
|
||||||
|
}
|
||||||
|
return rb_str_new2(str);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* call-seq:
|
||||||
|
 *   lang_str_full(lang_id) -> String
|
||||||
|
*/
|
||||||
|
static VALUE ruby_whisper_s_lang_str_full(VALUE self, VALUE id) {
|
||||||
|
const int lang_id = NUM2INT(id);
|
||||||
|
const char * str_full = whisper_lang_str_full(lang_id);
|
||||||
|
if (NULL == str_full) {
|
||||||
|
rb_raise(rb_eIndexError, "id %d outside of language id", lang_id);
|
||||||
|
}
|
||||||
|
return rb_str_new2(str_full);
|
||||||
|
}
|
||||||
|
|
||||||
|
static VALUE ruby_whisper_s_finalize_log_callback(VALUE self, VALUE id) {
|
||||||
|
is_log_callback_finalized = true;
|
||||||
|
return Qnil;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
ruby_whisper_log_callback(enum ggml_log_level level, const char * buffer, void * user_data) {
|
||||||
|
if (is_log_callback_finalized) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
VALUE log_callback = rb_iv_get(mWhisper, "log_callback");
|
||||||
|
VALUE udata = rb_iv_get(mWhisper, "user_data");
|
||||||
|
rb_funcall(log_callback, id_call, 3, INT2NUM(level), rb_str_new2(buffer), udata);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* call-seq:
|
||||||
|
* log_set ->(level, buffer, user_data) { ... }, user_data -> nil
|
||||||
|
*/
|
||||||
|
static VALUE ruby_whisper_s_log_set(VALUE self, VALUE log_callback, VALUE user_data) {
|
||||||
|
VALUE old_callback = rb_iv_get(self, "log_callback");
|
||||||
|
if (!NIL_P(old_callback)) {
|
||||||
|
rb_undefine_finalizer(old_callback);
|
||||||
|
}
|
||||||
|
|
||||||
|
rb_iv_set(self, "log_callback", log_callback);
|
||||||
|
rb_iv_set(self, "user_data", user_data);
|
||||||
|
|
||||||
|
VALUE finalize_log_callback = rb_funcall(mWhisper, rb_intern("method"), 1, rb_str_new2("finalize_log_callback"));
|
||||||
|
rb_define_finalizer(log_callback, finalize_log_callback);
|
||||||
|
|
||||||
|
whisper_log_set(ruby_whisper_log_callback, NULL);
|
||||||
|
|
||||||
|
return Qnil;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void rb_whisper_model_mark(ruby_whisper_model *rwm) {
|
||||||
|
rb_gc_mark(rwm->context);
|
||||||
|
}
|
||||||
|
|
||||||
|
static VALUE ruby_whisper_model_allocate(VALUE klass) {
|
||||||
|
ruby_whisper_model *rwm;
|
||||||
|
rwm = ALLOC(ruby_whisper_model);
|
||||||
|
return Data_Wrap_Struct(klass, rb_whisper_model_mark, RUBY_DEFAULT_FREE, rwm);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Init_whisper() {
|
||||||
|
id_to_s = rb_intern("to_s");
|
||||||
|
id_call = rb_intern("call");
|
||||||
|
id___method__ = rb_intern("__method__");
|
||||||
|
id_to_enum = rb_intern("to_enum");
|
||||||
|
id_length = rb_intern("length");
|
||||||
|
id_next = rb_intern("next");
|
||||||
|
id_new = rb_intern("new");
|
||||||
|
id_to_path = rb_intern("to_path");
|
||||||
|
id_URI = rb_intern("URI");
|
||||||
|
id_pre_converted_models = rb_intern("pre_converted_models");
|
||||||
|
|
||||||
|
mWhisper = rb_define_module("Whisper");
|
||||||
|
|
||||||
|
rb_define_const(mWhisper, "LOG_LEVEL_NONE", INT2NUM(GGML_LOG_LEVEL_NONE));
|
||||||
|
rb_define_const(mWhisper, "LOG_LEVEL_INFO", INT2NUM(GGML_LOG_LEVEL_INFO));
|
||||||
|
rb_define_const(mWhisper, "LOG_LEVEL_WARN", INT2NUM(GGML_LOG_LEVEL_WARN));
|
||||||
|
rb_define_const(mWhisper, "LOG_LEVEL_ERROR", INT2NUM(GGML_LOG_LEVEL_ERROR));
|
||||||
|
rb_define_const(mWhisper, "LOG_LEVEL_DEBUG", INT2NUM(GGML_LOG_LEVEL_DEBUG));
|
||||||
|
rb_define_const(mWhisper, "LOG_LEVEL_CONT", INT2NUM(GGML_LOG_LEVEL_CONT));
|
||||||
|
|
||||||
|
rb_define_singleton_method(mWhisper, "lang_max_id", ruby_whisper_s_lang_max_id, 0);
|
||||||
|
rb_define_singleton_method(mWhisper, "lang_id", ruby_whisper_s_lang_id, 1);
|
||||||
|
rb_define_singleton_method(mWhisper, "lang_str", ruby_whisper_s_lang_str, 1);
|
||||||
|
rb_define_singleton_method(mWhisper, "lang_str_full", ruby_whisper_s_lang_str_full, 1);
|
||||||
|
rb_define_singleton_method(mWhisper, "log_set", ruby_whisper_s_log_set, 2);
|
||||||
|
rb_define_private_method(rb_singleton_class(mWhisper), "finalize_log_callback", ruby_whisper_s_finalize_log_callback, 1);
|
||||||
|
|
||||||
|
init_ruby_whisper_context(&mWhisper);
|
||||||
|
init_ruby_whisper_params(&mWhisper);
|
||||||
|
init_ruby_whisper_error(&mWhisper);
|
||||||
|
init_ruby_whisper_segment(&mWhisper, &cContext);
|
||||||
|
init_ruby_whisper_model(&mWhisper);
|
||||||
|
|
||||||
|
rb_require("whisper/model/uri");
|
||||||
|
}
|
@ -1,426 +0,0 @@
|
|||||||
#include <ruby.h>
|
|
||||||
#include "ruby_whisper.h"
|
|
||||||
#define DR_WAV_IMPLEMENTATION
|
|
||||||
#include "dr_wav.h"
|
|
||||||
#include <cmath>
|
|
||||||
#include <fstream>
|
|
||||||
#include <cstdio>
|
|
||||||
#include <string>
|
|
||||||
#include <thread>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define BOOL_PARAMS_SETTER(self, prop, value) \
|
|
||||||
ruby_whisper_params *rwp; \
|
|
||||||
Data_Get_Struct(self, ruby_whisper_params, rwp); \
|
|
||||||
if (value == Qfalse || value == Qnil) { \
|
|
||||||
rwp->params.prop = false; \
|
|
||||||
} else { \
|
|
||||||
rwp->params.prop = true; \
|
|
||||||
} \
|
|
||||||
return value; \
|
|
||||||
|
|
||||||
#define BOOL_PARAMS_GETTER(self, prop) \
|
|
||||||
ruby_whisper_params *rwp; \
|
|
||||||
Data_Get_Struct(self, ruby_whisper_params, rwp); \
|
|
||||||
if (rwp->params.prop) { \
|
|
||||||
return Qtrue; \
|
|
||||||
} else { \
|
|
||||||
return Qfalse; \
|
|
||||||
}
|
|
||||||
|
|
||||||
VALUE mWhisper;
|
|
||||||
VALUE cContext;
|
|
||||||
VALUE cParams;
|
|
||||||
|
|
||||||
static void ruby_whisper_free(ruby_whisper *rw) {
|
|
||||||
if (rw->context) {
|
|
||||||
whisper_free(rw->context);
|
|
||||||
rw->context = NULL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
static void ruby_whisper_params_free(ruby_whisper_params *rwp) {
|
|
||||||
}
|
|
||||||
|
|
||||||
void rb_whisper_mark(ruby_whisper *rw) {
|
|
||||||
// call rb_gc_mark on any ruby references in rw
|
|
||||||
}
|
|
||||||
|
|
||||||
void rb_whisper_free(ruby_whisper *rw) {
|
|
||||||
ruby_whisper_free(rw);
|
|
||||||
free(rw);
|
|
||||||
}
|
|
||||||
|
|
||||||
void rb_whisper_params_mark(ruby_whisper_params *rwp) {
|
|
||||||
}
|
|
||||||
|
|
||||||
void rb_whisper_params_free(ruby_whisper_params *rwp) {
|
|
||||||
ruby_whisper_params_free(rwp);
|
|
||||||
free(rwp);
|
|
||||||
}
|
|
||||||
|
|
||||||
static VALUE ruby_whisper_allocate(VALUE klass) {
|
|
||||||
ruby_whisper *rw;
|
|
||||||
rw = ALLOC(ruby_whisper);
|
|
||||||
rw->context = NULL;
|
|
||||||
return Data_Wrap_Struct(klass, rb_whisper_mark, rb_whisper_free, rw);
|
|
||||||
}
|
|
||||||
|
|
||||||
static VALUE ruby_whisper_params_allocate(VALUE klass) {
|
|
||||||
ruby_whisper_params *rwp;
|
|
||||||
rwp = ALLOC(ruby_whisper_params);
|
|
||||||
rwp->params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
|
|
||||||
return Data_Wrap_Struct(klass, rb_whisper_params_mark, rb_whisper_params_free, rwp);
|
|
||||||
}
|
|
||||||
|
|
||||||
static VALUE ruby_whisper_initialize(int argc, VALUE *argv, VALUE self) {
|
|
||||||
ruby_whisper *rw;
|
|
||||||
VALUE whisper_model_file_path;
|
|
||||||
|
|
||||||
// TODO: we can support init from buffer here too maybe another ruby object to expose
|
|
||||||
rb_scan_args(argc, argv, "01", &whisper_model_file_path);
|
|
||||||
Data_Get_Struct(self, ruby_whisper, rw);
|
|
||||||
|
|
||||||
if (!rb_respond_to(whisper_model_file_path, rb_intern("to_s"))) {
|
|
||||||
rb_raise(rb_eRuntimeError, "Expected file path to model to initialize Whisper::Context");
|
|
||||||
}
|
|
||||||
rw->context = whisper_init_from_file(StringValueCStr(whisper_model_file_path));
|
|
||||||
if (rw->context == nullptr) {
|
|
||||||
rb_raise(rb_eRuntimeError, "error: failed to initialize whisper context");
|
|
||||||
}
|
|
||||||
return self;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* transcribe a single file
|
|
||||||
* can emit to a block results
|
|
||||||
*
|
|
||||||
**/
|
|
||||||
static VALUE ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self) {
|
|
||||||
ruby_whisper *rw;
|
|
||||||
ruby_whisper_params *rwp;
|
|
||||||
VALUE wave_file_path, blk, params;
|
|
||||||
|
|
||||||
rb_scan_args(argc, argv, "02&", &wave_file_path, &params, &blk);
|
|
||||||
Data_Get_Struct(self, ruby_whisper, rw);
|
|
||||||
Data_Get_Struct(params, ruby_whisper_params, rwp);
|
|
||||||
|
|
||||||
if (!rb_respond_to(wave_file_path, rb_intern("to_s"))) {
|
|
||||||
rb_raise(rb_eRuntimeError, "Expected file path to wave file");
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string fname_inp = StringValueCStr(wave_file_path);
|
|
||||||
|
|
||||||
std::vector<float> pcmf32; // mono-channel F32 PCM
|
|
||||||
std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
|
|
||||||
|
|
||||||
// WAV input - this is directly from main.cpp example
|
|
||||||
{
|
|
||||||
drwav wav;
|
|
||||||
std::vector<uint8_t> wav_data; // used for pipe input from stdin
|
|
||||||
|
|
||||||
if (fname_inp == "-") {
|
|
||||||
{
|
|
||||||
uint8_t buf[1024];
|
|
||||||
while (true) {
|
|
||||||
const size_t n = fread(buf, 1, sizeof(buf), stdin);
|
|
||||||
if (n == 0) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
wav_data.insert(wav_data.end(), buf, buf + n);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (drwav_init_memory(&wav, wav_data.data(), wav_data.size(), nullptr) == false) {
|
|
||||||
fprintf(stderr, "error: failed to open WAV file from stdin\n");
|
|
||||||
return self;
|
|
||||||
}
|
|
||||||
|
|
||||||
fprintf(stderr, "%s: read %zu bytes from stdin\n", __func__, wav_data.size());
|
|
||||||
} else if (drwav_init_file(&wav, fname_inp.c_str(), nullptr) == false) {
|
|
||||||
fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname_inp.c_str());
|
|
||||||
return self;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (wav.channels != 1 && wav.channels != 2) {
|
|
||||||
fprintf(stderr, "WAV file '%s' must be mono or stereo\n", fname_inp.c_str());
|
|
||||||
return self;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (rwp->diarize && wav.channels != 2 && rwp->params.print_timestamps == false) {
|
|
||||||
fprintf(stderr, "WAV file '%s' must be stereo for diarization and timestamps have to be enabled\n", fname_inp.c_str());
|
|
||||||
return self;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (wav.sampleRate != WHISPER_SAMPLE_RATE) {
|
|
||||||
fprintf(stderr, "WAV file '%s' must be %i kHz\n", fname_inp.c_str(), WHISPER_SAMPLE_RATE/1000);
|
|
||||||
return self;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (wav.bitsPerSample != 16) {
|
|
||||||
fprintf(stderr, "WAV file '%s' must be 16-bit\n", fname_inp.c_str());
|
|
||||||
return self;
|
|
||||||
}
|
|
||||||
|
|
||||||
const uint64_t n = wav_data.empty() ? wav.totalPCMFrameCount : wav_data.size()/(wav.channels*wav.bitsPerSample/8);
|
|
||||||
|
|
||||||
std::vector<int16_t> pcm16;
|
|
||||||
pcm16.resize(n*wav.channels);
|
|
||||||
drwav_read_pcm_frames_s16(&wav, n, pcm16.data());
|
|
||||||
drwav_uninit(&wav);
|
|
||||||
|
|
||||||
// convert to mono, float
|
|
||||||
pcmf32.resize(n);
|
|
||||||
if (wav.channels == 1) {
|
|
||||||
for (uint64_t i = 0; i < n; i++) {
|
|
||||||
pcmf32[i] = float(pcm16[i])/32768.0f;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for (uint64_t i = 0; i < n; i++) {
|
|
||||||
pcmf32[i] = float(pcm16[2*i] + pcm16[2*i + 1])/65536.0f;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (rwp->diarize) {
|
|
||||||
// convert to stereo, float
|
|
||||||
pcmf32s.resize(2);
|
|
||||||
|
|
||||||
pcmf32s[0].resize(n);
|
|
||||||
pcmf32s[1].resize(n);
|
|
||||||
for (uint64_t i = 0; i < n; i++) {
|
|
||||||
pcmf32s[0][i] = float(pcm16[2*i])/32768.0f;
|
|
||||||
pcmf32s[1][i] = float(pcm16[2*i + 1])/32768.0f;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
{
|
|
||||||
static bool is_aborted = false; // NOTE: this should be atomic to avoid data race
|
|
||||||
|
|
||||||
rwp->params.encoder_begin_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, void * user_data) {
|
|
||||||
bool is_aborted = *(bool*)user_data;
|
|
||||||
return !is_aborted;
|
|
||||||
};
|
|
||||||
rwp->params.encoder_begin_callback_user_data = &is_aborted;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (whisper_full_parallel(rw->context, rwp->params, pcmf32.data(), pcmf32.size(), 1) != 0) {
|
|
||||||
fprintf(stderr, "failed to process audio\n");
|
|
||||||
return self;
|
|
||||||
}
|
|
||||||
const int n_segments = whisper_full_n_segments(rw->context);
|
|
||||||
VALUE output = rb_str_new2("");
|
|
||||||
for (int i = 0; i < n_segments; ++i) {
|
|
||||||
const char * text = whisper_full_get_segment_text(rw->context, i);
|
|
||||||
output = rb_str_concat(output, rb_str_new2(text));
|
|
||||||
}
|
|
||||||
VALUE idCall = rb_intern("call");
|
|
||||||
if (blk != Qnil) {
|
|
||||||
rb_funcall(blk, idCall, 1, output);
|
|
||||||
}
|
|
||||||
return self;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* params.language = "auto" | "en", etc...
|
|
||||||
*/
|
|
||||||
static VALUE ruby_whisper_params_set_language(VALUE self, VALUE value) {
|
|
||||||
ruby_whisper_params *rwp;
|
|
||||||
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
|
||||||
if (value == Qfalse || value == Qnil) {
|
|
||||||
rwp->params.language = "auto";
|
|
||||||
} else {
|
|
||||||
rwp->params.language = StringValueCStr(value);
|
|
||||||
}
|
|
||||||
return value;
|
|
||||||
}
|
|
||||||
static VALUE ruby_whisper_params_get_language(VALUE self) {
|
|
||||||
ruby_whisper_params *rwp;
|
|
||||||
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
|
||||||
if (rwp->params.language) {
|
|
||||||
return rb_str_new2(rwp->params.language);
|
|
||||||
} else {
|
|
||||||
return rb_str_new2("auto");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
static VALUE ruby_whisper_params_set_translate(VALUE self, VALUE value) {
|
|
||||||
BOOL_PARAMS_SETTER(self, translate, value)
|
|
||||||
}
|
|
||||||
static VALUE ruby_whisper_params_get_translate(VALUE self) {
|
|
||||||
BOOL_PARAMS_GETTER(self, translate)
|
|
||||||
}
|
|
||||||
static VALUE ruby_whisper_params_set_no_context(VALUE self, VALUE value) {
|
|
||||||
BOOL_PARAMS_SETTER(self, no_context, value)
|
|
||||||
}
|
|
||||||
static VALUE ruby_whisper_params_get_no_context(VALUE self) {
|
|
||||||
BOOL_PARAMS_GETTER(self, no_context)
|
|
||||||
}
|
|
||||||
static VALUE ruby_whisper_params_set_single_segment(VALUE self, VALUE value) {
|
|
||||||
BOOL_PARAMS_SETTER(self, single_segment, value)
|
|
||||||
}
|
|
||||||
static VALUE ruby_whisper_params_get_single_segment(VALUE self) {
|
|
||||||
BOOL_PARAMS_GETTER(self, single_segment)
|
|
||||||
}
|
|
||||||
static VALUE ruby_whisper_params_set_print_special(VALUE self, VALUE value) {
|
|
||||||
BOOL_PARAMS_SETTER(self, print_special, value)
|
|
||||||
}
|
|
||||||
static VALUE ruby_whisper_params_get_print_special(VALUE self) {
|
|
||||||
BOOL_PARAMS_GETTER(self, print_special)
|
|
||||||
}
|
|
||||||
static VALUE ruby_whisper_params_set_print_progress(VALUE self, VALUE value) {
|
|
||||||
BOOL_PARAMS_SETTER(self, print_progress, value)
|
|
||||||
}
|
|
||||||
static VALUE ruby_whisper_params_get_print_progress(VALUE self) {
|
|
||||||
BOOL_PARAMS_GETTER(self, print_progress)
|
|
||||||
}
|
|
||||||
static VALUE ruby_whisper_params_set_print_realtime(VALUE self, VALUE value) {
|
|
||||||
BOOL_PARAMS_SETTER(self, print_realtime, value)
|
|
||||||
}
|
|
||||||
static VALUE ruby_whisper_params_get_print_realtime(VALUE self) {
|
|
||||||
BOOL_PARAMS_GETTER(self, print_realtime)
|
|
||||||
}
|
|
||||||
static VALUE ruby_whisper_params_set_print_timestamps(VALUE self, VALUE value) {
|
|
||||||
BOOL_PARAMS_SETTER(self, print_timestamps, value)
|
|
||||||
}
|
|
||||||
static VALUE ruby_whisper_params_get_print_timestamps(VALUE self) {
|
|
||||||
BOOL_PARAMS_GETTER(self, print_timestamps)
|
|
||||||
}
|
|
||||||
static VALUE ruby_whisper_params_set_suppress_blank(VALUE self, VALUE value) {
|
|
||||||
BOOL_PARAMS_SETTER(self, suppress_blank, value)
|
|
||||||
}
|
|
||||||
static VALUE ruby_whisper_params_get_suppress_blank(VALUE self) {
|
|
||||||
BOOL_PARAMS_GETTER(self, suppress_blank)
|
|
||||||
}
|
|
||||||
static VALUE ruby_whisper_params_set_suppress_non_speech_tokens(VALUE self, VALUE value) {
|
|
||||||
BOOL_PARAMS_SETTER(self, suppress_non_speech_tokens, value)
|
|
||||||
}
|
|
||||||
static VALUE ruby_whisper_params_get_suppress_non_speech_tokens(VALUE self) {
|
|
||||||
BOOL_PARAMS_GETTER(self, suppress_non_speech_tokens)
|
|
||||||
}
|
|
||||||
static VALUE ruby_whisper_params_get_token_timestamps(VALUE self) {
|
|
||||||
BOOL_PARAMS_GETTER(self, token_timestamps)
|
|
||||||
}
|
|
||||||
static VALUE ruby_whisper_params_set_token_timestamps(VALUE self, VALUE value) {
|
|
||||||
BOOL_PARAMS_SETTER(self, token_timestamps, value)
|
|
||||||
}
|
|
||||||
static VALUE ruby_whisper_params_get_split_on_word(VALUE self) {
|
|
||||||
BOOL_PARAMS_GETTER(self, split_on_word)
|
|
||||||
}
|
|
||||||
static VALUE ruby_whisper_params_set_split_on_word(VALUE self, VALUE value) {
|
|
||||||
BOOL_PARAMS_SETTER(self, split_on_word, value)
|
|
||||||
}
|
|
||||||
static VALUE ruby_whisper_params_get_speed_up(VALUE self) {
|
|
||||||
BOOL_PARAMS_GETTER(self, speed_up)
|
|
||||||
}
|
|
||||||
static VALUE ruby_whisper_params_set_speed_up(VALUE self, VALUE value) {
|
|
||||||
BOOL_PARAMS_SETTER(self, speed_up, value)
|
|
||||||
}
|
|
||||||
/*
 * Reader for the wrapper-level `diarize` flag.
 * Returns Qtrue when the flag stored on the ruby_whisper_params struct is set,
 * Qfalse otherwise.
 */
static VALUE ruby_whisper_params_get_diarize(VALUE self) {
  ruby_whisper_params *wrapped_params;
  Data_Get_Struct(self, ruby_whisper_params, wrapped_params);
  return wrapped_params->diarize ? Qtrue : Qfalse;
}
|
|
||||||
/*
 * Writer for the wrapper-level `diarize` flag.
 * Any Ruby value other than nil/false enables the flag.
 * Returns the value that was passed in, like an ordinary Ruby attribute writer.
 */
static VALUE ruby_whisper_params_set_diarize(VALUE self, VALUE value) {
  ruby_whisper_params *rwp;
  Data_Get_Struct(self, ruby_whisper_params, rwp);
  /* RTEST is false exactly for Qnil and Qfalse, matching the previous explicit check. */
  rwp->diarize = RTEST(value) ? true : false;
  /* The stray line-continuation backslash that followed the closing brace here
   * (left over from a macro body) has been removed. */
  return value;
}
|
|
||||||
|
|
||||||
static VALUE ruby_whisper_params_get_offset(VALUE self) {
|
|
||||||
ruby_whisper_params *rwp;
|
|
||||||
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
|
||||||
return INT2NUM(rwp->params.offset_ms);
|
|
||||||
}
|
|
||||||
static VALUE ruby_whisper_params_set_offset(VALUE self, VALUE value) {
|
|
||||||
ruby_whisper_params *rwp;
|
|
||||||
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
|
||||||
rwp->params.offset_ms = NUM2INT(value);
|
|
||||||
return value;
|
|
||||||
}
|
|
||||||
static VALUE ruby_whisper_params_get_duration(VALUE self) {
|
|
||||||
ruby_whisper_params *rwp;
|
|
||||||
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
|
||||||
return INT2NUM(rwp->params.duration_ms);
|
|
||||||
}
|
|
||||||
static VALUE ruby_whisper_params_set_duration(VALUE self, VALUE value) {
|
|
||||||
ruby_whisper_params *rwp;
|
|
||||||
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
|
||||||
rwp->params.duration_ms = NUM2INT(value);
|
|
||||||
return value;
|
|
||||||
}
|
|
||||||
|
|
||||||
static VALUE ruby_whisper_params_get_max_text_tokens(VALUE self) {
|
|
||||||
ruby_whisper_params *rwp;
|
|
||||||
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
|
||||||
return INT2NUM(rwp->params.n_max_text_ctx);
|
|
||||||
}
|
|
||||||
static VALUE ruby_whisper_params_set_max_text_tokens(VALUE self, VALUE value) {
|
|
||||||
ruby_whisper_params *rwp;
|
|
||||||
Data_Get_Struct(self, ruby_whisper_params, rwp);
|
|
||||||
rwp->params.n_max_text_ctx = NUM2INT(value);
|
|
||||||
return value;
|
|
||||||
}
|
|
||||||
|
|
||||||
void Init_whisper() {
|
|
||||||
mWhisper = rb_define_module("Whisper");
|
|
||||||
cContext = rb_define_class_under(mWhisper, "Context", rb_cObject);
|
|
||||||
cParams = rb_define_class_under(mWhisper, "Params", rb_cObject);
|
|
||||||
|
|
||||||
rb_define_alloc_func(cContext, ruby_whisper_allocate);
|
|
||||||
rb_define_method(cContext, "initialize", ruby_whisper_initialize, -1);
|
|
||||||
|
|
||||||
rb_define_method(cContext, "transcribe", ruby_whisper_transcribe, -1);
|
|
||||||
|
|
||||||
rb_define_alloc_func(cParams, ruby_whisper_params_allocate);
|
|
||||||
|
|
||||||
rb_define_method(cParams, "language=", ruby_whisper_params_set_language, 1);
|
|
||||||
rb_define_method(cParams, "language", ruby_whisper_params_get_language, 0);
|
|
||||||
rb_define_method(cParams, "translate=", ruby_whisper_params_set_translate, 1);
|
|
||||||
rb_define_method(cParams, "translate", ruby_whisper_params_get_translate, 0);
|
|
||||||
rb_define_method(cParams, "no_context=", ruby_whisper_params_set_no_context, 1);
|
|
||||||
rb_define_method(cParams, "no_context", ruby_whisper_params_get_no_context, 0);
|
|
||||||
rb_define_method(cParams, "single_segment=", ruby_whisper_params_set_single_segment, 1);
|
|
||||||
rb_define_method(cParams, "single_segment", ruby_whisper_params_get_single_segment, 0);
|
|
||||||
rb_define_method(cParams, "print_special", ruby_whisper_params_get_print_special, 0);
|
|
||||||
rb_define_method(cParams, "print_special=", ruby_whisper_params_set_print_special, 1);
|
|
||||||
rb_define_method(cParams, "print_progress", ruby_whisper_params_get_print_progress, 0);
|
|
||||||
rb_define_method(cParams, "print_progress=", ruby_whisper_params_set_print_progress, 1);
|
|
||||||
rb_define_method(cParams, "print_realtime", ruby_whisper_params_get_print_realtime, 0);
|
|
||||||
rb_define_method(cParams, "print_realtime=", ruby_whisper_params_set_print_realtime, 1);
|
|
||||||
rb_define_method(cParams, "print_timestamps", ruby_whisper_params_get_print_timestamps, 0);
|
|
||||||
rb_define_method(cParams, "print_timestamps=", ruby_whisper_params_set_print_timestamps, 1);
|
|
||||||
rb_define_method(cParams, "suppress_blank", ruby_whisper_params_get_suppress_blank, 0);
|
|
||||||
rb_define_method(cParams, "suppress_blank=", ruby_whisper_params_set_suppress_blank, 1);
|
|
||||||
rb_define_method(cParams, "suppress_non_speech_tokens", ruby_whisper_params_get_suppress_non_speech_tokens, 0);
|
|
||||||
rb_define_method(cParams, "suppress_non_speech_tokens=", ruby_whisper_params_set_suppress_non_speech_tokens, 1);
|
|
||||||
rb_define_method(cParams, "token_timestamps", ruby_whisper_params_get_token_timestamps, 0);
|
|
||||||
rb_define_method(cParams, "token_timestamps=", ruby_whisper_params_set_token_timestamps, 1);
|
|
||||||
rb_define_method(cParams, "split_on_word", ruby_whisper_params_get_split_on_word, 0);
|
|
||||||
rb_define_method(cParams, "split_on_word=", ruby_whisper_params_set_split_on_word, 1);
|
|
||||||
rb_define_method(cParams, "speed_up", ruby_whisper_params_get_speed_up, 0);
|
|
||||||
rb_define_method(cParams, "speed_up=", ruby_whisper_params_set_speed_up, 1);
|
|
||||||
rb_define_method(cParams, "diarize", ruby_whisper_params_get_diarize, 0);
|
|
||||||
rb_define_method(cParams, "diarize=", ruby_whisper_params_set_diarize, 1);
|
|
||||||
|
|
||||||
rb_define_method(cParams, "offset", ruby_whisper_params_get_offset, 0);
|
|
||||||
rb_define_method(cParams, "offset=", ruby_whisper_params_set_offset, 1);
|
|
||||||
rb_define_method(cParams, "duration", ruby_whisper_params_get_duration, 0);
|
|
||||||
rb_define_method(cParams, "duration=", ruby_whisper_params_set_duration, 1);
|
|
||||||
|
|
||||||
rb_define_method(cParams, "max_text_tokens", ruby_whisper_params_get_max_text_tokens, 0);
|
|
||||||
rb_define_method(cParams, "max_text_tokens=", ruby_whisper_params_set_max_text_tokens, 1);
|
|
||||||
}
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
@ -1,8 +1,15 @@
|
|||||||
#ifndef __RUBY_WHISPER_H
|
#ifndef RUBY_WHISPER_H
|
||||||
#define __RUBY_WHISPER_H
|
#define RUBY_WHISPER_H
|
||||||
|
|
||||||
#include "whisper.h"
|
#include "whisper.h"
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
VALUE *context;
|
||||||
|
VALUE user_data;
|
||||||
|
VALUE callback;
|
||||||
|
VALUE callbacks;
|
||||||
|
} ruby_whisper_callback_container;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
struct whisper_context *context;
|
struct whisper_context *context;
|
||||||
} ruby_whisper;
|
} ruby_whisper;
|
||||||
@ -10,6 +17,18 @@ typedef struct {
|
|||||||
typedef struct {
|
typedef struct {
|
||||||
struct whisper_full_params params;
|
struct whisper_full_params params;
|
||||||
bool diarize;
|
bool diarize;
|
||||||
|
ruby_whisper_callback_container *new_segment_callback_container;
|
||||||
|
ruby_whisper_callback_container *progress_callback_container;
|
||||||
|
ruby_whisper_callback_container *abort_callback_container;
|
||||||
} ruby_whisper_params;
|
} ruby_whisper_params;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
VALUE context;
|
||||||
|
int index;
|
||||||
|
} ruby_whisper_segment;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
VALUE context;
|
||||||
|
} ruby_whisper_model;
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
613
bindings/ruby/ext/ruby_whisper_context.c
Normal file
613
bindings/ruby/ext/ruby_whisper_context.c
Normal file
@ -0,0 +1,613 @@
|
|||||||
|
#include <ruby.h>
|
||||||
|
#include <ruby/memory_view.h>
|
||||||
|
#include "ruby_whisper.h"
|
||||||
|
|
||||||
|
extern ID id_to_s;
|
||||||
|
extern ID id___method__;
|
||||||
|
extern ID id_to_enum;
|
||||||
|
extern ID id_length;
|
||||||
|
extern ID id_next;
|
||||||
|
extern ID id_new;
|
||||||
|
extern ID id_to_path;
|
||||||
|
extern ID id_URI;
|
||||||
|
extern ID id_pre_converted_models;
|
||||||
|
|
||||||
|
extern VALUE cContext;
|
||||||
|
extern VALUE eError;
|
||||||
|
extern VALUE cModel;
|
||||||
|
|
||||||
|
extern VALUE ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self);
|
||||||
|
extern VALUE rb_whisper_model_initialize(VALUE context);
|
||||||
|
extern VALUE rb_whisper_segment_initialize(VALUE context, int index);
|
||||||
|
extern void register_callbacks(ruby_whisper_params *rwp, VALUE *context);
|
||||||
|
|
||||||
|
static void
|
||||||
|
ruby_whisper_free(ruby_whisper *rw)
|
||||||
|
{
|
||||||
|
if (rw->context) {
|
||||||
|
whisper_free(rw->context);
|
||||||
|
rw->context = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
rb_whisper_mark(ruby_whisper *rw)
|
||||||
|
{
|
||||||
|
// call rb_gc_mark on any ruby references in rw
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
rb_whisper_free(ruby_whisper *rw)
|
||||||
|
{
|
||||||
|
ruby_whisper_free(rw);
|
||||||
|
free(rw);
|
||||||
|
}
|
||||||
|
|
||||||
|
static VALUE
|
||||||
|
ruby_whisper_allocate(VALUE klass)
|
||||||
|
{
|
||||||
|
ruby_whisper *rw;
|
||||||
|
rw = ALLOC(ruby_whisper);
|
||||||
|
rw->context = NULL;
|
||||||
|
return Data_Wrap_Struct(klass, rb_whisper_mark, rb_whisper_free, rw);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* call-seq:
|
||||||
|
* new("base.en") -> Whisper::Context
|
||||||
|
* new("path/to/model.bin") -> Whisper::Context
|
||||||
|
* new(Whisper::Model::URI.new("https://example.net/uri/of/model.bin")) -> Whisper::Context
|
||||||
|
*/
|
||||||
|
static VALUE
|
||||||
|
ruby_whisper_initialize(int argc, VALUE *argv, VALUE self)
|
||||||
|
{
|
||||||
|
ruby_whisper *rw;
|
||||||
|
VALUE whisper_model_file_path;
|
||||||
|
|
||||||
|
// TODO: we can support init from buffer here too maybe another ruby object to expose
|
||||||
|
rb_scan_args(argc, argv, "01", &whisper_model_file_path);
|
||||||
|
Data_Get_Struct(self, ruby_whisper, rw);
|
||||||
|
|
||||||
|
VALUE pre_converted_models = rb_funcall(cModel, id_pre_converted_models, 0);
|
||||||
|
VALUE pre_converted_model = rb_hash_aref(pre_converted_models, whisper_model_file_path);
|
||||||
|
if (!NIL_P(pre_converted_model)) {
|
||||||
|
whisper_model_file_path = pre_converted_model;
|
||||||
|
}
|
||||||
|
if (TYPE(whisper_model_file_path) == T_STRING) {
|
||||||
|
const char * whisper_model_file_path_str = StringValueCStr(whisper_model_file_path);
|
||||||
|
if (strncmp("http://", whisper_model_file_path_str, 7) == 0 || strncmp("https://", whisper_model_file_path_str, 8) == 0) {
|
||||||
|
VALUE uri_class = rb_const_get(cModel, id_URI);
|
||||||
|
whisper_model_file_path = rb_class_new_instance(1, &whisper_model_file_path, uri_class);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (rb_obj_is_kind_of(whisper_model_file_path, rb_path2class("URI::HTTP"))) {
|
||||||
|
VALUE uri_class = rb_const_get(cModel, id_URI);
|
||||||
|
whisper_model_file_path = rb_class_new_instance(1, &whisper_model_file_path, uri_class);
|
||||||
|
}
|
||||||
|
if (rb_respond_to(whisper_model_file_path, id_to_path)) {
|
||||||
|
whisper_model_file_path = rb_funcall(whisper_model_file_path, id_to_path, 0);
|
||||||
|
}
|
||||||
|
if (!rb_respond_to(whisper_model_file_path, id_to_s)) {
|
||||||
|
rb_raise(rb_eRuntimeError, "Expected file path to model to initialize Whisper::Context");
|
||||||
|
}
|
||||||
|
rw->context = whisper_init_from_file_with_params(StringValueCStr(whisper_model_file_path), whisper_context_default_params());
|
||||||
|
if (rw->context == NULL) {
|
||||||
|
rb_raise(rb_eRuntimeError, "error: failed to initialize whisper context");
|
||||||
|
}
|
||||||
|
return self;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* call-seq:
|
||||||
|
* model_n_vocab -> Integer
|
||||||
|
*/
|
||||||
|
VALUE ruby_whisper_model_n_vocab(VALUE self)
|
||||||
|
{
|
||||||
|
ruby_whisper *rw;
|
||||||
|
Data_Get_Struct(self, ruby_whisper, rw);
|
||||||
|
return INT2NUM(whisper_model_n_vocab(rw->context));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* call-seq:
|
||||||
|
* model_n_audio_ctx -> Integer
|
||||||
|
*/
|
||||||
|
VALUE ruby_whisper_model_n_audio_ctx(VALUE self)
|
||||||
|
{
|
||||||
|
ruby_whisper *rw;
|
||||||
|
Data_Get_Struct(self, ruby_whisper, rw);
|
||||||
|
return INT2NUM(whisper_model_n_audio_ctx(rw->context));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* call-seq:
|
||||||
|
* model_n_audio_state -> Integer
|
||||||
|
*/
|
||||||
|
VALUE ruby_whisper_model_n_audio_state(VALUE self)
|
||||||
|
{
|
||||||
|
ruby_whisper *rw;
|
||||||
|
Data_Get_Struct(self, ruby_whisper, rw);
|
||||||
|
return INT2NUM(whisper_model_n_audio_state(rw->context));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* call-seq:
|
||||||
|
* model_n_audio_head -> Integer
|
||||||
|
*/
|
||||||
|
VALUE ruby_whisper_model_n_audio_head(VALUE self)
|
||||||
|
{
|
||||||
|
ruby_whisper *rw;
|
||||||
|
Data_Get_Struct(self, ruby_whisper, rw);
|
||||||
|
return INT2NUM(whisper_model_n_audio_head(rw->context));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* call-seq:
|
||||||
|
* model_n_audio_layer -> Integer
|
||||||
|
*/
|
||||||
|
VALUE ruby_whisper_model_n_audio_layer(VALUE self)
|
||||||
|
{
|
||||||
|
ruby_whisper *rw;
|
||||||
|
Data_Get_Struct(self, ruby_whisper, rw);
|
||||||
|
return INT2NUM(whisper_model_n_audio_layer(rw->context));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* call-seq:
|
||||||
|
* model_n_text_ctx -> Integer
|
||||||
|
*/
|
||||||
|
VALUE ruby_whisper_model_n_text_ctx(VALUE self)
|
||||||
|
{
|
||||||
|
ruby_whisper *rw;
|
||||||
|
Data_Get_Struct(self, ruby_whisper, rw);
|
||||||
|
return INT2NUM(whisper_model_n_text_ctx(rw->context));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* call-seq:
|
||||||
|
* model_n_text_state -> Integer
|
||||||
|
*/
|
||||||
|
VALUE ruby_whisper_model_n_text_state(VALUE self)
|
||||||
|
{
|
||||||
|
ruby_whisper *rw;
|
||||||
|
Data_Get_Struct(self, ruby_whisper, rw);
|
||||||
|
return INT2NUM(whisper_model_n_text_state(rw->context));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* call-seq:
|
||||||
|
* model_n_text_head -> Integer
|
||||||
|
*/
|
||||||
|
VALUE ruby_whisper_model_n_text_head(VALUE self)
|
||||||
|
{
|
||||||
|
ruby_whisper *rw;
|
||||||
|
Data_Get_Struct(self, ruby_whisper, rw);
|
||||||
|
return INT2NUM(whisper_model_n_text_head(rw->context));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* call-seq:
|
||||||
|
* model_n_text_layer -> Integer
|
||||||
|
*/
|
||||||
|
VALUE ruby_whisper_model_n_text_layer(VALUE self)
|
||||||
|
{
|
||||||
|
ruby_whisper *rw;
|
||||||
|
Data_Get_Struct(self, ruby_whisper, rw);
|
||||||
|
return INT2NUM(whisper_model_n_text_layer(rw->context));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* call-seq:
|
||||||
|
* model_n_mels -> Integer
|
||||||
|
*/
|
||||||
|
VALUE ruby_whisper_model_n_mels(VALUE self)
|
||||||
|
{
|
||||||
|
ruby_whisper *rw;
|
||||||
|
Data_Get_Struct(self, ruby_whisper, rw);
|
||||||
|
return INT2NUM(whisper_model_n_mels(rw->context));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* call-seq:
|
||||||
|
* model_ftype -> Integer
|
||||||
|
*/
|
||||||
|
VALUE ruby_whisper_model_ftype(VALUE self)
|
||||||
|
{
|
||||||
|
ruby_whisper *rw;
|
||||||
|
Data_Get_Struct(self, ruby_whisper, rw);
|
||||||
|
return INT2NUM(whisper_model_ftype(rw->context));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* call-seq:
|
||||||
|
* model_type -> String
|
||||||
|
*/
|
||||||
|
VALUE ruby_whisper_model_type(VALUE self)
|
||||||
|
{
|
||||||
|
ruby_whisper *rw;
|
||||||
|
Data_Get_Struct(self, ruby_whisper, rw);
|
||||||
|
return rb_str_new2(whisper_model_type_readable(rw->context));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
|
||||||
|
* Not thread safe for same context
|
||||||
|
* Uses the specified decoding strategy to obtain the text.
|
||||||
|
*
|
||||||
|
* call-seq:
|
||||||
|
* full(params, samples, n_samples) -> nil
|
||||||
|
* full(params, samples) -> nil
|
||||||
|
*
|
||||||
|
* The second argument +samples+ must be an array of samples, respond to :length, or be a MemoryView of an array of float. It must be 32 bit float PCM audio data.
|
||||||
|
*/
|
||||||
|
VALUE ruby_whisper_full(int argc, VALUE *argv, VALUE self)
|
||||||
|
{
|
||||||
|
if (argc < 2 || argc > 3) {
|
||||||
|
rb_raise(rb_eArgError, "wrong number of arguments (given %d, expected 2..3)", argc);
|
||||||
|
}
|
||||||
|
|
||||||
|
ruby_whisper *rw;
|
||||||
|
ruby_whisper_params *rwp;
|
||||||
|
Data_Get_Struct(self, ruby_whisper, rw);
|
||||||
|
VALUE params = argv[0];
|
||||||
|
Data_Get_Struct(params, ruby_whisper_params, rwp);
|
||||||
|
VALUE samples = argv[1];
|
||||||
|
int n_samples;
|
||||||
|
rb_memory_view_t view;
|
||||||
|
const bool memory_view_available_p = rb_memory_view_available_p(samples);
|
||||||
|
if (argc == 3) {
|
||||||
|
n_samples = NUM2INT(argv[2]);
|
||||||
|
if (TYPE(samples) == T_ARRAY) {
|
||||||
|
if (RARRAY_LEN(samples) < n_samples) {
|
||||||
|
rb_raise(rb_eArgError, "samples length %ld is less than n_samples %d", RARRAY_LEN(samples), n_samples);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Should check when samples.respond_to?(:length)?
|
||||||
|
} else {
|
||||||
|
if (TYPE(samples) == T_ARRAY) {
|
||||||
|
n_samples = RARRAY_LEN(samples);
|
||||||
|
} else if (memory_view_available_p) {
|
||||||
|
if (!rb_memory_view_get(samples, &view, RUBY_MEMORY_VIEW_SIMPLE)) {
|
||||||
|
view.obj = Qnil;
|
||||||
|
rb_raise(rb_eArgError, "unable to get a memory view");
|
||||||
|
}
|
||||||
|
n_samples = view.byte_size / view.item_size;
|
||||||
|
} else if (rb_respond_to(samples, id_length)) {
|
||||||
|
n_samples = NUM2INT(rb_funcall(samples, id_length, 0));
|
||||||
|
} else {
|
||||||
|
rb_raise(rb_eArgError, "samples must respond to :length or be a MemoryView of an array of flaot when n_samples is not given");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
float * c_samples = (float *)malloc(n_samples * sizeof(float));
|
||||||
|
if (memory_view_available_p) {
|
||||||
|
c_samples = (float *)view.data;
|
||||||
|
} else {
|
||||||
|
if (TYPE(samples) == T_ARRAY) {
|
||||||
|
for (int i = 0; i < n_samples; i++) {
|
||||||
|
c_samples[i] = RFLOAT_VALUE(rb_ary_entry(samples, i));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// TODO: use rb_block_call
|
||||||
|
VALUE iter = rb_funcall(samples, id_to_enum, 1, rb_str_new2("each"));
|
||||||
|
for (int i = 0; i < n_samples; i++) {
|
||||||
|
// TODO: check if iter is exhausted and raise ArgumentError appropriately
|
||||||
|
VALUE sample = rb_funcall(iter, id_next, 0);
|
||||||
|
c_samples[i] = RFLOAT_VALUE(sample);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
register_callbacks(rwp, &self);
|
||||||
|
const int result = whisper_full(rw->context, rwp->params, c_samples, n_samples);
|
||||||
|
if (0 == result) {
|
||||||
|
return self;
|
||||||
|
} else {
|
||||||
|
rb_exc_raise(rb_funcall(eError, id_new, 1, result));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Split the input audio in chunks and process each chunk separately using whisper_full_with_state()
|
||||||
|
* Result is stored in the default state of the context
|
||||||
|
* Not thread safe if executed in parallel on the same context.
|
||||||
|
* It seems this approach can offer some speedup in some cases.
|
||||||
|
* However, the transcription accuracy can be worse at the beginning and end of each chunk.
|
||||||
|
*
|
||||||
|
* call-seq:
|
||||||
|
* full_parallel(params, samples) -> nil
|
||||||
|
* full_parallel(params, samples, n_samples) -> nil
|
||||||
|
* full_parallel(params, samples, n_samples, n_processors) -> nil
|
||||||
|
* full_parallel(params, samples, nil, n_processors) -> nil
|
||||||
|
*/
|
||||||
|
static VALUE
|
||||||
|
ruby_whisper_full_parallel(int argc, VALUE *argv,VALUE self)
|
||||||
|
{
|
||||||
|
if (argc < 2 || argc > 4) {
|
||||||
|
rb_raise(rb_eArgError, "wrong number of arguments (given %d, expected 2..3)", argc);
|
||||||
|
}
|
||||||
|
|
||||||
|
ruby_whisper *rw;
|
||||||
|
ruby_whisper_params *rwp;
|
||||||
|
Data_Get_Struct(self, ruby_whisper, rw);
|
||||||
|
VALUE params = argv[0];
|
||||||
|
Data_Get_Struct(params, ruby_whisper_params, rwp);
|
||||||
|
VALUE samples = argv[1];
|
||||||
|
int n_samples;
|
||||||
|
int n_processors;
|
||||||
|
rb_memory_view_t view;
|
||||||
|
const bool memory_view_available_p = rb_memory_view_available_p(samples);
|
||||||
|
switch (argc) {
|
||||||
|
case 2:
|
||||||
|
n_processors = 1;
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
n_processors = 1;
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
n_processors = NUM2INT(argv[3]);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (argc >= 3 && !NIL_P(argv[2])) {
|
||||||
|
n_samples = NUM2INT(argv[2]);
|
||||||
|
if (TYPE(samples) == T_ARRAY) {
|
||||||
|
if (RARRAY_LEN(samples) < n_samples) {
|
||||||
|
rb_raise(rb_eArgError, "samples length %ld is less than n_samples %d", RARRAY_LEN(samples), n_samples);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Should check when samples.respond_to?(:length)?
|
||||||
|
} else if (memory_view_available_p) {
|
||||||
|
if (!rb_memory_view_get(samples, &view, RUBY_MEMORY_VIEW_SIMPLE)) {
|
||||||
|
view.obj = Qnil;
|
||||||
|
rb_raise(rb_eArgError, "unable to get a memory view");
|
||||||
|
}
|
||||||
|
n_samples = view.byte_size / view.item_size;
|
||||||
|
} else {
|
||||||
|
if (TYPE(samples) == T_ARRAY) {
|
||||||
|
n_samples = RARRAY_LEN(samples);
|
||||||
|
} else if (rb_respond_to(samples, id_length)) {
|
||||||
|
n_samples = NUM2INT(rb_funcall(samples, id_length, 0));
|
||||||
|
} else {
|
||||||
|
rb_raise(rb_eArgError, "samples must respond to :length or be a MemoryView of an array of flaot when n_samples is not given");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
float * c_samples = (float *)malloc(n_samples * sizeof(float));
|
||||||
|
if (memory_view_available_p) {
|
||||||
|
c_samples = (float *)view.data;
|
||||||
|
} else {
|
||||||
|
if (TYPE(samples) == T_ARRAY) {
|
||||||
|
for (int i = 0; i < n_samples; i++) {
|
||||||
|
c_samples[i] = RFLOAT_VALUE(rb_ary_entry(samples, i));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// FIXME: use rb_block_call
|
||||||
|
VALUE iter = rb_funcall(samples, id_to_enum, 1, rb_str_new2("each"));
|
||||||
|
for (int i = 0; i < n_samples; i++) {
|
||||||
|
// TODO: check if iter is exhausted and raise ArgumentError
|
||||||
|
VALUE sample = rb_funcall(iter, id_next, 0);
|
||||||
|
c_samples[i] = RFLOAT_VALUE(sample);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
register_callbacks(rwp, &self);
|
||||||
|
const int result = whisper_full_parallel(rw->context, rwp->params, c_samples, n_samples, n_processors);
|
||||||
|
if (0 == result) {
|
||||||
|
return self;
|
||||||
|
} else {
|
||||||
|
rb_exc_raise(rb_funcall(eError, id_new, 1, result));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Number of segments.
|
||||||
|
*
|
||||||
|
* call-seq:
|
||||||
|
* full_n_segments -> Integer
|
||||||
|
*/
|
||||||
|
static VALUE
|
||||||
|
ruby_whisper_full_n_segments(VALUE self)
|
||||||
|
{
|
||||||
|
ruby_whisper *rw;
|
||||||
|
Data_Get_Struct(self, ruby_whisper, rw);
|
||||||
|
return INT2NUM(whisper_full_n_segments(rw->context));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Language ID, which can be converted to string by Whisper.lang_str and Whisper.lang_str_full.
|
||||||
|
*
|
||||||
|
* call-seq:
|
||||||
|
* full_lang_id -> Integer
|
||||||
|
*/
|
||||||
|
static VALUE
|
||||||
|
ruby_whisper_full_lang_id(VALUE self)
|
||||||
|
{
|
||||||
|
ruby_whisper *rw;
|
||||||
|
Data_Get_Struct(self, ruby_whisper, rw);
|
||||||
|
return INT2NUM(whisper_full_lang_id(rw->context));
|
||||||
|
}
|
||||||
|
|
||||||
|
static int ruby_whisper_full_check_segment_index(const ruby_whisper * rw, const VALUE i_segment)
|
||||||
|
{
|
||||||
|
const int c_i_segment = NUM2INT(i_segment);
|
||||||
|
if (c_i_segment < 0 || c_i_segment >= whisper_full_n_segments(rw->context)) {
|
||||||
|
rb_raise(rb_eIndexError, "segment index %d out of range", c_i_segment);
|
||||||
|
}
|
||||||
|
return c_i_segment;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Start time of a segment indexed by +segment_index+ in centiseconds (10 times milliseconds).
|
||||||
|
*
|
||||||
|
* full_get_segment_t0(3) # => 1668 (16680 ms)
|
||||||
|
*
|
||||||
|
* call-seq:
|
||||||
|
* full_get_segment_t0(segment_index) -> Integer
|
||||||
|
*/
|
||||||
|
static VALUE
|
||||||
|
ruby_whisper_full_get_segment_t0(VALUE self, VALUE i_segment)
|
||||||
|
{
|
||||||
|
ruby_whisper *rw;
|
||||||
|
Data_Get_Struct(self, ruby_whisper, rw);
|
||||||
|
const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
|
||||||
|
const int64_t t0 = whisper_full_get_segment_t0(rw->context, c_i_segment);
|
||||||
|
return INT2NUM(t0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* End time of a segment indexed by +segment_index+ in centiseconds (10 times milliseconds).
|
||||||
|
*
|
||||||
|
* full_get_segment_t1(3) # => 1668 (16680 ms)
|
||||||
|
*
|
||||||
|
* call-seq:
|
||||||
|
* full_get_segment_t1(segment_index) -> Integer
|
||||||
|
*/
|
||||||
|
static VALUE
|
||||||
|
ruby_whisper_full_get_segment_t1(VALUE self, VALUE i_segment)
|
||||||
|
{
|
||||||
|
ruby_whisper *rw;
|
||||||
|
Data_Get_Struct(self, ruby_whisper, rw);
|
||||||
|
const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
|
||||||
|
const int64_t t1 = whisper_full_get_segment_t1(rw->context, c_i_segment);
|
||||||
|
return INT2NUM(t1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Whether the next segment indexed by +segment_index+ is predicted as a speaker turn.
|
||||||
|
*
|
||||||
|
* full_get_segment_speaker_turn_next(3) # => true
|
||||||
|
*
|
||||||
|
* call-seq:
|
||||||
|
* full_get_segment_speaker_turn_next(segment_index) -> bool
|
||||||
|
*/
|
||||||
|
static VALUE
|
||||||
|
ruby_whisper_full_get_segment_speaker_turn_next(VALUE self, VALUE i_segment)
|
||||||
|
{
|
||||||
|
ruby_whisper *rw;
|
||||||
|
Data_Get_Struct(self, ruby_whisper, rw);
|
||||||
|
const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
|
||||||
|
const bool speaker_turn_next = whisper_full_get_segment_speaker_turn_next(rw->context, c_i_segment);
|
||||||
|
return speaker_turn_next ? Qtrue : Qfalse;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Text of a segment indexed by +segment_index+.
|
||||||
|
*
|
||||||
|
* full_get_segment_text(3) # => "ask not what your country can do for you, ..."
|
||||||
|
*
|
||||||
|
* call-seq:
|
||||||
|
* full_get_segment_text(segment_index) -> String
|
||||||
|
*/
|
||||||
|
static VALUE
|
||||||
|
ruby_whisper_full_get_segment_text(VALUE self, VALUE i_segment)
|
||||||
|
{
|
||||||
|
ruby_whisper *rw;
|
||||||
|
Data_Get_Struct(self, ruby_whisper, rw);
|
||||||
|
const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
|
||||||
|
const char * text = whisper_full_get_segment_text(rw->context, c_i_segment);
|
||||||
|
return rb_str_new2(text);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* call-seq:
|
||||||
|
* full_get_segment_no_speech_prob(segment_index) -> Float
|
||||||
|
*/
|
||||||
|
static VALUE
|
||||||
|
ruby_whisper_full_get_segment_no_speech_prob(VALUE self, VALUE i_segment)
|
||||||
|
{
|
||||||
|
ruby_whisper *rw;
|
||||||
|
Data_Get_Struct(self, ruby_whisper, rw);
|
||||||
|
const int c_i_segment = ruby_whisper_full_check_segment_index(rw, i_segment);
|
||||||
|
const float no_speech_prob = whisper_full_get_segment_no_speech_prob(rw->context, c_i_segment);
|
||||||
|
return DBL2NUM(no_speech_prob);
|
||||||
|
}
|
||||||
|
|
||||||
|
// High level API
|
||||||
|
|
||||||
|
static VALUE
|
||||||
|
ruby_whisper_full_get_segment(VALUE self, VALUE i_segment)
|
||||||
|
{
|
||||||
|
return rb_whisper_segment_initialize(self, NUM2INT(i_segment));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Yields each Whisper::Segment:
|
||||||
|
*
|
||||||
|
* whisper.transcribe("path/to/audio.wav", params)
|
||||||
|
* whisper.each_segment do |segment|
|
||||||
|
* puts segment.text
|
||||||
|
* end
|
||||||
|
*
|
||||||
|
* Returns an Enumerator if no block given:
|
||||||
|
*
|
||||||
|
* whisper.transcribe("path/to/audio.wav", params)
|
||||||
|
* enum = whisper.each_segment
|
||||||
|
* enum.to_a # => [#<Whisper::Segment>, ...]
|
||||||
|
*
|
||||||
|
* call-seq:
|
||||||
|
* each_segment {|segment| ... }
|
||||||
|
* each_segment -> Enumerator
|
||||||
|
*/
|
||||||
|
static VALUE
|
||||||
|
ruby_whisper_each_segment(VALUE self)
|
||||||
|
{
|
||||||
|
if (!rb_block_given_p()) {
|
||||||
|
const VALUE method_name = rb_funcall(self, id___method__, 0);
|
||||||
|
return rb_funcall(self, id_to_enum, 1, method_name);
|
||||||
|
}
|
||||||
|
|
||||||
|
ruby_whisper *rw;
|
||||||
|
Data_Get_Struct(self, ruby_whisper, rw);
|
||||||
|
|
||||||
|
const int n_segments = whisper_full_n_segments(rw->context);
|
||||||
|
for (int i = 0; i < n_segments; ++i) {
|
||||||
|
rb_yield(rb_whisper_segment_initialize(self, i));
|
||||||
|
}
|
||||||
|
|
||||||
|
return self;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* call-seq:
|
||||||
|
* model -> Whisper::Model
|
||||||
|
*/
|
||||||
|
static VALUE
|
||||||
|
ruby_whisper_get_model(VALUE self)
|
||||||
|
{
|
||||||
|
return rb_whisper_model_initialize(self);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
init_ruby_whisper_context(VALUE *mWhisper)
|
||||||
|
{
|
||||||
|
cContext = rb_define_class_under(*mWhisper, "Context", rb_cObject);
|
||||||
|
|
||||||
|
rb_define_alloc_func(cContext, ruby_whisper_allocate);
|
||||||
|
rb_define_method(cContext, "initialize", ruby_whisper_initialize, -1);
|
||||||
|
|
||||||
|
rb_define_method(cContext, "transcribe", ruby_whisper_transcribe, -1);
|
||||||
|
rb_define_method(cContext, "model_n_vocab", ruby_whisper_model_n_vocab, 0);
|
||||||
|
rb_define_method(cContext, "model_n_audio_ctx", ruby_whisper_model_n_audio_ctx, 0);
|
||||||
|
rb_define_method(cContext, "model_n_audio_state", ruby_whisper_model_n_audio_state, 0);
|
||||||
|
rb_define_method(cContext, "model_n_audio_head", ruby_whisper_model_n_audio_head, 0);
|
||||||
|
rb_define_method(cContext, "model_n_audio_layer", ruby_whisper_model_n_audio_layer, 0);
|
||||||
|
rb_define_method(cContext, "model_n_text_ctx", ruby_whisper_model_n_text_ctx, 0);
|
||||||
|
rb_define_method(cContext, "model_n_text_state", ruby_whisper_model_n_text_state, 0);
|
||||||
|
rb_define_method(cContext, "model_n_text_head", ruby_whisper_model_n_text_head, 0);
|
||||||
|
rb_define_method(cContext, "model_n_text_layer", ruby_whisper_model_n_text_layer, 0);
|
||||||
|
rb_define_method(cContext, "model_n_mels", ruby_whisper_model_n_mels, 0);
|
||||||
|
rb_define_method(cContext, "model_ftype", ruby_whisper_model_ftype, 0);
|
||||||
|
rb_define_method(cContext, "model_type", ruby_whisper_model_type, 0);
|
||||||
|
rb_define_method(cContext, "full_n_segments", ruby_whisper_full_n_segments, 0);
|
||||||
|
rb_define_method(cContext, "full_lang_id", ruby_whisper_full_lang_id, 0);
|
||||||
|
rb_define_method(cContext, "full_get_segment_t0", ruby_whisper_full_get_segment_t0, 1);
|
||||||
|
rb_define_method(cContext, "full_get_segment_t1", ruby_whisper_full_get_segment_t1, 1);
|
||||||
|
rb_define_method(cContext, "full_get_segment_speaker_turn_next", ruby_whisper_full_get_segment_speaker_turn_next, 1);
|
||||||
|
rb_define_method(cContext, "full_get_segment_text", ruby_whisper_full_get_segment_text, 1);
|
||||||
|
rb_define_method(cContext, "full_get_segment_no_speech_prob", ruby_whisper_full_get_segment_no_speech_prob, 1);
|
||||||
|
rb_define_method(cContext, "full", ruby_whisper_full, -1);
|
||||||
|
rb_define_method(cContext, "full_parallel", ruby_whisper_full_parallel, -1);
|
||||||
|
|
||||||
|
// High level
|
||||||
|
rb_define_method(cContext, "full_get_segment", ruby_whisper_full_get_segment, 1);
|
||||||
|
rb_define_method(cContext, "each_segment", ruby_whisper_each_segment, 0);
|
||||||
|
|
||||||
|
rb_define_method(cContext, "model", ruby_whisper_get_model, 0);
|
||||||
|
}
|
52
bindings/ruby/ext/ruby_whisper_error.c
Normal file
52
bindings/ruby/ext/ruby_whisper_error.c
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
#include <ruby.h>
|
||||||
|
|
||||||
|
extern VALUE eError;
|
||||||
|
|
||||||
|
/*
 * Whisper::Error#initialize: turns a numeric error code into a message,
 * forwards that message to the superclass initializer and stores the original
 * code object in @code.
 *
 * Codes -2 .. -9 have dedicated messages; anything else yields "unknown error".
 * NOTE(review): -8 and -9 currently share the same text - confirm against the
 * return codes documented in whisper.h whether they should differ.
 */
VALUE ruby_whisper_error_initialize(VALUE self, VALUE code)
{
  // Indexed by (-code - 2), i.e. slot 0 is code -2 and slot 7 is code -9.
  static const char *const known_messages[] = {
    "failed to compute log mel spectrogram",
    "failed to auto-detect language",
    "too many decoders requested",
    "audio_ctx is larger than the maximum allowed",
    "failed to encode",
    "whisper_kv_cache_init() failed for self-attention cache",
    "failed to decode",
    "failed to decode",
  };
  const int c_code = NUM2INT(code);
  const char *raw_message = "unknown error";

  // Range-check before negating so that extreme values never index the table.
  if (c_code <= -2 && c_code >= -9) {
    raw_message = known_messages[-c_code - 2];
  }

  const VALUE message = rb_str_new2(raw_message);
  rb_call_super(1, &message);
  rb_iv_set(self, "@code", code);

  return self;
}
|
||||||
|
|
||||||
|
void
|
||||||
|
init_ruby_whisper_error(VALUE *mWhisper)
|
||||||
|
{
|
||||||
|
eError = rb_define_class_under(*mWhisper, "Error", rb_eStandardError);
|
||||||
|
|
||||||
|
rb_define_attr(eError, "code", true, false);
|
||||||
|
rb_define_method(eError, "initialize", ruby_whisper_error_initialize, 1);
|
||||||
|
}
|
210
bindings/ruby/ext/ruby_whisper_model.c
Normal file
210
bindings/ruby/ext/ruby_whisper_model.c
Normal file
@ -0,0 +1,210 @@
|
|||||||
|
#include <ruby.h>
|
||||||
|
#include "ruby_whisper.h"
|
||||||
|
|
||||||
|
extern VALUE cModel;
|
||||||
|
|
||||||
|
/* GC mark callback: keeps the Ruby object stored in `context` alive. */
static void rb_whisper_model_mark(ruby_whisper_model *model) {
  const VALUE owner = model->context;
  rb_gc_mark(owner);
}
|
||||||
|
|
||||||
|
/*
 * Allocator for Whisper::Model: allocates a ruby_whisper_model and wraps it in
 * a Ruby object of class `klass`, using rb_whisper_model_mark as the mark
 * callback and the default free function.
 */
static VALUE ruby_whisper_model_allocate(VALUE klass) {
  ruby_whisper_model *rwm;
  rwm = ALLOC(ruby_whisper_model);
  // ALLOC() does not zero the memory. The mark callback passes `context` to
  // rb_gc_mark(), so it must hold a valid VALUE before the GC can see the
  // wrapper (e.g. when the object is created but never given a context).
  rwm->context = Qnil;
  return Data_Wrap_Struct(klass, rb_whisper_model_mark, RUBY_DEFAULT_FREE, rwm);
}
|
||||||
|
|
||||||
|
/*
 * Builds a new Whisper::Model object whose `context` field refers to the given
 * Ruby object.
 *
 * context: Ruby object stored in the model; the accessors of this class unwrap
 *          it as a ruby_whisper.
 * Returns the newly created model object.
 */
VALUE rb_whisper_model_initialize(VALUE context) {
  ruby_whisper_model *rwm;
  const VALUE model = ruby_whisper_model_allocate(cModel);
  Data_Get_Struct(model, ruby_whisper_model, rwm);
  rwm->context = context;
  return model;
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* call-seq:
|
||||||
|
* n_vocab -> Integer
|
||||||
|
*/
|
||||||
|
static VALUE
|
||||||
|
ruby_whisper_model_n_vocab(VALUE self)
|
||||||
|
{
|
||||||
|
ruby_whisper_model *rwm;
|
||||||
|
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
||||||
|
ruby_whisper *rw;
|
||||||
|
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
||||||
|
return INT2NUM(whisper_model_n_vocab(rw->context));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* call-seq:
|
||||||
|
* n_audio_ctx -> Integer
|
||||||
|
*/
|
||||||
|
static VALUE
|
||||||
|
ruby_whisper_model_n_audio_ctx(VALUE self)
|
||||||
|
{
|
||||||
|
ruby_whisper_model *rwm;
|
||||||
|
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
||||||
|
ruby_whisper *rw;
|
||||||
|
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
||||||
|
return INT2NUM(whisper_model_n_audio_ctx(rw->context));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* call-seq:
|
||||||
|
* n_audio_state -> Integer
|
||||||
|
*/
|
||||||
|
static VALUE
|
||||||
|
ruby_whisper_model_n_audio_state(VALUE self)
|
||||||
|
{
|
||||||
|
ruby_whisper_model *rwm;
|
||||||
|
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
||||||
|
ruby_whisper *rw;
|
||||||
|
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
||||||
|
return INT2NUM(whisper_model_n_audio_state(rw->context));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* call-seq:
|
||||||
|
* n_audio_head -> Integer
|
||||||
|
*/
|
||||||
|
static VALUE
|
||||||
|
ruby_whisper_model_n_audio_head(VALUE self)
|
||||||
|
{
|
||||||
|
ruby_whisper_model *rwm;
|
||||||
|
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
||||||
|
ruby_whisper *rw;
|
||||||
|
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
||||||
|
return INT2NUM(whisper_model_n_audio_head(rw->context));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* call-seq:
|
||||||
|
* n_audio_layer -> Integer
|
||||||
|
*/
|
||||||
|
static VALUE
|
||||||
|
ruby_whisper_model_n_audio_layer(VALUE self)
|
||||||
|
{
|
||||||
|
ruby_whisper_model *rwm;
|
||||||
|
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
||||||
|
ruby_whisper *rw;
|
||||||
|
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
||||||
|
return INT2NUM(whisper_model_n_audio_layer(rw->context));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* call-seq:
|
||||||
|
* n_text_ctx -> Integer
|
||||||
|
*/
|
||||||
|
static VALUE
|
||||||
|
ruby_whisper_model_n_text_ctx(VALUE self)
|
||||||
|
{
|
||||||
|
ruby_whisper_model *rwm;
|
||||||
|
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
||||||
|
ruby_whisper *rw;
|
||||||
|
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
||||||
|
return INT2NUM(whisper_model_n_text_ctx(rw->context));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* call-seq:
|
||||||
|
* n_text_state -> Integer
|
||||||
|
*/
|
||||||
|
static VALUE
|
||||||
|
ruby_whisper_model_n_text_state(VALUE self)
|
||||||
|
{
|
||||||
|
ruby_whisper_model *rwm;
|
||||||
|
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
||||||
|
ruby_whisper *rw;
|
||||||
|
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
||||||
|
return INT2NUM(whisper_model_n_text_state(rw->context));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* call-seq:
|
||||||
|
* n_text_head -> Integer
|
||||||
|
*/
|
||||||
|
static VALUE
|
||||||
|
ruby_whisper_model_n_text_head(VALUE self)
|
||||||
|
{
|
||||||
|
ruby_whisper_model *rwm;
|
||||||
|
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
||||||
|
ruby_whisper *rw;
|
||||||
|
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
||||||
|
return INT2NUM(whisper_model_n_text_head(rw->context));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* call-seq:
|
||||||
|
* n_text_layer -> Integer
|
||||||
|
*/
|
||||||
|
static VALUE
|
||||||
|
ruby_whisper_model_n_text_layer(VALUE self)
|
||||||
|
{
|
||||||
|
ruby_whisper_model *rwm;
|
||||||
|
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
||||||
|
ruby_whisper *rw;
|
||||||
|
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
||||||
|
return INT2NUM(whisper_model_n_text_layer(rw->context));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* call-seq:
|
||||||
|
* n_mels -> Integer
|
||||||
|
*/
|
||||||
|
static VALUE
|
||||||
|
ruby_whisper_model_n_mels(VALUE self)
|
||||||
|
{
|
||||||
|
ruby_whisper_model *rwm;
|
||||||
|
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
||||||
|
ruby_whisper *rw;
|
||||||
|
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
||||||
|
return INT2NUM(whisper_model_n_mels(rw->context));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* call-seq:
|
||||||
|
* ftype -> Integer
|
||||||
|
*/
|
||||||
|
static VALUE
|
||||||
|
ruby_whisper_model_ftype(VALUE self)
|
||||||
|
{
|
||||||
|
ruby_whisper_model *rwm;
|
||||||
|
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
||||||
|
ruby_whisper *rw;
|
||||||
|
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
||||||
|
return INT2NUM(whisper_model_ftype(rw->context));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* call-seq:
|
||||||
|
* type -> String
|
||||||
|
*/
|
||||||
|
static VALUE
|
||||||
|
ruby_whisper_model_type(VALUE self)
|
||||||
|
{
|
||||||
|
ruby_whisper_model *rwm;
|
||||||
|
Data_Get_Struct(self, ruby_whisper_model, rwm);
|
||||||
|
ruby_whisper *rw;
|
||||||
|
Data_Get_Struct(rwm->context, ruby_whisper, rw);
|
||||||
|
return rb_str_new2(whisper_model_type_readable(rw->context));
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
init_ruby_whisper_model(VALUE *mWhisper)
|
||||||
|
{
|
||||||
|
cModel = rb_define_class_under(*mWhisper, "Model", rb_cObject);
|
||||||
|
|
||||||
|
rb_define_alloc_func(cModel, ruby_whisper_model_allocate);
|
||||||
|
rb_define_method(cModel, "n_vocab", ruby_whisper_model_n_vocab, 0);
|
||||||
|
rb_define_method(cModel, "n_audio_ctx", ruby_whisper_model_n_audio_ctx, 0);
|
||||||
|
rb_define_method(cModel, "n_audio_state", ruby_whisper_model_n_audio_state, 0);
|
||||||
|
rb_define_method(cModel, "n_audio_head", ruby_whisper_model_n_audio_head, 0);
|
||||||
|
rb_define_method(cModel, "n_audio_layer", ruby_whisper_model_n_audio_layer, 0);
|
||||||
|
rb_define_method(cModel, "n_text_ctx", ruby_whisper_model_n_text_ctx, 0);
|
||||||
|
rb_define_method(cModel, "n_text_state", ruby_whisper_model_n_text_state, 0);
|
||||||
|
rb_define_method(cModel, "n_text_head", ruby_whisper_model_n_text_head, 0);
|
||||||
|
rb_define_method(cModel, "n_text_layer", ruby_whisper_model_n_text_layer, 0);
|
||||||
|
rb_define_method(cModel, "n_mels", ruby_whisper_model_n_mels, 0);
|
||||||
|
rb_define_method(cModel, "ftype", ruby_whisper_model_ftype, 0);
|
||||||
|
rb_define_method(cModel, "type", ruby_whisper_model_type, 0);
|
||||||
|
}
|
1077
bindings/ruby/ext/ruby_whisper_params.c
Normal file
1077
bindings/ruby/ext/ruby_whisper_params.c
Normal file
File diff suppressed because it is too large
Load Diff
123
bindings/ruby/ext/ruby_whisper_segment.c
Normal file
123
bindings/ruby/ext/ruby_whisper_segment.c
Normal file
@ -0,0 +1,123 @@
|
|||||||
|
#include <ruby.h>
|
||||||
|
#include "ruby_whisper.h"
|
||||||
|
|
||||||
|
extern VALUE cSegment;
|
||||||
|
|
||||||
|
static void
|
||||||
|
rb_whisper_segment_mark(ruby_whisper_segment *rws)
|
||||||
|
{
|
||||||
|
rb_gc_mark(rws->context);
|
||||||
|
}
|
||||||
|
|
||||||
|
VALUE
|
||||||
|
ruby_whisper_segment_allocate(VALUE klass)
|
||||||
|
{
|
||||||
|
ruby_whisper_segment *rws;
|
||||||
|
rws = ALLOC(ruby_whisper_segment);
|
||||||
|
return Data_Wrap_Struct(klass, rb_whisper_segment_mark, RUBY_DEFAULT_FREE, rws);
|
||||||
|
}
|
||||||
|
|
||||||
|
VALUE
|
||||||
|
rb_whisper_segment_initialize(VALUE context, int index)
|
||||||
|
{
|
||||||
|
ruby_whisper_segment *rws;
|
||||||
|
const VALUE segment = ruby_whisper_segment_allocate(cSegment);
|
||||||
|
Data_Get_Struct(segment, ruby_whisper_segment, rws);
|
||||||
|
rws->context = context;
|
||||||
|
rws->index = index;
|
||||||
|
return segment;
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Start time in milliseconds.
|
||||||
|
*
|
||||||
|
* call-seq:
|
||||||
|
* start_time -> Integer
|
||||||
|
*/
|
||||||
|
static VALUE
|
||||||
|
ruby_whisper_segment_get_start_time(VALUE self)
|
||||||
|
{
|
||||||
|
ruby_whisper_segment *rws;
|
||||||
|
Data_Get_Struct(self, ruby_whisper_segment, rws);
|
||||||
|
ruby_whisper *rw;
|
||||||
|
Data_Get_Struct(rws->context, ruby_whisper, rw);
|
||||||
|
const int64_t t0 = whisper_full_get_segment_t0(rw->context, rws->index);
|
||||||
|
// able to multiply 10 without overflow because to_timestamp() in whisper.cpp does it
|
||||||
|
return INT2NUM(t0 * 10);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* End time in milliseconds.
|
||||||
|
*
|
||||||
|
* call-seq:
|
||||||
|
* end_time -> Integer
|
||||||
|
*/
|
||||||
|
static VALUE
|
||||||
|
ruby_whisper_segment_get_end_time(VALUE self)
|
||||||
|
{
|
||||||
|
ruby_whisper_segment *rws;
|
||||||
|
Data_Get_Struct(self, ruby_whisper_segment, rws);
|
||||||
|
ruby_whisper *rw;
|
||||||
|
Data_Get_Struct(rws->context, ruby_whisper, rw);
|
||||||
|
const int64_t t1 = whisper_full_get_segment_t1(rw->context, rws->index);
|
||||||
|
// able to multiply 10 without overflow because to_timestamp() in whisper.cpp does it
|
||||||
|
return INT2NUM(t1 * 10);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Whether the next segment is predicted as a speaker turn.
|
||||||
|
*
|
||||||
|
* call-seq:
|
||||||
|
* speaker_turn_next? -> bool
|
||||||
|
*/
|
||||||
|
static VALUE
|
||||||
|
ruby_whisper_segment_get_speaker_turn_next(VALUE self)
|
||||||
|
{
|
||||||
|
ruby_whisper_segment *rws;
|
||||||
|
Data_Get_Struct(self, ruby_whisper_segment, rws);
|
||||||
|
ruby_whisper *rw;
|
||||||
|
Data_Get_Struct(rws->context, ruby_whisper, rw);
|
||||||
|
return whisper_full_get_segment_speaker_turn_next(rw->context, rws->index) ? Qtrue : Qfalse;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* call-seq:
|
||||||
|
* text -> String
|
||||||
|
*/
|
||||||
|
static VALUE
|
||||||
|
ruby_whisper_segment_get_text(VALUE self)
|
||||||
|
{
|
||||||
|
ruby_whisper_segment *rws;
|
||||||
|
Data_Get_Struct(self, ruby_whisper_segment, rws);
|
||||||
|
ruby_whisper *rw;
|
||||||
|
Data_Get_Struct(rws->context, ruby_whisper, rw);
|
||||||
|
const char * text = whisper_full_get_segment_text(rw->context, rws->index);
|
||||||
|
return rb_str_new2(text);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* call-seq:
|
||||||
|
* no_speech_prob -> Float
|
||||||
|
*/
|
||||||
|
static VALUE
|
||||||
|
ruby_whisper_segment_get_no_speech_prob(VALUE self)
|
||||||
|
{
|
||||||
|
ruby_whisper_segment *rws;
|
||||||
|
Data_Get_Struct(self, ruby_whisper_segment, rws);
|
||||||
|
ruby_whisper *rw;
|
||||||
|
Data_Get_Struct(rws->context, ruby_whisper, rw);
|
||||||
|
return DBL2NUM(whisper_full_get_segment_no_speech_prob(rw->context, rws->index));
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
init_ruby_whisper_segment(VALUE *mWhisper, VALUE *cContext)
|
||||||
|
{
|
||||||
|
cSegment = rb_define_class_under(*mWhisper, "Segment", rb_cObject);
|
||||||
|
|
||||||
|
rb_define_alloc_func(cSegment, ruby_whisper_segment_allocate);
|
||||||
|
rb_define_method(cSegment, "start_time", ruby_whisper_segment_get_start_time, 0);
|
||||||
|
rb_define_method(cSegment, "end_time", ruby_whisper_segment_get_end_time, 0);
|
||||||
|
rb_define_method(cSegment, "speaker_next_turn?", ruby_whisper_segment_get_speaker_turn_next, 0);
|
||||||
|
rb_define_method(cSegment, "text", ruby_whisper_segment_get_text, 0);
|
||||||
|
rb_define_method(cSegment, "no_speech_prob", ruby_whisper_segment_get_no_speech_prob, 0);
|
||||||
|
}
|
83
bindings/ruby/ext/ruby_whisper_transcribe.cpp
Normal file
83
bindings/ruby/ext/ruby_whisper_transcribe.cpp
Normal file
@ -0,0 +1,83 @@
|
|||||||
|
#include <ruby.h>
|
||||||
|
#include "ruby_whisper.h"
|
||||||
|
#include "common-whisper.h"
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
extern ID id_to_s;
|
||||||
|
extern ID id_call;
|
||||||
|
|
||||||
|
extern void
|
||||||
|
register_callbacks(ruby_whisper_params * rwp, VALUE * self);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* transcribe a single file
|
||||||
|
* can emit to a block results
|
||||||
|
*
|
||||||
|
* params = Whisper::Params.new
|
||||||
|
* params.duration = 60_000
|
||||||
|
* whisper.transcribe "path/to/audio.wav", params do |text|
|
||||||
|
* puts text
|
||||||
|
* end
|
||||||
|
*
|
||||||
|
* call-seq:
|
||||||
|
* transcribe(path_to_audio, params) {|text| ...}
|
||||||
|
**/
|
||||||
|
VALUE
|
||||||
|
ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self) {
|
||||||
|
ruby_whisper *rw;
|
||||||
|
ruby_whisper_params *rwp;
|
||||||
|
VALUE wave_file_path, blk, params;
|
||||||
|
|
||||||
|
rb_scan_args(argc, argv, "02&", &wave_file_path, ¶ms, &blk);
|
||||||
|
Data_Get_Struct(self, ruby_whisper, rw);
|
||||||
|
Data_Get_Struct(params, ruby_whisper_params, rwp);
|
||||||
|
|
||||||
|
if (!rb_respond_to(wave_file_path, id_to_s)) {
|
||||||
|
rb_raise(rb_eRuntimeError, "Expected file path to wave file");
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string fname_inp = StringValueCStr(wave_file_path);
|
||||||
|
|
||||||
|
std::vector<float> pcmf32; // mono-channel F32 PCM
|
||||||
|
std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
|
||||||
|
|
||||||
|
if (!read_audio_data(fname_inp, pcmf32, pcmf32s, rwp->diarize)) {
|
||||||
|
fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname_inp.c_str());
|
||||||
|
return self;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
static bool is_aborted = false; // NOTE: this should be atomic to avoid data race
|
||||||
|
|
||||||
|
rwp->params.encoder_begin_callback = [](struct whisper_context * /*ctx*/, struct whisper_state * /*state*/, void * user_data) {
|
||||||
|
bool is_aborted = *(bool*)user_data;
|
||||||
|
return !is_aborted;
|
||||||
|
};
|
||||||
|
rwp->params.encoder_begin_callback_user_data = &is_aborted;
|
||||||
|
}
|
||||||
|
|
||||||
|
register_callbacks(rwp, &self);
|
||||||
|
|
||||||
|
if (whisper_full_parallel(rw->context, rwp->params, pcmf32.data(), pcmf32.size(), 1) != 0) {
|
||||||
|
fprintf(stderr, "failed to process audio\n");
|
||||||
|
return self;
|
||||||
|
}
|
||||||
|
const int n_segments = whisper_full_n_segments(rw->context);
|
||||||
|
VALUE output = rb_str_new2("");
|
||||||
|
for (int i = 0; i < n_segments; ++i) {
|
||||||
|
const char * text = whisper_full_get_segment_text(rw->context, i);
|
||||||
|
output = rb_str_concat(output, rb_str_new2(text));
|
||||||
|
}
|
||||||
|
VALUE idCall = id_call;
|
||||||
|
if (blk != Qnil) {
|
||||||
|
rb_funcall(blk, idCall, 1, output);
|
||||||
|
}
|
||||||
|
return self;
|
||||||
|
}
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
6
bindings/ruby/extsources.rb
Normal file
6
bindings/ruby/extsources.rb
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
require "yaml"

# Every path known to git below the repository root (two levels up),
# NUL-separated so unusual file names survive.
sources = `git ls-files -z ../..`.split("\x0")

# Path patterns that trigger the Ruby bindings workflow on push. The workflow's
# top-level key is looked up as `true` here.
workflow = YAML.load_file("../../.github/workflows/bindings-ruby.yml")
paths = workflow[true]["push"]["paths"]
paths.delete "bindings/ruby/**"

# Expand the patterns relative to the repository root, add the license, and
# keep only entries that git actually tracks.
candidates = Dir.glob(paths, base: "../..").map { |path| "../../#{path}" }
candidates << "../../LICENSE"
EXTSOURCES = candidates & sources
|
170
bindings/ruby/lib/whisper/model/uri.rb
Normal file
170
bindings/ruby/lib/whisper/model/uri.rb
Normal file
@ -0,0 +1,170 @@
|
|||||||
|
require "uri"
|
||||||
|
require "net/http"
|
||||||
|
require "time"
|
||||||
|
require "pathname"
|
||||||
|
require "io/console/size"
|
||||||
|
|
||||||
|
module Whisper
  class Model
    # A model file located at a remote URI, downloaded on demand and cached on
    # local disk under a per-platform cache directory.
    class URI
      # @param uri [String, ::URI::Generic] location of the model file
      def initialize(uri)
        # Kernel#URI (a method call), not this class.
        @uri = URI(uri)
      end

      # Makes sure the file is cached (downloading or revalidating it over
      # HTTP) and returns the local path.
      #
      # @return [String] path of the cached file
      def to_path
        cache
        cache_path.to_path
      end

      # Deletes the cached file if it exists.
      def clear_cache
        path = cache_path
        path.delete if path.exist?
      end

      private

      # @return [Pathname] <cache dir>/<host>/<URI path without leading slash>
      def cache_path
        base_cache_dir / @uri.host / @uri.path[1..]
      end

      # @return [Pathname] platform-specific cache directory for whisper.cpp
      def base_cache_dir
        base = case RUBY_PLATFORM
               when /mswin|mingw/
                 ENV.key?("LOCALAPPDATA") ? Pathname(ENV["LOCALAPPDATA"]) : Pathname(Dir.home) / "AppData/Local"
               when /darwin/
                 Pathname(Dir.home) / "Library/Caches"
               else
                 # Wrap the environment value in a Pathname: a bare String has
                 # no `/` operator, so joining below would raise NoMethodError.
                 ENV.key?("XDG_CACHE_HOME") ? Pathname(ENV["XDG_CACHE_HOME"]) : Pathname(Dir.home) / ".cache"
               end
        base / "whisper.cpp"
      end

      # Fetches the file, sending If-Modified-Since when a cached copy exists.
      #
      # @return [Pathname] path of the cached file
      def cache
        path = cache_path
        headers = {}
        headers["if-modified-since"] = path.mtime.httpdate if path.exist?
        request @uri, headers
        path
      end

      # Performs a GET and dispatches on the response class. Redirects are
      # followed recursively. When anything fails and a cached file exists,
      # the error is printed as a warning and the cached file is used.
      def request(uri, headers)
        Net::HTTP.start uri.host, uri.port, use_ssl: uri.scheme == "https" do |http|
          request = Net::HTTP::Get.new(uri, headers)
          http.request request do |response|
            case response
            when Net::HTTPNotModified
              # noop
            when Net::HTTPOK
              download response
            when Net::HTTPRedirection
              request URI(response["location"]), headers
            else
              return if headers.key?("if-modified-since") # Use cache file

              raise "#{response.code} #{response.message}\n#{response.body}"
            end
          end
        end
      rescue => err
        if cache_path.exist?
          warn err
          # Use cache file
        else
          raise
        end
      end

      # Streams the response body into "<cache path>.downloading" and renames
      # it to the final path once the body has been read completely.
      def download(response)
        path = cache_path
        path.dirname.mkpath unless path.dirname.exist?
        downloading_path = Pathname("#{path}.downloading")
        size = response.content_length
        downloading_path.open "wb" do |file|
          downloaded = 0
          response.read_body do |chunk|
            file << chunk
            downloaded += chunk.bytesize
            show_progress downloaded, size
          end
          $stderr.puts
        end
        downloading_path.rename path
      end

      # Prints download progress to stderr at most about once per second: a
      # progress bar when the total size is known and stderr is a terminal,
      # otherwise a dot.
      #
      # @param current [Integer] bytes downloaded so far
      # @param size [Integer, nil] total size, nil when unknown
      def show_progress(current, size)
        progress_rate_available = size&.positive? && $stderr.tty?

        unless @prev
          @prev = Time.now
          $stderr.puts "Downloading #{@uri} to #{cache_path}"
        end

        now = Time.now

        if progress_rate_available
          return if now - @prev < 1 && current < size

          progress_width = 20
          progress = current.to_f / size
          # Whole number of cells, at least 1 (the arrow head) and at most the
          # bar width, so neither repeat count below can go negative and the
          # bar keeps a constant width.
          arrow_length = (progress * progress_width).floor.clamp(1, progress_width)
          arrow = "=" * (arrow_length - 1) + ">" + " " * (progress_width - arrow_length)
          line = "[#{arrow}] (#{format_bytesize(current)} / #{format_bytesize(size)})"
          # A terminal narrower than the line must not yield a negative count.
          padding = " " * [$stderr.winsize[1] - line.size, 0].max
          $stderr.print "\r#{line}#{padding}"
        else
          return if now - @prev < 1

          $stderr.print "."
        end
        @prev = now
      end

      # @param bytesize [Integer]
      # @return [String] size with one decimal and a binary unit, e.g. "1.5 KiB"
      def format_bytesize(bytesize)
        return "0.0 B" if bytesize.zero?

        units = %w[B KiB MiB GiB TiB]
        exp = (Math.log(bytesize) / Math.log(1024)).to_i
        format("%.1f %s", bytesize.to_f / 1024 ** exp, units[exp])
      end
    end

    # Model name => URI of the corresponding ggml-<name>.bin file.
    @pre_converted_models = %w[
      tiny
      tiny.en
      tiny-q5_1
      tiny.en-q5_1
      tiny-q8_0
      base
      base.en
      base-q5_1
      base.en-q5_1
      base-q8_0
      small
      small.en
      small.en-tdrz
      small-q5_1
      small.en-q5_1
      small-q8_0
      medium
      medium.en
      medium-q5_0
      medium.en-q5_0
      medium-q8_0
      large-v1
      large-v2
      large-v2-q5_0
      large-v2-q8_0
      large-v3
      large-v3-q5_0
      large-v3-turbo
      large-v3-turbo-q5_0
      large-v3-turbo-q8_0
    ].each_with_object({}) {|name, models|
      models[name] = URI.new("https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-#{name}.bin")
    }

    class << self
      attr_reader :pre_converted_models
    end
  end
end
|
189
bindings/ruby/sig/whisper.rbs
Normal file
189
bindings/ruby/sig/whisper.rbs
Normal file
@ -0,0 +1,189 @@
|
|||||||
|
# Type signatures for the whisper.cpp Ruby bindings.
module Whisper
  # Anything that can be passed as audio samples: it must report its length
  # and yield Float values.
  interface _Samples
    def length: () -> Integer
    def each: { (Float) -> void } -> void
  end

  type log_callback = ^(Integer level, String message, Object user_data) -> void
  type new_segment_callback = ^(Whisper::Context, void, Integer n_new, Object user_data) -> void
  type progress_callback = ^(Whisper::Context, void, Integer progress, Object user_data) -> void
  type abort_callback = ^(Whisper::Context, void, Object user_data) -> boolish

  LOG_LEVEL_NONE: Integer
  LOG_LEVEL_INFO: Integer
  LOG_LEVEL_WARN: Integer
  LOG_LEVEL_ERROR: Integer
  LOG_LEVEL_DEBUG: Integer
  LOG_LEVEL_CONT: Integer

  def self.lang_max_id: () -> Integer
  def self.lang_id: (string name) -> Integer
  def self.lang_str: (Integer id) -> String
  def self.lang_str_full: (Integer id) -> String
  def self.log_set: (log_callback, Object? user_data) -> log_callback

  class Context
    def self.new: (string | _ToPath | ::URI::HTTP) -> instance
    def transcribe: (string, Params) -> self
                  | (string, Params) { (String) -> void } -> self
    def model_n_vocab: () -> Integer
    def model_n_audio_ctx: () -> Integer
    def model_n_audio_state: () -> Integer
    # The next four readers are registered by the C extension alongside the
    # other model_* readers.
    def model_n_audio_head: () -> Integer
    def model_n_audio_layer: () -> Integer
    def model_n_text_ctx: () -> Integer
    def model_n_text_state: () -> Integer
    def model_n_text_head: () -> Integer
    def model_n_text_layer: () -> Integer
    def model_n_mels: () -> Integer
    def model_ftype: () -> Integer
    def model_type: () -> String
    def each_segment: { (Segment) -> void } -> void
                    | () -> Enumerator[Segment]
    def model: () -> Model
    def full_get_segment: (Integer nth) -> Segment
    def full_n_segments: () -> Integer
    def full_lang_id: () -> Integer
    def full_get_segment_t0: (Integer) -> Integer
    def full_get_segment_t1: (Integer) -> Integer
    def full_get_segment_speaker_turn_next: (Integer) -> (true | false)
    def full_get_segment_text: (Integer) -> String
    def full_get_segment_no_speech_prob: (Integer) -> Float
    def full: (Params, Array[Float] samples, ?Integer n_samples) -> self
            | (Params, _Samples, ?Integer n_samples) -> self
    def full_parallel: (Params, Array[Float], ?Integer n_samples) -> self
                     | (Params, _Samples, ?Integer n_samples) -> self
                     | (Params, _Samples, ?Integer? n_samples, Integer n_processors) -> self
  end

  class Params
    def self.new: (
      ?language: string,
      ?translate: boolish,
      ?no_context: boolish,
      ?single_segment: boolish,
      ?print_special: boolish,
      ?print_progress: boolish,
      ?print_realtime: boolish,
      ?print_timestamps: boolish,
      ?suppress_blank: boolish,
      ?suppress_nst: boolish,
      ?token_timestamps: boolish,
      ?split_on_word: boolish,
      ?initial_prompt: string | nil,
      ?diarize: boolish,
      ?offset: Integer,
      ?duration: Integer,
      ?max_text_tokens: Integer,
      ?temperature: Float,
      ?max_initial_ts: Float,
      ?length_penalty: Float,
      ?temperature_inc: Float,
      ?entropy_thold: Float,
      ?logprob_thold: Float,
      ?no_speech_thold: Float,
      ?new_segment_callback: new_segment_callback,
      ?new_segment_callback_user_data: Object,
      ?progress_callback: progress_callback,
      ?progress_callback_user_data: Object,
      ?abort_callback: abort_callback,
      ?abort_callback_user_data: Object
    ) -> instance
    def language=: (String) -> String # TODO: Enumerate lang names
    def language: () -> String
    def translate=: (boolish) -> boolish
    def translate: () -> (true | false)
    def no_context=: (boolish) -> boolish
    def no_context: () -> (true | false)
    def single_segment=: (boolish) -> boolish
    def single_segment: () -> (true | false)
    def print_special=: (boolish) -> boolish
    def print_special: () -> (true | false)
    def print_progress=: (boolish) -> boolish
    def print_progress: () -> (true | false)
    def print_realtime=: (boolish) -> boolish
    def print_realtime: () -> (true | false)
    def print_timestamps=: (boolish) -> boolish
    def print_timestamps: () -> (true | false)
    def suppress_blank=: (boolish) -> boolish
    def suppress_blank: () -> (true | false)
    def suppress_nst=: (boolish) -> boolish
    def suppress_nst: () -> (true | false)
    def token_timestamps=: (boolish) -> boolish
    def token_timestamps: () -> (true | false)
    def split_on_word=: (boolish) -> boolish
    def split_on_word: () -> (true | false)
    def initial_prompt=: (_ToS) -> _ToS
    def initial_prompt: () -> (String | nil)
    def diarize=: (boolish) -> boolish
    def diarize: () -> (true | false)
    def offset=: (Integer) -> Integer
    def offset: () -> Integer
    def duration=: (Integer) -> Integer
    def duration: () -> Integer
    def max_text_tokens=: (Integer) -> Integer
    def max_text_tokens: () -> Integer
    def temperature=: (Float) -> Float
    def temperature: () -> Float
    def max_initial_ts=: (Float) -> Float
    def max_initial_ts: () -> Float
    def length_penalty=: (Float) -> Float
    def length_penalty: () -> Float
    def temperature_inc=: (Float) -> Float
    def temperature_inc: () -> Float
    def entropy_thold=: (Float) -> Float
    def entropy_thold: () -> Float
    def logprob_thold=: (Float) -> Float
    def logprob_thold: () -> Float
    def no_speech_thold=: (Float) -> Float
    def no_speech_thold: () -> Float
    def new_segment_callback=: (new_segment_callback) -> new_segment_callback
    def new_segment_callback: () -> (new_segment_callback | nil)
    def new_segment_callback_user_data=: (Object) -> Object
    def new_segment_callback_user_data: () -> Object
    def progress_callback=: (progress_callback) -> progress_callback
    def progress_callback: () -> (progress_callback | nil)
    def progress_callback_user_data=: (Object) -> Object
    def progress_callback_user_data: () -> Object
    def abort_callback=: (abort_callback) -> abort_callback
    def abort_callback: () -> (abort_callback | nil)
    def abort_callback_user_data=: (Object) -> Object
    def abort_callback_user_data: () -> Object
    def on_new_segment: { (Segment) -> void } -> void
    def on_progress: { (Integer progress) -> void } -> void
    def abort_on: { (Object user_data) -> boolish } -> void
  end

  class Model
    def self.pre_converted_models: () -> Hash[String, Model::URI]
    def self.new: () -> instance
    def n_vocab: () -> Integer
    def n_audio_ctx: () -> Integer
    def n_audio_state: () -> Integer
    def n_audio_head: () -> Integer
    def n_audio_layer: () -> Integer
    def n_text_ctx: () -> Integer
    def n_text_state: () -> Integer
    def n_text_head: () -> Integer
    def n_text_layer: () -> Integer
    def n_mels: () -> Integer
    def ftype: () -> Integer
    def type: () -> String

    class URI
      # `instance`, not `self`: in a singleton method `self` denotes the class
      # object itself rather than the object being constructed.
      def self.new: (string | ::URI::HTTP) -> instance
      def to_path: () -> String
      def clear_cache: () -> void
    end
  end

  class Segment
    def start_time: () -> Integer
    def end_time: () -> Integer
    def speaker_next_turn?: () -> (true | false)
    def text: () -> String
    def no_speech_prob: () -> Float
  end

  class Error < StandardError
    attr_reader code: Integer

    def self.new: (Integer code) -> instance
  end
end
|
24
bindings/ruby/tests/helper.rb
Normal file
24
bindings/ruby/tests/helper.rb
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
require "test/unit"
|
||||||
|
require "whisper"
|
||||||
|
require_relative "jfk_reader/jfk_reader"
|
||||||
|
|
||||||
|
# Common parent of the binding's test cases.
#
# Exposes the path of the sample recording and a class-level Whisper context
# that has already transcribed that recording once.
class TestBase < Test::Unit::TestCase
  # Sample recording, three directories above this file under samples/.
  AUDIO = File.join(__dir__, "..", "..", "..", "samples", "jfk.wav")

  # Context created by .startup (nil until .startup has run).
  def self.whisper
    @whisper
  end

  # test-unit class-level hook: builds the context and transcribes AUDIO with
  # timestamp printing switched off.
  def self.startup
    @whisper = Whisper::Context.new("base.en")
    quiet_params = Whisper::Params.new
    quiet_params.print_timestamps = false
    @whisper.transcribe(TestBase::AUDIO, quiet_params)
  end

  # Instance-side shortcut to the class-level context.
  private def whisper
    self.class.whisper
  end
end
|
5
bindings/ruby/tests/jfk_reader/.gitignore
vendored
Normal file
5
bindings/ruby/tests/jfk_reader/.gitignore
vendored
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
Makefile
|
||||||
|
jfk_reader.o
|
||||||
|
jfk_reader.so
|
||||||
|
jfk_reader.bundle
|
||||||
|
jfk_reader.dll
|
3
bindings/ruby/tests/jfk_reader/extconf.rb
Normal file
3
bindings/ruby/tests/jfk_reader/extconf.rb
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
require "mkmf"
|
||||||
|
|
||||||
|
# Generate the Makefile for the "jfk_reader" extension (mkmf's entry point).
create_makefile("jfk_reader")
|
68
bindings/ruby/tests/jfk_reader/jfk_reader.c
Normal file
68
bindings/ruby/tests/jfk_reader/jfk_reader.c
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
#include <ruby.h>
|
||||||
|
#include <ruby/memory_view.h>
|
||||||
|
#include <ruby/encoding.h>
|
||||||
|
|
||||||
|
static VALUE
|
||||||
|
jfk_reader_initialize(VALUE self, VALUE audio_path)
|
||||||
|
{
|
||||||
|
rb_iv_set(self, "audio_path", audio_path);
|
||||||
|
return Qnil;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
jfk_reader_get_memory_view(const VALUE obj, rb_memory_view_t *view, int flags)
|
||||||
|
{
|
||||||
|
VALUE audio_path = rb_iv_get(obj, "audio_path");
|
||||||
|
const char *audio_path_str = StringValueCStr(audio_path);
|
||||||
|
const int n_samples = 176000;
|
||||||
|
float *data = (float *)malloc(n_samples * sizeof(float));
|
||||||
|
short *samples = (short *)malloc(n_samples * sizeof(short));
|
||||||
|
FILE *file = fopen(audio_path_str, "rb");
|
||||||
|
|
||||||
|
fseek(file, 78, SEEK_SET);
|
||||||
|
fread(samples, sizeof(short), n_samples, file);
|
||||||
|
fclose(file);
|
||||||
|
for (int i = 0; i < n_samples; i++) {
|
||||||
|
data[i] = samples[i]/32768.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
view->obj = obj;
|
||||||
|
view->data = (void *)data;
|
||||||
|
view->byte_size = sizeof(float) * n_samples;
|
||||||
|
view->readonly = true;
|
||||||
|
view->format = "f";
|
||||||
|
view->item_size = sizeof(float);
|
||||||
|
view->item_desc.components = NULL;
|
||||||
|
view->item_desc.length = 0;
|
||||||
|
view->ndim = 1;
|
||||||
|
view->shape = NULL;
|
||||||
|
view->sub_offsets = NULL;
|
||||||
|
view->private_data = NULL;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
jfk_reader_release_memory_view(const VALUE obj, rb_memory_view_t *view)
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
jfk_reader_memory_view_available_p(const VALUE obj)
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static const rb_memory_view_entry_t jfk_reader_view_entry = {
|
||||||
|
jfk_reader_get_memory_view,
|
||||||
|
jfk_reader_release_memory_view,
|
||||||
|
jfk_reader_memory_view_available_p
|
||||||
|
};
|
||||||
|
|
||||||
|
void Init_jfk_reader(void)
|
||||||
|
{
|
||||||
|
VALUE cJFKReader = rb_define_class("JFKReader", rb_cObject);
|
||||||
|
rb_memory_view_register(cJFKReader, &jfk_reader_view_entry);
|
||||||
|
rb_define_method(cJFKReader, "initialize", jfk_reader_initialize, 1);
|
||||||
|
}
|
160
bindings/ruby/tests/test_callback.rb
Normal file
160
bindings/ruby/tests/test_callback.rb
Normal file
@ -0,0 +1,160 @@
|
|||||||
|
require_relative "helper"
|
||||||
|
|
||||||
|
# Tests for the callback hooks on Whisper::Params: new-segment, progress and
# abort callbacks, their user-data slots, and the block-style helpers
# (on_new_segment, on_progress, abort_on).
class TestCallback < TestBase
  # Fresh params and context for every test so callbacks never leak between
  # tests; GC runs first to flush objects left over from the previous one.
  def setup
    GC.start
    @params = Whisper::Params.new
    @whisper = Whisper::Context.new("base.en")
    @audio = File.join(AUDIO)
  end

  # The callback receives the context itself and a positive segment count,
  # and the newly added segments are readable through the context.
  def test_new_segment_callback
    @params.new_segment_callback = ->(context, state, n_new, user_data) {
      assert_kind_of Integer, n_new
      assert n_new > 0
      assert_same @whisper, context

      new_segment_indices(context, n_new).each do |i_segment|
        start_time = context.full_get_segment_t0(i_segment) * 10
        end_time = context.full_get_segment_t1(i_segment) * 10
        text = context.full_get_segment_text(i_segment)

        assert_kind_of Integer, start_time
        assert start_time >= 0
        assert_kind_of Integer, end_time
        assert end_time > 0
        if i_segment == 0
          assert_match(/ask not what your country can do for you, ask what you can do for your country/, text)
        end
      end
    }

    @whisper.transcribe(@audio, @params)
  end

  # The callback is a closure (it sees search_word) and an exception raised
  # inside it propagates out of #transcribe.
  def test_new_segment_callback_closure
    search_word = "what"
    @params.new_segment_callback = ->(context, state, n_new, user_data) {
      new_segment_indices(context, n_new).each do |i_segment|
        text = context.full_get_segment_text(i_segment)
        next unless text.include?(search_word)

        t0 = context.full_get_segment_t0(i_segment)
        t1 = context.full_get_segment_t1(i_segment)
        raise "search word '#{search_word}' found at between #{t0} and #{t1}"
      end
    }

    assert_raise RuntimeError do
      @whisper.transcribe(@audio, @params)
    end
  end

  # The very same user-data object is handed back to the callback.
  def test_new_segment_callback_user_data
    udata = Object.new
    @params.new_segment_callback_user_data = udata
    @params.new_segment_callback = ->(context, state, n_new, user_data) {
      assert_same udata, user_data
    }

    @whisper.transcribe(@audio, @params)
  end

  # User data referenced only through the params must survive a GC run.
  def test_new_segment_callback_user_data_gc
    @params.new_segment_callback_user_data = "My user data"
    @params.new_segment_callback = ->(context, state, n_new, user_data) {
      assert_equal "My user data", user_data
    }
    GC.start

    assert_same @whisper, @whisper.transcribe(@audio, @params)
  end

  # Progress is an Integer in 0..100, starting at 0 and finishing at 100.
  def test_progress_callback
    first = nil
    last = nil
    @params.progress_callback = ->(context, state, progress, user_data) {
      assert_kind_of Integer, progress
      assert 0 <= progress && progress <= 100
      assert_same @whisper, context
      first = progress if first.nil?
      last = progress
    }
    @whisper.transcribe(@audio, @params)
    assert_equal 0, first
    assert_equal 100, last
  end

  # The very same user-data object is handed back to the progress callback.
  def test_progress_callback_user_data
    udata = Object.new
    @params.progress_callback_user_data = udata
    @params.progress_callback = ->(context, state, progress, user_data) {
      assert_same udata, user_data
    }

    @whisper.transcribe(@audio, @params)
  end

  # Block form of the progress callback: only the progress value is yielded.
  def test_on_progress
    first = nil
    last = nil
    @params.on_progress do |progress|
      assert_kind_of Integer, progress
      assert 0 <= progress && progress <= 100
      first = progress if first.nil?
      last = progress
    end
    @whisper.transcribe(@audio, @params)
    assert_equal 0, first
    assert_equal 100, last
  end

  # With no user data configured the callback receives nil; returning false
  # lets transcription continue, so the callback is invoked at least once.
  def test_abort_callback
    i = 0
    @params.abort_callback = ->(user_data) {
      assert_nil user_data
      i += 1
      false
    }
    @whisper.transcribe(@audio, @params)
    assert i > 0
  end

  # Returning true on the third call stops the run: no fourth call happens.
  def test_abort_callback_abort
    i = 0
    @params.abort_callback = ->(user_data) {
      i += 1
      i == 3
    }
    @whisper.transcribe(@audio, @params)
    assert_equal 3, i
  end

  # The very same user-data object is handed to the abort callback.
  def test_abort_callback_user_data
    udata = Object.new
    @params.abort_callback_user_data = udata
    yielded = nil
    @params.abort_callback = ->(user_data) {
      yielded = user_data
    }
    @whisper.transcribe(@audio, @params)
    assert_same udata, yielded
  end

  # Block form of the abort callback, driven by a flag that the new-segment
  # block flips once a segment mentions "ask".
  def test_abort_on
    do_abort = false
    @params.on_new_segment do |segment|
      do_abort = true if segment.text.match?(/ask/)
    end
    i = 0
    @params.abort_on do
      i += 1
      do_abort
    end
    @whisper.transcribe(@audio, @params)
    assert i > 0
  end

  private

  # Indices of the +n_new+ most recently added segments of +context+.
  #
  # The callback is told how many segments were added, so they occupy the
  # last +n_new+ positions. The previous `n_segments - 1 + i` arithmetic was
  # only right for n_new == 1 and ran past the last segment otherwise.
  def new_segment_indices(context, n_new)
    n_segments = context.full_n_segments
    (n_segments - n_new)...n_segments
  end
end
|
20
bindings/ruby/tests/test_error.rb
Normal file
20
bindings/ruby/tests/test_error.rb
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
require_relative "helper"
|
||||||
|
|
||||||
|
# Tests for Whisper::Error: message lookup by code, the code reader, and
# rejection of non-integer codes.
class TestError < TestBase
  # A known code maps to its message and is readable back via #code.
  def test_error
    err = Whisper::Error.new(-2)
    assert_equal("failed to compute log mel spectrogram", err.message)
    assert_equal(-2, err.code)
  end

  # A code without a dedicated message falls back to a generic one.
  def test_unknown_error
    assert_equal("unknown error", Whisper::Error.new(-20).message)
  end

  # Only integers are accepted as codes.
  def test_non_int_code
    assert_raise(TypeError) { Whisper::Error.new("non int") }
  end
end
|
109
bindings/ruby/tests/test_model.rb
Normal file
109
bindings/ruby/tests/test_model.rb
Normal file
@ -0,0 +1,109 @@
|
|||||||
|
require_relative "helper"
|
||||||
|
require "pathname"
|
||||||
|
|
||||||
|
# Tests for Whisper::Model: how a context can be told which model to load
# (name, Pathname, URL string, URI object) and the attributes the loaded
# base.en model reports.
class TestModel < TestBase
  # Attribute values expected from the base.en model, keyed by reader name.
  BASE_EN_ATTRIBUTES = {
    n_vocab: 51864,
    n_audio_ctx: 1500,
    n_audio_state: 512,
    n_audio_head: 8,
    n_audio_layer: 6,
    n_text_ctx: 448,
    n_text_state: 512,
    n_text_head: 8,
    n_text_layer: 6,
    n_mels: 80,
    ftype: 1,
    type: "base"
  }.freeze

  MODEL_URL = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin"

  def test_model
    whisper = Whisper::Context.new("base.en")
    assert_instance_of Whisper::Model, whisper.model
  end

  def test_attributes
    whisper = Whisper::Context.new("base.en")

    assert_base_en_attributes whisper.model
  end

  # The model must stay usable after the context that produced it has
  # become unreachable and a GC run has happened.
  def test_gc
    model = Whisper::Context.new("base.en").model
    GC.start

    assert_base_en_attributes model
  end

  # A Pathname is accepted in place of a model name.
  def test_pathname
    path = Pathname(Whisper::Model.pre_converted_models["base.en"].to_path)
    whisper = Whisper::Context.new(path)

    assert_base_en_attributes whisper.model
  end

  # Asking a pre-converted model for its path yields an existing file of
  # the expected size.
  def test_auto_download
    path = Whisper::Model.pre_converted_models["base.en"].to_path

    assert_path_exist path
    assert_equal 147964211, File.size(path)
  end

  # A URL given as a plain string is accepted.
  def test_uri_string
    whisper = Whisper::Context.new(MODEL_URL)

    assert_base_en_attributes whisper.model
  end

  # A URL given as a URI object is accepted.
  def test_uri
    whisper = Whisper::Context.new(URI(MODEL_URL))

    assert_base_en_attributes whisper.model
  end

  private

  # Asserts that +model+ reports every value in BASE_EN_ATTRIBUTES. The
  # attribute name is used as the failure message so a mismatch points at
  # the offending reader.
  def assert_base_en_attributes(model)
    BASE_EN_ATTRIBUTES.each do |attribute, expected|
      assert_equal expected, model.public_send(attribute), "model.#{attribute}"
    end
  end
end
|
31
bindings/ruby/tests/test_package.rb
Normal file
31
bindings/ruby/tests/test_package.rb
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
require_relative "helper"
|
||||||
|
require 'tempfile'
|
||||||
|
require 'tmpdir'
|
||||||
|
require 'shellwords'
|
||||||
|
|
||||||
|
# Packaging tests: the gem can be built, and installing the built gem
# produces the native extension.
#
# All external commands are run through the multi-argument form of `system`,
# which passes arguments to the program directly without a shell. Arguments
# are therefore passed verbatim: shell-escaping them would make the
# backslashes part of the path.
class TestPackage < TestBase
  # `gem build` writes a non-empty gem to the requested output path.
  def test_build
    Tempfile.create do |file|
      assert system("gem", "build", "whispercpp.gemspec", "--output", file.to_path, exception: true)
      assert file.size > 0
      assert_path_exist file.to_path
    end
  end

  sub_test_case "Building binary on installation" do
    # Build the gem package before each test in this group.
    def setup
      system "rake", "build", exception: true
    end

    # Installing the built gem into a scratch directory leaves the compiled
    # extension (whisper.<DLEXT>) under the gem's lib directory.
    def test_install
      match_data = `rake -Tbuild`.match(/(whispercpp-(.+)\.gem)/)
      assert_not_nil match_data, "could not find the gem file name in `rake -Tbuild` output"
      filename = match_data[1]
      version = match_data[2]
      basename = "whisper.#{RbConfig::CONFIG["DLEXT"]}"
      Dir.mktmpdir do |dir|
        system "gem", "install", "--install-dir", dir, "--no-document", "pkg/#{filename}", exception: true
        assert_path_exist File.join(dir, "gems/whispercpp-#{version}/lib", basename)
      end
    end
  end
end
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user