{"payload":{"feedbackUrl":"https://github.com/orgs/community/discussions/53140","repo":{"id":622737062,"defaultBranch":"master","name":"llama.cpp","ownerLogin":"unbounded","currentUserCanPush":false,"isFork":true,"isEmpty":false,"createdAt":"2023-04-03T00:37:12.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/789931?v=4","public":true,"private":false,"isOrgOwned":false},"refInfo":{"name":"","listCacheKey":"v0:1683184826.0","currentOid":""},"activityList":{"items":[{"before":"f87dfc3d85d6cc01a39fbb3bcc4ef699f954e42a","after":"8c81d8297414f8dfff155584c4b06abdd67d9d69","ref":"refs/heads/continuous-q4_2c","pushedAt":"2023-05-06T08:08:43.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"unbounded","name":null,"path":"/unbounded","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/789931?s=80&v=4"},"commit":{"message":"q4_2c: faster dotprod for m1","shortMessageHtmlLink":"q4_2c: faster dotprod for m1"}},{"before":"d53f76760d7b067fd0cef67a994a90e662bdfb50","after":"76692c90cdb909065be522910a5b7c60fa3a062b","ref":"refs/heads/quantization-continuous-layouts","pushedAt":"2023-05-04T07:50:50.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"unbounded","name":null,"path":"/unbounded","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/789931?s=80&v=4"},"commit":{"message":"q4_0c: avoid _mm512_loadu_epi64 instruction\n\nNot supported on some GCC versions","shortMessageHtmlLink":"q4_0c: avoid _mm512_loadu_epi64 instruction"}},{"before":"9c2dc16e62ee2d746360e438be986fe293682941","after":"f87dfc3d85d6cc01a39fbb3bcc4ef699f954e42a","ref":"refs/heads/continuous-q4_2c","pushedAt":"2023-05-04T07:38:29.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"unbounded","name":null,"path":"/unbounded","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/789931?s=80&v=4"},"commit":{"message":"q4_2c quantization method\n\nChange to continuous layout version of q4_2","shortMessageHtmlLink":"q4_2c quantization 
method"}},{"before":null,"after":"9c2dc16e62ee2d746360e438be986fe293682941","ref":"refs/heads/continuous-q4_2c","pushedAt":"2023-05-04T07:20:26.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"unbounded","name":null,"path":"/unbounded","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/789931?s=80&v=4"},"commit":{"message":"q4_2c quantization method\n\nChange to continuous layout version of q4_2","shortMessageHtmlLink":"q4_2c quantization method"}},{"before":"58e10f29b61fe4907bafee91b706b70da0905dea","after":"d53f76760d7b067fd0cef67a994a90e662bdfb50","ref":"refs/heads/quantization-continuous-layouts","pushedAt":"2023-05-04T07:20:18.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"unbounded","name":null,"path":"/unbounded","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/789931?s=80&v=4"},"commit":{"message":"q4_0c: disable prefetching on M1","shortMessageHtmlLink":"q4_0c: disable prefetching on M1"}},{"before":"944c7037cca86b005a59f78ff9993a6b6c4807a9","after":"5808fcf7ac0acfb5638233e92e887f2dd3cfed70","ref":"refs/heads/q4_0_range_fix","pushedAt":"2023-04-24T18:52:16.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"unbounded","name":null,"path":"/unbounded","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/789931?s=80&v=4"},"commit":{"message":"Use full range for q4_2 quantization","shortMessageHtmlLink":"Use full range for q4_2 quantization"}},{"before":"64a6a29c9fc82eb8de1cf8bd54ad5b4f365b0bf6","after":"58e10f29b61fe4907bafee91b706b70da0905dea","ref":"refs/heads/quantization-continuous-layouts","pushedAt":"2023-04-24T16:56:44.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"unbounded","name":null,"path":"/unbounded","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/789931?s=80&v=4"},"commit":{"message":"q4_0c: prefetch on AVX-512 and ARM\n\nSeems significant especially for evaluation time","shortMessageHtmlLink":"q4_0c: prefetch on AVX-512 and 
ARM"}},{"before":"4f149c2364d60dc8922542c6f29a426e457863c6","after":"64a6a29c9fc82eb8de1cf8bd54ad5b4f365b0bf6","ref":"refs/heads/quantization-continuous-layouts","pushedAt":"2023-04-22T00:46:44.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"unbounded","name":null,"path":"/unbounded","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/789931?s=80&v=4"},"commit":{"message":"q4_0c: Arm Neon acceleration\n\nMostly copied from the q4_0 implementation","shortMessageHtmlLink":"q4_0c: Arm Neon acceleration"}},{"before":"48c84f77feda27fb0be96f21dbb95a04b82f1c1d","after":"4f149c2364d60dc8922542c6f29a426e457863c6","ref":"refs/heads/quantization-continuous-layouts","pushedAt":"2023-04-19T21:59:04.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"unbounded","name":null,"path":"/unbounded","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/789931?s=80&v=4"},"commit":{"message":"q4_0c: AVX512 vec_dot and quantize impl","shortMessageHtmlLink":"q4_0c: AVX512 vec_dot and quantize impl"}},{"before":"e8f4e6e780b538b8cd0bab072219626b33335063","after":"48c84f77feda27fb0be96f21dbb95a04b82f1c1d","ref":"refs/heads/quantization-continuous-layouts","pushedAt":"2023-04-19T20:56:28.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"unbounded","name":null,"path":"/unbounded","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/789931?s=80&v=4"},"commit":{"message":"q4_0c: AVX512 vec_dot and quantize impl","shortMessageHtmlLink":"q4_0c: AVX512 vec_dot and quantize impl"}},{"before":"947d2f4530e8915c94fcb4a73b6296c4310e479e","after":"e8f4e6e780b538b8cd0bab072219626b33335063","ref":"refs/heads/quantization-continuous-layouts","pushedAt":"2023-04-19T18:09:42.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"unbounded","name":null,"path":"/unbounded","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/789931?s=80&v=4"},"commit":{"message":"q4_0c: AVX512 vec_dot and quantize 
impl","shortMessageHtmlLink":"q4_0c: AVX512 vec_dot and quantize impl"}},{"before":"f868a3ca3dd46bf7ea9f834452282fcb652ff7f4","after":"947d2f4530e8915c94fcb4a73b6296c4310e479e","ref":"refs/heads/quantization-continuous-layouts","pushedAt":"2023-04-19T01:39:20.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"unbounded","name":null,"path":"/unbounded","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/789931?s=80&v=4"},"commit":{"message":"q4_0c: AVX512 vec_dot impl","shortMessageHtmlLink":"q4_0c: AVX512 vec_dot impl"}},{"before":null,"after":"f868a3ca3dd46bf7ea9f834452282fcb652ff7f4","ref":"refs/heads/quantization-continuous-layouts","pushedAt":"2023-04-17T22:33:10.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"unbounded","name":null,"path":"/unbounded","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/789931?s=80&v=4"},"commit":{"message":"q4_0c continous row layout\n\nIntroduce alternative quantized formats q4_0c and q8_0c, corresponding\nexactly to q4_0 and q8_0, except that quantized values and scales are\nlaid out continuously in memory, and the nibbles in q4_0 are rearranged.\n\nThis should simplify SIMD implementations, at the expense of slighly\nmore complex scalar implementations.","shortMessageHtmlLink":"q4_0c continous row layout"}},{"before":"cd3bc3701f835e9e52ad532617460cde38e25bc4","after":"e95a8336d58da45fae6fce2175924cb616a0aa39","ref":"refs/heads/quantize-tests","pushedAt":"2023-04-15T22:34:20.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"unbounded","name":null,"path":"/unbounded","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/789931?s=80&v=4"},"commit":{"message":"test-quantize: fix for q8_0 intermediates","shortMessageHtmlLink":"test-quantize: fix for q8_0 
intermediates"}},{"before":null,"after":"2847466d0d3eddcecba99f1cea26b35e8a2f6750","ref":"refs/heads/q5_0-quantization","pushedAt":"2023-04-14T21:51:28.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"unbounded","name":null,"path":"/unbounded","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/789931?s=80&v=4"},"commit":{"message":"q5_0 quantization\n\nLinear 5-bit quantization; similar to q4_0 but one more bit, and using\na float16 to store the magnitude.\nThis makes it slightly smaller than q4_1, but with better error\nmeasurements.","shortMessageHtmlLink":"q5_0 quantization"}},{"before":"2a0ffeb9c122fb14b43caf35c0a02aef371d6668","after":"cd3bc3701f835e9e52ad532617460cde38e25bc4","ref":"refs/heads/quantize-tests","pushedAt":"2023-04-14T21:44:09.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"unbounded","name":null,"path":"/unbounded","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/789931?s=80&v=4"},"commit":{"message":"test-quantize: remove\n\nPer PR comment, subsumed by test-quantize-fns","shortMessageHtmlLink":"test-quantize: remove"}},{"before":"0212090e6b166f358101e663b5e25b5810f031a7","after":"2a0ffeb9c122fb14b43caf35c0a02aef371d6668","ref":"refs/heads/quantize-tests","pushedAt":"2023-04-14T21:18:11.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"unbounded","name":null,"path":"/unbounded","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/789931?s=80&v=4"},"commit":{"message":"test-quantize: remove\n\nPer PR comment, subsumed by test-quantize-fns","shortMessageHtmlLink":"test-quantize: remove"}},{"before":null,"after":"0212090e6b166f358101e663b5e25b5810f031a7","ref":"refs/heads/quantize-tests","pushedAt":"2023-04-13T22:34:44.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"unbounded","name":null,"path":"/unbounded","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/789931?s=80&v=4"},"commit":{"message":"Unit test for quantization 
functions\n\nUse the ggml_internal_get_quantize_fn function to loop through all\nquantization formats and run a sanity check on the result.\n\nAlso add a microbenchmark that times these functions directly without\nrunning the rest of the GGML graph.","shortMessageHtmlLink":"Unit test for quantization functions"}},{"before":"ee8f3c74a0ffd0f39c16fc253eed7a008a95a425","after":"24eb42ae321fedcc3bb413f80e4aeab12a052da4","ref":"refs/heads/q4-q-harder","pushedAt":"2023-04-08T22:17:38.000Z","pushType":"push","commitsCount":3,"pusher":{"login":"unbounded","name":null,"path":"/unbounded","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/789931?s=80&v=4"},"commit":{"message":"Only keep current elements in heap\n\nKeep QK elements instead of QK*16, generating the next event for each\nvisit.","shortMessageHtmlLink":"Only keep current elements in heap"}},{"before":"7831a02be3d87f0dc6da9d93c888797e25ea855f","after":"ee8f3c74a0ffd0f39c16fc253eed7a008a95a425","ref":"refs/heads/q4-q-harder","pushedAt":"2023-04-07T23:14:42.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"unbounded","name":null,"path":"/unbounded","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/789931?s=80&v=4"},"commit":{"message":"Really slow RMS \"optimal\" scaling for q4_0\n\nUse a sweep line approach to scan all configurations of quantization,\nexamining every changeover point where a quantize value changes,\nand find the optimal scaling for each configuration analytically.","shortMessageHtmlLink":"Really slow RMS \"optimal\" scaling for q4_0"}},{"before":null,"after":"7831a02be3d87f0dc6da9d93c888797e25ea855f","ref":"refs/heads/q4-q-harder","pushedAt":"2023-04-07T23:07:19.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"unbounded","name":null,"path":"/unbounded","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/789931?s=80&v=4"},"commit":{"message":"Really slow RMS \"optimal\" scaling for q4_0\n\nUse a sweep line approach to scan all 
configurations of quantization,\nexamining every changeover point where a quantize value changes,\nand find the optimal scaling for each configuration analytically.","shortMessageHtmlLink":"Really slow RMS \"optimal\" scaling for q4_0"}},{"before":"72905b6631d7d913de5be6702ea36e5347458cee","after":"944c7037cca86b005a59f78ff9993a6b6c4807a9","ref":"refs/heads/q4_0_range_fix","pushedAt":"2023-04-05T20:57:11.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"unbounded","name":null,"path":"/unbounded","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/789931?s=80&v=4"},"commit":{"message":"Fix type error in quantize_row_q4_1 for Arm NEON","shortMessageHtmlLink":"Fix type error in quantize_row_q4_1 for Arm NEON"}},{"before":"63cfa43200860fa81a90439d172173f93f6fc994","after":"41d4a863c901f69045d260991a08a0f202581edc","ref":"refs/heads/quantize-stats","pushedAt":"2023-04-05T20:17:10.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"unbounded","name":null,"path":"/unbounded","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/789931?s=80&v=4"},"commit":{"message":"Remove \"internal\" header files\n\nMove into main header with comment not to use, per PR feedback","shortMessageHtmlLink":"Remove \"internal\" header files"}},{"before":"d4915074c473c5c4fa9c538292bc1e569d2fd222","after":"63cfa43200860fa81a90439d172173f93f6fc994","ref":"refs/heads/quantize-stats","pushedAt":"2023-04-05T01:27:41.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"unbounded","name":null,"path":"/unbounded","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/789931?s=80&v=4"},"commit":{"message":"quantize-stats: add option to test against reference quantization\n\nExpose reference quantization implementation and add option to use it\nfor tests.","shortMessageHtmlLink":"quantize-stats: add option to test against reference 
quantization"}},{"before":"da1bcb5f564ce15654505f78d00e4cd761d57857","after":"72905b6631d7d913de5be6702ea36e5347458cee","ref":"refs/heads/q4_0_range_fix","pushedAt":"2023-04-05T00:57:04.000Z","pushType":"push","commitsCount":4,"pusher":{"login":"unbounded","name":null,"path":"/unbounded","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/789931?s=80&v=4"},"commit":{"message":"Update quantize_row_q4_1 for PowerPC\n\nUntested","shortMessageHtmlLink":"Update quantize_row_q4_1 for PowerPC"}},{"before":"5b1143ed932f1b9f8e77256df075e25ca72321f6","after":"d4915074c473c5c4fa9c538292bc1e569d2fd222","ref":"refs/heads/quantize-stats","pushedAt":"2023-04-03T22:31:52.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"unbounded","name":null,"path":"/unbounded","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/789931?s=80&v=4"},"commit":{"message":"quantize-stats: misc improvements\n\nShow RMSE instead of MSE - keeps similar range to the other metrics.\nRegex match on layer pattern.","shortMessageHtmlLink":"quantize-stats: misc improvements"}},{"before":"ed667e95816a74126252cca9a7f6390a3cf3ace0","after":"5b1143ed932f1b9f8e77256df075e25ca72321f6","ref":"refs/heads/quantize-stats","pushedAt":"2023-04-03T21:34:25.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"unbounded","name":null,"path":"/unbounded","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/789931?s=80&v=4"},"commit":{"message":"quantize-stats: show percentiles\n\nShow some error percentiles, should be less noisy than just the max error.","shortMessageHtmlLink":"quantize-stats: show percentiles"}},{"before":"0111e278c9f9e0ac14691d86277ecf246504ed99","after":"da1bcb5f564ce15654505f78d00e4cd761d57857","ref":"refs/heads/q4_0_range_fix","pushedAt":"2023-04-03T02:34:22.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"unbounded","name":null,"path":"/unbounded","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/789931?s=80&v=4"},"commit":{"message":"Fix q4_0 
test","shortMessageHtmlLink":"Fix q4_0 test"}},{"before":null,"after":"0111e278c9f9e0ac14691d86277ecf246504ed99","ref":"refs/heads/q4_0_range_fix","pushedAt":"2023-04-03T01:27:44.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"unbounded","name":null,"path":"/unbounded","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/789931?s=80&v=4"},"commit":{"message":"Use full range for q4_0 quantization\n\nBy keeping the sign of the highest magnitude, we can make sure the\nhighest value maps to -8, which is currently unused.\nThis is a bit of a freebie since it is fully backwards compatible with\nthe current format.\n\nquantize-stats output:\nbefore(7B):\nq4_0 : mse 0.00000492, maxerr 0.14257812\nafter(7B):\nq4_0 : mse 0.00000386, maxerr 0.18200684\n\n(Most layers have reduced maxerr under this rule, but the total max\nerror is indeed slightly higher)","shortMessageHtmlLink":"Use full range for q4_0 quantization"}},{"before":"687ce378f102ab2ceec3550bad87eb61ed5ec422","after":"ed667e95816a74126252cca9a7f6390a3cf3ace0","ref":"refs/heads/quantize-stats","pushedAt":"2023-04-03T01:26:40.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"unbounded","name":null,"path":"/unbounded","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/789931?s=80&v=4"},"commit":{"message":"quantize-stats command\n\nCommand that calculates some statistics over the errors introduced by\nquantization, at the moment mean square error and max error for layer\nweights. Should be useful for testing quantization improvements.\n\nNeeds some internal state from ggml and llama that should not be part of\nthe public API.","shortMessageHtmlLink":"quantize-stats command"}}],"hasNextPage":true,"hasPreviousPage":false,"activityType":"all","actor":null,"timePeriod":"all","sort":"DESC","perPage":30,"cursor":"djE6ks8AAAADJ0d1gwA","startCursor":null,"endCursor":null}},"title":"Activity · unbounded/llama.cpp"}