Skip to content

Commit

Permalink
feat(website): Add configurable token count encoding and change default to o200k_base
Browse files Browse the repository at this point in the history
  • Loading branch information
yamadashy committed Jan 5, 2025
1 parent 59cb566 commit 76904c9
Show file tree
Hide file tree
Showing 15 changed files with 70 additions and 13 deletions.
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,7 @@ This format provides a clean, readable structure that is both human-friendly and
- `--remote <url>`: Process a remote Git repository
- `--remote-branch <name>`: Specify the remote branch name, tag, or commit hash (defaults to repository default branch)
- `--no-security-check`: Disable security check
- `--token-count-encoding <encoding>`: Specify token count encoding (e.g., `o200k_base`, `cl100k_base`)
- `--verbose`: Enable verbose logging

Examples:
Expand Down Expand Up @@ -409,6 +410,7 @@ Here's an explanation of the configuration options:
|`ignore.useDefaultPatterns`| Whether to use default ignore patterns |`true`|
|`ignore.customPatterns`| Additional patterns to ignore (using [glob patterns](https://github.com/mrmlnc/fast-glob?tab=readme-ov-file#pattern-syntax)) |`[]`|
|`security.enableSecurityCheck`| Whether to perform security checks on files |`true`|
|`tokenCount.encoding`| Tokenizer encoding used when counting tokens, so the reported count matches the target AI model's context limit (e.g., `o200k_base` for GPT-4o, `cl100k_base` for GPT-4/GPT-3.5) |`"o200k_base"`|

Example configuration:

Expand Down Expand Up @@ -436,6 +438,9 @@ Example configuration:
},
"security": {
"enableSecurityCheck": true
},
"tokenCount": {
"encoding": "o200k_base"
}
}
```
Expand Down
3 changes: 3 additions & 0 deletions repomix.config.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,8 @@
},
"security": {
"enableSecurityCheck": true
},
"tokenCount": {
"encoding": "o200k_base"
}
}
3 changes: 3 additions & 0 deletions src/cli/actions/defaultAction.ts
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,9 @@ const buildCliConfig = (options: CliOptions): RepomixConfigCli => {
if (options.removeEmptyLines !== undefined) {
cliConfig.output = { ...cliConfig.output, removeEmptyLines: options.removeEmptyLines };
}
if (options.tokenCountEncoding) {
cliConfig.tokenCount = { encoding: options.tokenCountEncoding };
}

Check warning on line 132 in src/cli/actions/defaultAction.ts

View check run for this annotation

Codecov / codecov/patch

src/cli/actions/defaultAction.ts#L131-L132

Added lines #L131 - L132 were not covered by tests

try {
return repomixConfigCliSchema.parse(cliConfig);
Expand Down
6 changes: 4 additions & 2 deletions src/cli/cliRun.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@ export interface CliOptions extends OptionValues {
securityCheck?: boolean;
fileSummary?: boolean;
directoryStructure?: boolean;
removeComments?: boolean; // 追加
removeEmptyLines?: boolean; // 追加
removeComments?: boolean;
removeEmptyLines?: boolean;
tokenCountEncoding?: string;
}

export const run = async () => {
Expand All @@ -54,6 +55,7 @@ export const run = async () => {
.option('--init', 'initialize a new repomix.config.json file')
.option('--global', 'use global configuration (only applicable with --init)')
.option('--remote <url>', 'process a remote Git repository')
.option('--token-count-encoding <encoding>', 'specify token count encoding (e.g., o200k_base, cl100k_base)')
.option(
'--remote-branch <name>',
'specify the remote branch name, tag, or commit hash (defaults to repository default branch)',
Expand Down
14 changes: 14 additions & 0 deletions src/config/configSchema.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import type { TiktokenEncoding } from 'tiktoken';
import { z } from 'zod';

// Output style enum
Expand Down Expand Up @@ -42,6 +43,11 @@ export const repomixConfigBaseSchema = z.object({
enableSecurityCheck: z.boolean().optional(),
})
.optional(),
tokenCount: z
.object({
encoding: z.string().optional(),
})
.optional(),
});

// Default config schema with default values
Expand Down Expand Up @@ -75,6 +81,14 @@ export const repomixConfigDefaultSchema = z.object({
enableSecurityCheck: z.boolean().default(true),
})
.default({}),
tokenCount: z
.object({
encoding: z
.string()
.default('o200k_base')
.transform((val) => val as TiktokenEncoding),
})
.default({}),
});

export const repomixConfigFileSchema = repomixConfigBaseSchema;
Expand Down
5 changes: 4 additions & 1 deletion src/core/metrics/calculateMetrics.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import { TiktokenEncoding } from 'tiktoken';
import type { RepomixConfigMerged } from '../../config/configSchema.js';
import type { RepomixProgressCallback } from '../../shared/types.js';
import type { ProcessedFile } from '../file/fileTypes.js';
import { TokenCounter } from '../tokenCount/tokenCount.js';
Expand All @@ -16,8 +18,9 @@ export const calculateMetrics = async (
processedFiles: ProcessedFile[],
output: string,
progressCallback: RepomixProgressCallback,
config: RepomixConfigMerged,
): Promise<CalculateMetricsResult> => {
const tokenCounter = new TokenCounter();
const tokenCounter = new TokenCounter(config.tokenCount.encoding);

progressCallback('Calculating metrics...');
const fileMetrics = await calculateAllFileMetrics(processedFiles, tokenCounter, progressCallback);
Expand Down
2 changes: 1 addition & 1 deletion src/core/packager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ export const pack = async (

await deps.copyToClipboardIfEnabled(output, progressCallback, config);

const metrics = await deps.calculateMetrics(processedFiles, output, progressCallback);
const metrics = await deps.calculateMetrics(processedFiles, output, progressCallback, config);

return {
...metrics,
Expand Down
8 changes: 4 additions & 4 deletions src/core/tokenCount/tokenCount.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import { type Tiktoken, get_encoding } from 'tiktoken';
import { type Tiktoken, type TiktokenEncoding, get_encoding } from 'tiktoken';
import { logger } from '../../shared/logger.js';

export class TokenCounter {
private encoding: Tiktoken;

constructor() {
// Setup encoding
this.encoding = get_encoding('cl100k_base');
constructor(encodingName: TiktokenEncoding) {
// Setup encoding with the specified model
this.encoding = get_encoding(encodingName);
}

public countTokens(content: string, filePath?: string): number {
Expand Down
3 changes: 3 additions & 0 deletions tests/cli/actions/defaultAction.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ describe('defaultAction', () => {
security: {
enableSecurityCheck: true,
},
tokenCount: {
encoding: 'o200k_base',
},
});
vi.mocked(packager.pack).mockResolvedValue({
totalFiles: 10,
Expand Down
6 changes: 6 additions & 0 deletions tests/cli/cliRun.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ describe('cliRun', () => {
security: {
enableSecurityCheck: true,
},
tokenCount: {
encoding: 'o200k_base',
},
} satisfies RepomixConfigMerged,
packResult: {
totalFiles: 0,
Expand Down Expand Up @@ -98,6 +101,9 @@ describe('cliRun', () => {
security: {
enableSecurityCheck: true,
},
tokenCount: {
encoding: 'o200k_base',
},
} satisfies RepomixConfigMerged,
packResult: {
totalFiles: 0,
Expand Down
6 changes: 6 additions & 0 deletions tests/config/configSchema.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ describe('configSchema', () => {
security: {
enableSecurityCheck: true,
},
tokenCount: {
encoding: 'o200k_base',
},
};
expect(repomixConfigDefaultSchema.parse(validConfig)).toEqual(validConfig);
});
Expand Down Expand Up @@ -161,6 +164,9 @@ describe('configSchema', () => {
security: {
enableSecurityCheck: true,
},
tokenCount: {
encoding: 'o200k_base',
},
};
expect(repomixConfigMergedSchema.parse(validConfig)).toEqual(validConfig);
});
Expand Down
5 changes: 4 additions & 1 deletion tests/core/metrics/calculateMetrics.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import { calculateAllFileMetrics } from '../../../src/core/metrics/calculateAllF
import { calculateMetrics } from '../../../src/core/metrics/calculateMetrics.js';
import { TokenCounter } from '../../../src/core/tokenCount/tokenCount.js';
import type { RepomixProgressCallback } from '../../../src/shared/types.js';
import { createMockConfig } from '../../testing/testUtils.js';

vi.mock('../../../src/core/tokenCount/tokenCount.js');
vi.mock('../../../src/core/metrics/aggregateMetrics.js');
Expand Down Expand Up @@ -46,7 +47,9 @@ describe('calculateMetrics', () => {
};
(aggregateMetrics as unknown as Mock).mockReturnValue(aggregatedResult);

const result = await calculateMetrics(processedFiles, output, progressCallback);
const config = createMockConfig();

const result = await calculateMetrics(processedFiles, output, progressCallback, config);

expect(progressCallback).toHaveBeenCalledWith('Calculating metrics...');
expect(calculateAllFileMetrics).toHaveBeenCalledWith(processedFiles, mockTokenCounter, progressCallback);
Expand Down
7 changes: 6 additions & 1 deletion tests/core/packager.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,12 @@ describe('packager', () => {
expect(mockDeps.generateOutput).toHaveBeenCalledWith('root', mockConfig, mockProcessedFiles, mockFilePaths);
expect(mockDeps.writeOutputToDisk).toHaveBeenCalledWith(mockOutput, mockConfig);
expect(mockDeps.copyToClipboardIfEnabled).toHaveBeenCalledWith(mockOutput, progressCallback, mockConfig);
expect(mockDeps.calculateMetrics).toHaveBeenCalledWith(mockProcessedFiles, mockOutput, progressCallback);
expect(mockDeps.calculateMetrics).toHaveBeenCalledWith(
mockProcessedFiles,
mockOutput,
progressCallback,
mockConfig,
);

// Check the result of pack function
expect(result.totalFiles).toBe(2);
Expand Down
6 changes: 3 additions & 3 deletions tests/core/tokenCount/tokenCount.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,16 @@ describe('TokenCounter', () => {
vi.mocked(get_encoding).mockReturnValue(mockEncoder as unknown as Tiktoken);

// Create new TokenCounter instance
tokenCounter = new TokenCounter();
tokenCounter = new TokenCounter('o200k_base');
});

afterEach(() => {
tokenCounter.free();
vi.resetAllMocks();
});

test('should initialize with cl100k_base encoding', () => {
expect(get_encoding).toHaveBeenCalledWith('cl100k_base');
test('should initialize with o200k_base encoding', () => {
expect(get_encoding).toHaveBeenCalledWith('o200k_base');
});

test('should correctly count tokens for simple text', () => {
Expand Down
4 changes: 4 additions & 0 deletions tests/testing/testUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ export const createMockConfig = (config: DeepPartial<RepomixConfigMerged> = {}):
...defaultConfig.security,
...config.security,
},
tokenCount: {
...defaultConfig.tokenCount,
...config.tokenCount,
},
};
};

Expand Down

0 comments on commit 76904c9

Please sign in to comment.