Skip to content

Commit

Permalink
updates
Browse files Browse the repository at this point in the history
3.14.3
  • Loading branch information
rjmacarthy committed Aug 12, 2024
1 parent 714a5e3 commit b41bde7
Show file tree
Hide file tree
Showing 9 changed files with 333 additions and 263 deletions.
271 changes: 95 additions & 176 deletions package-lock.json

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"name": "twinny",
"displayName": "twinny - AI Code Completion and Chat",
"description": "Locally hosted AI code completion plugin for vscode",
"version": "3.14.2",
"version": "3.14.3",
"icon": "assets/icon.png",
"keywords": [
"code-inference",
Expand Down Expand Up @@ -445,7 +445,7 @@
"web-tree-sitter": "^0.22.1"
},
"dependencies": {
"@lancedb/lancedb": "^0.5.2",
"@lancedb/lancedb": "^0.9.0",
"@tiptap/extension-mention": "^2.5.9",
"@tiptap/extension-placeholder": "^2.5.9",
"@tiptap/pm": "^2.5.9",
Expand Down
15 changes: 13 additions & 2 deletions src/common/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,9 @@ export const EXTENSION_CONTEXT_NAME = {
twinnyMaxChunkSize: 'twinnyMaxChunkSize',
twinnyMinChunkSize: 'twinnyMinChunkSize',
twinnyOverlapSize: 'twinnyOverlapSize',
twinnyRelevantFilePaths: 'twinnyRelevantFilePaths',
twinnyRelevantCodeSnippets: 'twinnyRelevantCodeSnippets',
twinnyVectorSearchMetric : 'twinnyVectorSearchMetric',
twinnySymmetryTab: 'twinnySymmetryTab'
}

Expand Down Expand Up @@ -401,6 +404,16 @@ export const EMBEDDING_IGNORE_LIST = [
'yml'
]

/**
 * Fallback number of file-path matches requested from the vector search.
 * The chat service uses it when the value stored under
 * `twinnyRelevantFilePaths` is missing, non-numeric, or 0
 * (`Number(stored) || DEFAULT_RELEVANT_FILE_COUNT`).
 */
export const DEFAULT_RELEVANT_FILE_COUNT = 10
/**
 * Fallback number of code-snippet matches requested from the vector search.
 * The chat service uses it when the value stored under
 * `twinnyRelevantCodeSnippets` is missing, non-numeric, or 0.
 */
export const DEFAULT_RELEVANT_CODE_COUNT = 5
/**
 * Fallback distance metric passed to the embedding database when nothing is
 * stored under `twinnyVectorSearchMetric`.
 * NOTE(review): `EmbeddingDatabase.getDocuments` declares its own parameter
 * default as 'cosine', which differs from this value — confirm which default
 * is intended.
 */
export const DEFAULT_VECTOR_SEARCH_METRIC = 'l2'

/**
 * Names of the vector-search distance metrics. These are the same three
 * values accepted by the `metric` parameter of
 * `EmbeddingDatabase.getDocuments` ('cosine' | 'l2' | 'dot').
 * NOTE(review): presumably used to populate a metric selector in the UI;
 * the consumer is not visible here — confirm.
 */
export const EMBEDDING_METRICS = [
'cosine',
'l2',
'dot'
]

export const MULTILINE_OUTSIDE = [
'class_body',
'class',
Expand Down Expand Up @@ -428,9 +441,7 @@ export const MULTILINE_TYPES = [...MULTILINE_OUTSIDE, ...MULTILINE_INSIDE]

/**
 * Blank-line sequences in their LF (`\n\n`) and CRLF (`\r\n\r\n`) forms.
 * NOTE(review): presumably marks where a multi-line completion may be cut;
 * the consumer is not visible here — confirm.
 */
export const MULTI_LINE_DELIMITERS = ['\n\n', '\r\n\r\n']

export const RELEVANT_FILE_COUNT = 10 // todo make this configurable

export const RELEVANT_CODE_COUNT = 5 // todo make this configurable

export const SYMMETRY_DATA_MESSAGE = {
disconnect: 'disconnect',
Expand Down
82 changes: 54 additions & 28 deletions src/extension/chat-service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,26 +10,26 @@ import * as path from 'path'
import * as fs from 'fs/promises'

import {
EXTENSION_CONTEXT_NAME,
EVENT_NAME,
WEBUI_TABS,
ACTIVE_CHAT_PROVIDER_STORAGE_KEY,
DEFAULT_RELEVANT_CODE_COUNT,
DEFAULT_RELEVANT_FILE_COUNT,
DEFAULT_RERANK_THRESHOLD,
DEFAULT_VECTOR_SEARCH_METRIC,
EVENT_NAME,
EXTENSION_CONTEXT_NAME,
EXTENSION_SESSION_NAME,
SYMMETRY_EMITTER_KEY,
SYSTEM,
USER,
RELEVANT_FILE_COUNT,
RELEVANT_CODE_COUNT,
SYMMETRY_EMITTER_KEY,
DEFAULT_RERANK_THRESHOLD,
EXTENSION_SESSION_NAME
WEBUI_TABS
} from '../common/constants'
import {
StreamResponse,
RequestBodyBase,
ServerMessage,
TemplateData,
Message,
StreamRequestOptions,
EmbeddedDocument
StreamRequestOptions
} from '../common/types'
import {
getChatDataFromProvider,
Expand All @@ -56,7 +56,6 @@ export class ChatService {
private _context?: ExtensionContext
private _controller?: AbortController
private _db?: EmbeddingDatabase
private _documents: EmbeddedDocument[] = []
private _keepAlive = this._config.get('keepAlive') as string | number
private _numPredictChat = this._config.get('numPredictChat') as number
private _promptTemplate = ''
Expand Down Expand Up @@ -121,9 +120,25 @@ export class ChatService {

if (!embedding) return []

const relevantFileCountContext = `${EVENT_NAME.twinnyGlobalContext}-${EXTENSION_CONTEXT_NAME.twinnyRelevantFilePaths}`
const stored = this._context?.globalState.get(
relevantFileCountContext
) as number
const relevantFileCount = Number(stored) || DEFAULT_RELEVANT_FILE_COUNT

const storedMetric = this._context?.globalState.get(
`${EVENT_NAME.twinnyGlobalContext}-${EXTENSION_CONTEXT_NAME.twinnyVectorSearchMetric}`
) as number

const metric = storedMetric || DEFAULT_VECTOR_SEARCH_METRIC

const filePaths =
(await this._db.getDocuments(embedding, RELEVANT_FILE_COUNT, table)) ||
[]
(await this._db.getDocuments(
embedding,
relevantFileCount,
table,
metric as 'cosine' | 'l2' | 'dot'
)) || []

if (!filePaths.length) return []

Expand Down Expand Up @@ -209,20 +224,27 @@ export class ChatService {
const rerankThreshold = this.getRerankThreshold()

if (await this._db.hasEmbeddingTable(table)) {
const relevantCodeCountContext = `${EVENT_NAME.twinnyGlobalContext}-${EXTENSION_CONTEXT_NAME.twinnyRelevantCodeSnippets}`
const stored = this._context?.globalState.get(
relevantCodeCountContext
) as number
const relevantCodeCount = Number(stored) || DEFAULT_RELEVANT_CODE_COUNT

const embedding = await this._db.fetchModelEmbedding(text)

if (!embedding) return ''

const query = relevantFiles?.length
? `file IN ("${relevantFiles.map((file) => file[0]).join('","')}")`
: ''
const storedMetric = this._context?.globalState.get(
`${EVENT_NAME.twinnyGlobalContext}-${EXTENSION_CONTEXT_NAME.twinnyVectorSearchMetric}`
) as number
const metric = storedMetric || DEFAULT_VECTOR_SEARCH_METRIC

const documents =
(await this._db.getDocuments(
embedding,
RELEVANT_CODE_COUNT,
relevantCodeCount,
table,
query
metric as 'cosine' | 'l2' | 'dot'
)) || []

const documentScores = await this._reranker.rerank(
Expand Down Expand Up @@ -443,23 +465,21 @@ export class ChatService {
} as ServerMessage<string>)
}

public async getRagContext(
text?: string,
): Promise<string | null> {

public async getRagContext(text?: string): Promise<string | null> {
const symmetryConnected = this._sessionManager?.get(
EXTENSION_SESSION_NAME.twinnySymmetryConnection
)

const workspaceMentioned = text?.includes('@workspace')

if (symmetryConnected || !workspaceMentioned)
return null
if (symmetryConnected || !workspaceMentioned) return null

updateLoadingMessage(this._view, 'Exploring knowledge base')

const relevantFiles = await this.getRelevantFiles(text)
const relevantCode = await this.getRelevantCode(text, relevantFiles)
const prompt = text?.replace(/@workspace/g, '')

const relevantFiles = await this.getRelevantFiles(prompt)
const relevantCode = await this.getRelevantCode(prompt, relevantFiles)

let combinedContext = ''

Expand Down Expand Up @@ -507,20 +527,26 @@ export class ChatService {
}

const ragContext = await this.getRagContext(text)

const cleanedText = text?.replace(/@workspace/g, '').trim()

if (ragContext) {
additionalContext += `Additional Context:\n${ragContext}\n\n`
}

const updatedMessages = [systemMessage, ...messages.slice(0, -1)]

if (additionalContext) {
const lastMessageContent = `${text}\n\n${additionalContext.trim()}`
const lastMessageContent = `${cleanedText}\n\n${additionalContext.trim()}`
updatedMessages.push({
role: USER,
content: lastMessageContent
})
} else {
updatedMessages.push(lastMessage)
updatedMessages.push({
...lastMessage,
content: cleanedText
})
}
updateLoadingMessage(this._view, 'Thinking')
const request = this.buildStreamRequest(updatedMessages)
Expand Down
68 changes: 52 additions & 16 deletions src/extension/embeddings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ import {
import { TwinnyProvider } from './provider-manager'
import { getDocumentSplitChunks } from './utils'
import { IntoVector } from '@lancedb/lancedb/dist/arrow'
import { Logger } from '../common/logger'

// Module-level logger; used below to report paths skipped because they match
// a .gitignore pattern.
const logger = new Logger()

export class EmbeddingDatabase {
private _config = vscode.workspace.getConfiguration('twinny')
Expand Down Expand Up @@ -87,26 +90,32 @@ export class EmbeddingDatabase {

private getAllFilePaths = async (dirPath: string): Promise<string[]> => {
let filePaths: string[] = []
const dirents = await fs.promises.readdir(dirPath, {
withFileTypes: true
})
const gitIgnoredFiles = this.readGitIgnoreFile()
const dirents = await fs.promises.readdir(dirPath, { withFileTypes: true })
const gitIgnoredFiles = this.readGitIgnoreFile() || []
const submodules = this.readGitSubmodulesFile()

const rootPath = vscode.workspace.workspaceFolders?.[0]?.uri.fsPath || ''

for (const dirent of dirents) {
const fullPath = path.join(dirPath, dirent.name)
const relativePath = path.relative(rootPath, fullPath)

if (this.getIgnoreDirectory(dirent.name)) continue

if (submodules?.some((submodule) => fullPath.includes(submodule))) {
continue
}

if (
gitIgnoredFiles?.some(
(pattern) =>
minimatch(fullPath, pattern, { dot: true }) &&
gitIgnoredFiles.some((pattern) => {
const isIgnored =
minimatch(relativePath, pattern, { dot: true, matchBase: true }) &&
!pattern.startsWith('!')
)
if (isIgnored) {
logger.log(`Ignoring ${relativePath} due to pattern: ${pattern}`)
}
return isIgnored
})
) {
continue
}
Expand All @@ -131,13 +140,17 @@ export class EmbeddingDatabase {
{
location: vscode.ProgressLocation.Notification,
title: 'Embedding',
cancellable: true,
cancellable: true
},
async (progress) => {
if (!this._extensionContext) return
const promises = filePaths.map(async (filePath) => {
const content = await fs.promises.readFile(filePath, 'utf-8')
const chunks = await getDocumentSplitChunks(content, filePath, this._extensionContext)
const chunks = await getDocumentSplitChunks(
content,
filePath,
this._extensionContext
)
const filePathEmbedding = await this.fetchModelEmbedding(filePath)

this._filePaths.push({
Expand Down Expand Up @@ -207,11 +220,12 @@ export class EmbeddingDatabase {
vector: IntoVector,
limit: number,
tableName: string,
metric: 'cosine' | 'l2' | 'dot' = 'cosine',
where?: string
): Promise<EmbeddedDocument[] | undefined> {
try {
const table = await this._db?.openTable(tableName)
const query = await table?.search(vector).limit(limit)
const query = table?.search(vector).limit(limit).distanceType(metric) // add type assertion
if (where) query?.where(where)
return query?.toArray()
} catch (e) {
Expand All @@ -232,14 +246,36 @@ export class EmbeddingDatabase {
private readGitIgnoreFile(): string[] | undefined {
try {
const folders = vscode.workspace.workspaceFolders
if (!folders || folders.length === 0) return undefined
if (!folders || folders.length === 0) {
console.log('No workspace folders found')
return undefined
}

const rootPath = folders[0].uri.fsPath
if (!rootPath) return undefined
if (!rootPath) {
console.log('Root path is undefined')
return undefined
}

const gitIgnoreFilePath = path.join(rootPath, '.gitignore')
if (!fs.existsSync(gitIgnoreFilePath)) return undefined
const ignoreFileContent = fs.readFileSync(gitIgnoreFilePath).toString()
return ignoreFileContent.split('\n').filter((line: string) => line !== '')
if (!fs.existsSync(gitIgnoreFilePath)) {
console.log('.gitignore file not found at', gitIgnoreFilePath)
return undefined
}

const ignoreFileContent = fs.readFileSync(gitIgnoreFilePath, 'utf8')
return ignoreFileContent
.split('\n')
.map((line) => line.trim())
.filter((line) => line !== '' && !line.startsWith('#'))
.map((pattern) => {
if (pattern.endsWith('/')) {
return pattern + '**'
}
return pattern
})
} catch (e) {
console.error('Error reading .gitignore file:', e)
return undefined
}
}
Expand Down
15 changes: 7 additions & 8 deletions src/extension/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -495,7 +495,7 @@ export const getChunkOptions = (
const options = {
maxSize: Number(context.globalState.get(maxChunkSizeContext)) || 500,
minSize: Number(context.globalState.get(minChunkSizeContext)) || 50,
overlap: Number(context.globalState.get(overlap)) || 50
overlap: Number(context.globalState.get(overlap)) || 10
}

return options
Expand All @@ -519,7 +519,6 @@ export async function getDocumentSplitChunks(

const tree = parser.parse(content)
const chunks = getSplitChunks(tree.rootNode, options)

return combineChunks(chunks, options)
} catch (error) {
console.error(`Error parsing file ${filePath}: ${error}`)
Expand Down Expand Up @@ -611,12 +610,12 @@ export const logStreamOptions = (opts: any) => {
Streaming response from ${opts.options.hostname}:${opts.options.port}.\n\
Request body:\n${JSON.stringify(opts.body, null, 2)}\n\n
Request options:\n${JSON.stringify(opts.options, null, 2)}\n\n
Number characters in all messages = ${opts.body.messages?.reduce(
(acc: number, msg: Message) => {
Number characters in all messages = ${
opts.body.messages &&
opts.body.messages?.reduce((acc: number, msg: Message) => {
return msg.content?.length ? acc + msg.content?.length : 0
},
0
)}\n\n
`
}, 0)
}\n\n
`.trim()
)
}
Loading

0 comments on commit b41bde7

Please sign in to comment.