@@ -254,7 +254,7 @@ async function selectChunks(
     const limiter = createLimiter(maxConcurrency);
     const batches = makeBatches(allChunks, 250000); // TODO: Tune this more
     console_log(
-        ` [maxConcurrency = ${maxConcurrency}, ${batches.length} batches]`,
+        ` [${batches.length} batches, maxConcurrency ${maxConcurrency}]`,
     );
     for (const batch of batches) {
         const p = limiter(() =>
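A note on the `limiter(...)` pattern in the hunk above: `createLimiter(maxConcurrency)` caps how many batch requests run at once, and each unit of work is wrapped in `limiter(() => ...)`. Below is a minimal sketch of such a promise-based limiter, assuming the same call shape; the package's actual implementation is not shown in this diff.

```ts
// Sketch only: a promise-based concurrency limiter matching the call shape
// limiter(() => promise) used above. Not the package's actual createLimiter.
function createLimiter(maxConcurrency: number) {
    let active = 0;
    const waiting: (() => void)[] = [];
    return async function limiter<T>(task: () => Promise<T>): Promise<T> {
        // Park the caller until a slot frees up.
        while (active >= maxConcurrency) {
            await new Promise<void>((resolve) => waiting.push(resolve));
        }
        active++;
        try {
            return await task();
        } finally {
            active--;
            waiting.shift()?.(); // Wake one parked caller, if any.
        }
    };
}

// Usage mirroring the hunk above (processBatch is a hypothetical stand-in):
// const limiter = createLimiter(maxConcurrency);
// const p = limiter(() => processBatch(batch));
```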
@@ -280,13 +280,13 @@ async function selectChunks(
     // console_log(` [${allChunks.map((c) => (c.relevance)).join(", ")}]`);
     const chunks = keepBestChunks(allChunkDescs, allChunks, 250000); // TODO: Tune this more
     console_log(` [Keeping ${chunks.length} chunks]`);
-    for (let i = 0; i < chunks.length; i++) {
-        const chunk = chunks[i];
-        const chunkDesc = allChunkDescs[i];
-        console_log(
-            ` [${chunkDesc.relevance} ${path.basename(chunk.fileName)}:${chunk.codeName} ${chunk.chunkId}]`,
-        );
-    }
+    // for (let i = 0; i < chunks.length; i++) {
+    //     const chunk = chunks[i];
+    //     const chunkDesc = allChunkDescs[i];
+    //     console_log(
+    //         ` [${chunkDesc.relevance} ${path.basename(chunk.fileName)}:${chunk.codeName} ${chunk.chunkId}]`,
+    //     );
+    // }
     return chunks;
 }
 
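For readers unfamiliar with `keepBestChunks(allChunkDescs, allChunks, 250000)`: the call site suggests it keeps the highest-relevance chunks until a size budget is exhausted. Here is a hedged sketch of that selection logic; the `chunkSize` measure and any fields beyond `relevance`, `chunkId`, `fileName`, and `codeName` are assumptions, not the repository's actual types.

```ts
// Sketch only: rank chunk descriptors by relevance and keep matching chunks
// until the budget runs out. Not the repository's actual keepBestChunks.
interface ChunkDesc {
    chunkId: string;
    relevance: number;
}
interface Chunk {
    chunkId: string;
    fileName: string;
    codeName: string;
}

function keepBestChunksSketch(
    descs: ChunkDesc[],
    chunks: Chunk[],
    budget: number, // e.g. 250000, as at the call site above
    chunkSize: (chunk: Chunk) => number, // hypothetical size measure
): Chunk[] {
    const byId = new Map(chunks.map((c): [string, Chunk] => [c.chunkId, c]));
    const ranked = [...descs].sort((a, b) => b.relevance - a.relevance);
    const kept: Chunk[] = [];
    let used = 0;
    for (const desc of ranked) {
        const chunk = byId.get(desc.chunkId);
        if (!chunk) continue;
        const size = chunkSize(chunk);
        if (used + size > budget) break;
        kept.push(chunk);
        used += size;
    }
    return kept;
}
```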
@@ -611,14 +611,10 @@ async function loadDatabase(
         `[Chunked ${allChunkedFiles.length} files into ${allChunks.length} chunks]`,
     );
 
-    // Let's see how things go without summaries.
-    // They are slow and don't fit in the oracle's buffer.
-    // TODO: Restore this feature.
-
-    // // 1c. Use a fast model to summarize all chunks.
-    // if (allChunks.length) {
-    //     await summarizeChunks(context, allChunks);
-    // }
+    // 1c. Use a fast model to summarize all chunks.
+    if (allChunks.length) {
+        await summarizeChunks(context, allChunks);
+    }
 
     return db;
 }
@@ -697,12 +693,15 @@ export async function summarizeChunks(
     console_log(
         `[Step 1c: Summarizing ${chunks.length} chunks (may take a while)]`,
     );
+    // NOTE: We cannot stuff the buffer, because the completion size
+    // is limited to 4096 tokens, and we expect a certain number of
+    // tokens per chunk. Experimentally, 40 chunks per job works great.
     const maxConcurrency =
-        parseInt(process.env.AZURE_OPENAI_MAX_CONCURRENCY ?? "0") ?? 40;
-    let chunksPerJob = 30;
+        parseInt(process.env.AZURE_OPENAI_MAX_CONCURRENCY ?? "0") ?? 5;
+    let chunksPerJob = 40;
     let numJobs = Math.ceil(chunks.length / chunksPerJob);
     console_log(
-        ` [maxConcurrency = ${maxConcurrency}, chunksPerJob = ${chunksPerJob}, numJobs = ${numJobs}]`,
+        ` [${chunksPerJob} chunks/job, ${numJobs} jobs, maxConcurrency ${maxConcurrency}]`,
     );
     const limiter = createLimiter(maxConcurrency);
     const promises: Promise<void>[] = [];
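The new numbers in this hunk (40 chunks per job, concurrency read from `AZURE_OPENAI_MAX_CONCURRENCY`) feed the fan-out that follows in `summarizeChunks`. The loop itself is not part of the diff; the sketch below shows what it presumably looks like inside the function body, reusing the names declared above (the argument list of `summarizeChunkSlice` is an assumption).

```ts
// Sketch only: slice chunks into jobs of chunksPerJob and schedule each
// slice through the limiter; the real loop body is not shown in the diff.
for (let i = 0; i < chunks.length; i += chunksPerJob) {
    const slice = chunks.slice(i, i + chunksPerJob);
    promises.push(limiter(() => summarizeChunkSlice(context, slice)));
}
await Promise.all(promises);
```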
@@ -732,10 +731,9 @@ async function summarizeChunkSlice(
         summarizer.translate(prompt),
     );
     if (!result) {
-        const chunkSummary = chunks
-            .map((c) => `${path.basename(c.fileName)}:${c.codeName}`)
-            .join(", ");
-        console_log(` [Failed to summarize chunks for ${chunkSummary}]`);
+        console_log(
+            ` [Failed to summarize chunks for ${chunks.length} chunks]`,
+        );
         return;
     }