Skip to content

Commit 36f4e7c

Browse files
TaoChenOSUamsacha
andauthored
Copilot Chat: support multiple document import (#1675)
### Motivation and Context <!-- Thank you for your contribution to the semantic-kernel repo! Please help reviewers and future users, providing the following information: 1. Why is this change required? 2. What problem does it solve? 3. What scenario does it contribute to? 4. If it fixes an open issue, please link to the issue here. --> Copilot Chat currently only supports importing a single document at a time. Supporting multiple documents will improve user experience. ### Description <!-- Describe your changes, the overall approach, the underlying design. These notes will help understanding how your code works. Thanks! --> 1. Add multi-document support in DocumentImportController. Did a little refactoring too. 2. Create a configurable limit on the number of documents that can be imported at a time. It's currently set to 10. 3. Enable support in the webapp, both drag&drop and file explorer. Update the document history item to show multiple files. 4. Update the import document console app to support multi-doc import. ![image](https://github.com/microsoft/semantic-kernel/assets/12570346/64e025fb-de71-4bef-9903-08ad570c5e1e) Future work: https://github.com/orgs/microsoft/projects/852/views/1?pane=issue&itemId=31798351 ### Contribution Checklist <!-- Before submitting this PR, please make sure: --> - [ ] The code builds clean without any errors or warnings - [ ] The PR follows SK Contribution Guidelines (https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) - [ ] The code follows the .NET coding conventions (https://learn.microsoft.com/dotnet/csharp/fundamentals/coding-style/coding-conventions) verified with `dotnet format` - [ ] All unit tests pass, and I have added new tests where possible - [ ] I didn't break anyone 😄 --------- Co-authored-by: Aman Sachan <51973971+amsacha@users.noreply.github.com>
1 parent 5e70eef commit 36f4e7c

File tree

14 files changed

+448
-155
lines changed

14 files changed

+448
-155
lines changed

samples/apps/copilot-chat-app/importdocument/Program.cs

Lines changed: 30 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
namespace ImportDocument;
1414

1515
/// <summary>
16-
/// This console app imports a file to the CopilotChat WebAPI document memory store.
16+
/// This console app imports a list of files to the CopilotChat WebAPI document memory store.
1717
/// </summary>
1818
public static class Program
1919
{
@@ -26,30 +26,30 @@ public static void Main(string[] args)
2626
return;
2727
}
2828

29-
var fileOption = new Option<FileInfo>(name: "--file", description: "The file to import to document memory store.")
29+
var filesOption = new Option<IEnumerable<FileInfo>>(name: "--files", description: "The files to import to document memory store.")
3030
{
31-
IsRequired = true
31+
IsRequired = true,
32+
AllowMultipleArgumentsPerToken = true,
3233
};
3334

34-
// TODO: UI to retrieve ChatID from the WebApp will be added in the future with multi-user support.
3535
var chatCollectionOption = new Option<Guid>(
3636
name: "--chat-id",
3737
description: "Save the extracted context to an isolated chat collection.",
3838
getDefaultValue: () => Guid.Empty
3939
);
4040

4141
var rootCommand = new RootCommand(
42-
"This console app imports a file to the CopilotChat WebAPI's document memory store."
42+
"This console app imports files to the CopilotChat WebAPI's document memory store."
4343
)
4444
{
45-
fileOption, chatCollectionOption
45+
filesOption, chatCollectionOption
4646
};
4747

48-
rootCommand.SetHandler(async (file, chatCollectionId) =>
48+
rootCommand.SetHandler(async (files, chatCollectionId) =>
4949
{
50-
await UploadFileAsync(file, config!, chatCollectionId);
50+
await ImportFilesAsync(files, config!, chatCollectionId);
5151
},
52-
fileOption, chatCollectionOption
52+
filesOption, chatCollectionOption
5353
);
5454

5555
rootCommand.Invoke(args);
@@ -97,17 +97,20 @@ private static async Task<bool> AcquireUserAccountAsync(
9797
}
9898

9999
/// <summary>
100-
/// Conditionally uploads a file to the Document Store for parsing.
100+
/// Conditionally imports a list of files to the Document Store.
101101
/// </summary>
102-
/// <param name="file">The file to upload for injection.</param>
102+
/// <param name="files">A list of files to import.</param>
103103
/// <param name="config">Configuration.</param>
104104
/// <param name="chatCollectionId">Save the extracted context to an isolated chat collection.</param>
105-
private static async Task UploadFileAsync(FileInfo file, Config config, Guid chatCollectionId)
105+
private static async Task ImportFilesAsync(IEnumerable<FileInfo> files, Config config, Guid chatCollectionId)
106106
{
107-
if (!file.Exists)
107+
foreach (var file in files)
108108
{
109-
Console.WriteLine($"File {file.FullName} does not exist.");
110-
return;
109+
if (!file.Exists)
110+
{
111+
Console.WriteLine($"File {file.FullName} does not exist.");
112+
return;
113+
}
111114
}
112115

113116
IAccount? userAccount = null;
@@ -120,11 +123,12 @@ private static async Task UploadFileAsync(FileInfo file, Config config, Guid cha
120123
}
121124
Console.WriteLine($"Successfully acquired User ID. Continuing...");
122125

123-
using var fileContent = new StreamContent(file.OpenRead());
124-
using var formContent = new MultipartFormDataContent
126+
using var formContent = new MultipartFormDataContent();
127+
List<StreamContent> filesContent = files.Select(file => new StreamContent(file.OpenRead())).ToList();
128+
for (int i = 0; i < filesContent.Count; i++)
125129
{
126-
{ fileContent, "formFile", file.Name }
127-
};
130+
formContent.Add(filesContent[i], "formFiles", files.ElementAt(i).Name);
131+
}
128132

129133
var userId = userAccount!.HomeAccountId.Identifier;
130134
var userName = userAccount.Username;
@@ -153,6 +157,12 @@ private static async Task UploadFileAsync(FileInfo file, Config config, Guid cha
153157
// Calling UploadAsync here to make sure disposable objects are still in scope.
154158
await UploadAsync(formContent, accessToken!, config);
155159
}
160+
161+
// Dispose of all the file streams.
162+
foreach (var fileContent in filesContent)
163+
{
164+
fileContent.Dispose();
165+
}
156166
}
157167

158168
/// <summary>
@@ -185,7 +195,7 @@ private static async Task UploadAsync(
185195
try
186196
{
187197
using HttpResponseMessage response = await httpClient.PostAsync(
188-
new Uri(new Uri(config.ServiceUri), "importDocument"),
198+
new Uri(new Uri(config.ServiceUri), "importDocuments"),
189199
multipartFormDataContent
190200
);
191201

samples/apps/copilot-chat-app/importdocument/README.md

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,18 +32,23 @@ Importing documents enables Copilot Chat to have up-to-date knowledge of specifi
3232
4. **Run** the following command to import a document to the app under the global document collection that
3333
all users will have access to:
3434

35-
`dotnet run -- --file .\sample-docs\ms10k.txt`
35+
`dotnet run --files .\sample-docs\ms10k.txt`
3636

3737
Or **Run** the following command to import a document to the app under a chat-isolated document collection that
3838
only the chat session will have access to:
3939

40-
`dotnet run -- --file .\sample-docs\ms10k.txt --chat-id [chatId]`
40+
`dotnet run --files .\sample-docs\ms10k.txt --chat-id [chatId]`
4141

4242
> Note that this will open a browser window for you to sign in. Your user ID is then retrieved to make sure you have access to the chat session.
4343
4444
> Currently, only txt and pdf files are supported. A sample file is provided under ./sample-docs.
4545
4646
Importing may take some time to generate embeddings for each piece/chunk of a document.
47+
48+
To import multiple files, list them after `--files`, separated by spaces. For example:
49+
50+
`dotnet run --files .\sample-docs\ms10k.txt .\sample-docs\Microsoft-Responsible-AI-Standard-v2-General-Requirements.pdf`
51+
4752
5. Chat with the bot.
4853

4954
Examples:

0 commit comments

Comments
 (0)