Fix tests and Readme for Document Intelligence (Azure#41524)
samvaity authored Aug 15, 2024
1 parent 9125081 commit 6313070
Showing 8 changed files with 116 additions and 17 deletions.
15 changes: 8 additions & 7 deletions sdk/documentintelligence/azure-ai-documentintelligence/README.md
@@ -28,12 +28,12 @@ It includes the following main features:
<dependency>
<groupId>com.azure</groupId>
<artifactId>azure-ai-documentintelligence</artifactId>
<version>1.0.0-beta.3</version>
<version>1.0.0-beta.4</version>
</dependency>
```
[//]: # ({x-version-update-end})

> Note: This version of the client library defaults to the `"2024-02-29-preview"` version of the service.
> Note: This version of the client library defaults to the `"2024-07-31-preview"` version of the service.
This table shows the relationship between SDK versions and supported API versions of the service:

@@ -42,15 +42,16 @@ This table shows the relationship between SDK versions and supported API version
| 1.0.0-beta.1 | 2023-10-31-preview |
| 1.0.0-beta.2 | 2024-02-29-preview |
| 1.0.0-beta.3 | 2024-02-29-preview |
| 1.0.0-beta.4 | 2024-07-31-preview |

> Note: For retired models, such as `"prebuilt-businessCard"` and `"prebuilt-document"`, please rely on the older
> `azure-ai-formrecognizer` library and its older service API versions. For more information, see [Changelog][changelog].
> The table below describes the relationship between each client and its supported API version(s):
| API version | Supported clients |
|----------------------------------------|-----------------------------------------------------------------------------------------------|
| 2023-10-31-preview, 2024-02-29-preview | DocumentIntelligenceClient and DocumentIntelligenceAsyncClient |
| 2023-07-31 | DocumentAnalysisClient and DocumentModelAdministrationClient in `azure-ai-formrecognizer` SDK |
| API version | Supported clients |
|------------------------------------------------------------|-----------------------------------------------------------------------------------------------|
| 2023-10-31-preview, 2024-02-29-preview, 2024-07-31-preview | DocumentIntelligenceClient and DocumentIntelligenceAsyncClient |
| 2023-07-31 | DocumentAnalysisClient and DocumentModelAdministrationClient in `azure-ai-formrecognizer` SDK |

Please see the [Migration Guide][migration_guide] for more information about migrating from `azure-ai-formrecognizer` to `azure-ai-documentintelligence`.
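
For context on the version bump above, here is a minimal sketch of pinning the client to a specific service API version instead of relying on the library default. The builder's `serviceVersion` option is part of the public client builder; the endpoint, the key, and the exact enum constant name (assumed to follow the SDK's `V<date>_PREVIEW` naming convention) are illustrative placeholders rather than part of this diff:

```java
import com.azure.ai.documentintelligence.DocumentIntelligenceClient;
import com.azure.ai.documentintelligence.DocumentIntelligenceClientBuilder;
import com.azure.ai.documentintelligence.DocumentIntelligenceServiceVersion;
import com.azure.core.credential.AzureKeyCredential;

public final class PinServiceVersionSample {
    public static void main(String[] args) {
        // Pin the service API version explicitly rather than taking the library default
        // (2024-07-31-preview for 1.0.0-beta.4). Endpoint and key are placeholders.
        DocumentIntelligenceClient client = new DocumentIntelligenceClientBuilder()
            .endpoint("https://<your-resource>.cognitiveservices.azure.com/")
            .credential(new AzureKeyCredential("<api-key>"))
            .serviceVersion(DocumentIntelligenceServiceVersion.V2024_07_31_PREVIEW)
            .buildClient();
    }
}
```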

@@ -92,7 +93,7 @@ Authentication with AAD requires some initial setup:
<dependency>
<groupId>com.azure</groupId>
<artifactId>azure-identity</artifactId>
<version>1.13.1</version>
<version>1.13.2</version>
</dependency>
```
[//]: # ({x-version-update-end})
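
To accompany the `azure-identity` version bump, a minimal sketch of AAD authentication with `DefaultAzureCredential`; the endpoint is a placeholder and the snippet is illustrative rather than part of the commit:

```java
import com.azure.ai.documentintelligence.DocumentIntelligenceClient;
import com.azure.ai.documentintelligence.DocumentIntelligenceClientBuilder;
import com.azure.identity.DefaultAzureCredentialBuilder;

public final class AadAuthenticationSample {
    public static void main(String[] args) {
        // DefaultAzureCredential walks the standard credential chain (environment variables,
        // managed identity, developer tooling) provided by azure-identity.
        DocumentIntelligenceClient client = new DocumentIntelligenceClientBuilder()
            .endpoint("https://<your-resource>.cognitiveservices.azure.com/")
            .credential(new DefaultAzureCredentialBuilder().build())
            .buildClient();
    }
}
```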
@@ -2,5 +2,5 @@
"AssetsRepo": "Azure/azure-sdk-assets",
"AssetsRepoPrefixPath": "java",
"TagPrefix": "java/documentintelligence/azure-ai-documentintelligence",
"Tag": "java/documentintelligence/azure-ai-documentintelligence_0e670a71d8"
"Tag": "java/documentintelligence/azure-ai-documentintelligence_32bf8db3b0"
}
@@ -3,16 +3,23 @@

package com.azure.ai.documentintelligence;

import com.azure.ai.documentintelligence.models.AnalyzeBatchDocumentsRequest;
import com.azure.ai.documentintelligence.models.AnalyzeBatchResult;
import com.azure.ai.documentintelligence.models.AnalyzeBatchResultOperation;
import com.azure.ai.documentintelligence.models.AnalyzeDocumentRequest;
import com.azure.ai.documentintelligence.models.AnalyzeOutputOption;
import com.azure.ai.documentintelligence.models.AnalyzeResult;
import com.azure.ai.documentintelligence.models.AnalyzeResultOperation;
import com.azure.ai.documentintelligence.models.AzureBlobContentSource;
import com.azure.ai.documentintelligence.models.BuildDocumentClassifierRequest;
import com.azure.ai.documentintelligence.models.BuildDocumentModelRequest;
import com.azure.ai.documentintelligence.models.ClassifierDocumentTypeDetails;
import com.azure.ai.documentintelligence.models.ClassifyDocumentRequest;
import com.azure.ai.documentintelligence.models.DocumentBuildMode;
import com.azure.ai.documentintelligence.models.DocumentClassifierBuildOperationDetails;
import com.azure.ai.documentintelligence.models.DocumentClassifierDetails;
import com.azure.ai.documentintelligence.models.DocumentModelBuildOperationDetails;
import com.azure.ai.documentintelligence.models.DocumentModelDetails;
import com.azure.core.http.HttpClient;
import com.azure.core.test.annotation.RecordWithoutRequestBody;
import com.azure.core.test.http.AssertingHttpClientBuilder;
@@ -30,6 +37,7 @@
import java.util.UUID;
import java.util.concurrent.atomic.AtomicReference;

import static com.azure.ai.documentintelligence.TestUtils.BATCH_SAMPLE_PDF;
import static com.azure.ai.documentintelligence.TestUtils.CONTENT_FORM_JPG;
import static com.azure.ai.documentintelligence.TestUtils.CONTENT_GERMAN_PDF;
import static com.azure.ai.documentintelligence.TestUtils.DEFAULT_TIMEOUT;
@@ -42,6 +50,7 @@
import static com.azure.ai.documentintelligence.TestUtils.RECEIPT_CONTOSO_JPG;
import static com.azure.ai.documentintelligence.TestUtils.W2_JPG;
import static com.azure.ai.documentintelligence.TestUtils.urlRunner;
import static com.azure.ai.documentintelligence.models.AnalyzeOutputOption.PDF;

public class DocumentIntelligenceAsyncClientTest extends DocumentIntelligenceClientTestBase {

@@ -366,11 +375,10 @@ public void getAnalyzePdf(HttpClient httpClient,
@RecordWithoutRequestBody
@ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
@MethodSource("com.azure.ai.documentintelligence.TestUtils#getTestParameters")
@Disabled("The parameter Output is invalid: parameter not supported.")
public void getAnalyzeFigures(HttpClient httpClient,
DocumentIntelligenceServiceVersion serviceVersion) {
client = getDocumentAnalysisAsyncClient(httpClient, serviceVersion);
String modelID = "prebuilt-read";
String modelID = "prebuilt-layout";
dataRunner((data, dataLength) -> {
PollerFlux<AnalyzeResultOperation, AnalyzeResult>
resultPollerFlux
@@ -388,12 +396,37 @@ public void getAnalyzeFigures(HttpClient httpClient,
byte[] figuresHeader = { figuresBytes[0], figuresBytes[1], figuresBytes[2], figuresBytes[3], figuresBytes[4] };

// A PNG is expected to start with the signature bytes 0x89 'P' 'N' 'G' 0x0D (‰PNG followed by CR)
Assertions.assertArrayEquals(new byte[] { (byte) 0x89, 0x50, 0x4E, 0x47 }, figuresHeader);
Assertions.assertArrayEquals(new byte[] { (byte) -119, 80, 78, 71, 13 }, figuresHeader);
})
.expectComplete()
.verify(DEFAULT_TIMEOUT);
}, LAYOUT_SAMPLE);
}

@RecordWithoutRequestBody
@ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
@MethodSource("com.azure.ai.documentintelligence.TestUtils#getTestParameters")
@Disabled("Disabled until file available on main")
public void analyzeBatchDocuments(HttpClient httpClient,
DocumentIntelligenceServiceVersion serviceVersion) {
client = getDocumentAnalysisAsyncClient(httpClient, serviceVersion);
DocumentIntelligenceAdministrationAsyncClient adminClient = getDocumentAdminAsyncClient(httpClient, serviceVersion);
buildBatchModelRunner((trainingFilesUrl) -> {
SyncPoller<DocumentModelBuildOperationDetails, DocumentModelDetails> buildModelPoller =
adminClient
.beginBuildDocumentModel(new BuildDocumentModelRequest("modelID" + UUID.randomUUID(), DocumentBuildMode.GENERATIVE).setAzureBlobSource(new AzureBlobContentSource(trainingFilesUrl)))
.setPollInterval(durationTestMode)
.getSyncPoller();

String modelId = buildModelPoller.getFinalResult().getModelId();

urlRunner((sourceUrl) -> {
SyncPoller<AnalyzeBatchResultOperation, AnalyzeBatchResult>
syncPoller
= client.beginAnalyzeBatchDocuments(modelId, null, null, null, null, null, null, Collections.singletonList(PDF), new AnalyzeBatchDocumentsRequest(trainingFilesUrl).setResultPrefix("trainingDocsResult/").setAzureBlobSource(new AzureBlobContentSource(sourceUrl)))
.setPollInterval(durationTestMode)
.getSyncPoller();
}, BATCH_SAMPLE_PDF);
});
}
}
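
The assertion change in `getAnalyzeFigures` widens the header check from four bytes to five: the signed literals -119, 80, 78, 71, 13 are the PNG signature bytes 0x89 'P' 'N' 'G' 0x0D. A standalone sketch of the same check, shown only to illustrate the intent of the test (the class and helper names are invented for this example):

```java
import java.util.Arrays;

public final class PngSignatureCheck {
    // First five bytes of the PNG file signature: 0x89 'P' 'N' 'G' 0x0D.
    private static final byte[] PNG_SIGNATURE_PREFIX = { (byte) 0x89, 0x50, 0x4E, 0x47, 0x0D };

    static boolean looksLikePng(byte[] data) {
        return data.length >= PNG_SIGNATURE_PREFIX.length
            && Arrays.equals(Arrays.copyOfRange(data, 0, PNG_SIGNATURE_PREFIX.length), PNG_SIGNATURE_PREFIX);
    }

    public static void main(String[] args) {
        // The full 8-byte signature; only the first five bytes are compared above.
        byte[] sample = { (byte) 0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A };
        System.out.println(looksLikePng(sample)); // true
    }
}
```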
@@ -3,6 +3,9 @@

package com.azure.ai.documentintelligence;

import com.azure.ai.documentintelligence.models.AnalyzeBatchDocumentsRequest;
import com.azure.ai.documentintelligence.models.AnalyzeBatchResult;
import com.azure.ai.documentintelligence.models.AnalyzeBatchResultOperation;
import com.azure.ai.documentintelligence.models.AnalyzeDocumentRequest;
import com.azure.ai.documentintelligence.models.AnalyzeOutputOption;
import com.azure.ai.documentintelligence.models.AnalyzeResult;
@@ -33,6 +36,7 @@
import java.util.UUID;
import java.util.concurrent.atomic.AtomicReference;

import static com.azure.ai.documentintelligence.TestUtils.BATCH_SAMPLE_PDF;
import static com.azure.ai.documentintelligence.TestUtils.CONTENT_FORM_JPG;
import static com.azure.ai.documentintelligence.TestUtils.CONTENT_GERMAN_PDF;
import static com.azure.ai.documentintelligence.TestUtils.DISPLAY_NAME_WITH_ARGUMENTS;
@@ -43,6 +47,7 @@
import static com.azure.ai.documentintelligence.TestUtils.MULTIPAGE_INVOICE_PDF;
import static com.azure.ai.documentintelligence.TestUtils.RECEIPT_CONTOSO_JPG;
import static com.azure.ai.documentintelligence.TestUtils.urlRunner;
import static com.azure.ai.documentintelligence.models.AnalyzeOutputOption.PDF;

public class DocumentIntelligenceClientTest extends DocumentIntelligenceClientTestBase {
private DocumentIntelligenceClient client;
@@ -216,7 +221,6 @@ null, null, null, null, null, null, null, new AnalyzeDocumentRequest().setBase64
*/
@ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
@MethodSource("com.azure.ai.documentintelligence.TestUtils#getTestParameters")
@Disabled("https://github.com/Azure/azure-sdk-for-java/issues/41027")
public void analyzeCustomDocumentUrl(HttpClient httpClient,
DocumentIntelligenceServiceVersion serviceVersion) {
client = getDocumentAnalysisClient(httpClient, serviceVersion);
@@ -444,7 +448,7 @@ public void getAnalyzePdf(HttpClient httpClient,
dataRunner((data, dataLength) -> {
SyncPoller<AnalyzeResultOperation, AnalyzeResult>
syncPoller
= client.beginAnalyzeDocument(modelID, null, null, null, null, null, null, Collections.singletonList(AnalyzeOutputOption.PDF), new AnalyzeDocumentRequest().setBase64Source(data))
= client.beginAnalyzeDocument(modelID, null, null, null, null, null, null, Collections.singletonList(PDF), new AnalyzeDocumentRequest().setBase64Source(data))
.setPollInterval(durationTestMode);
String resultId = syncPoller.poll().getValue().getOperationId();
syncPoller.waitForCompletion();
@@ -461,11 +465,10 @@ public void getAnalyzePdf(HttpClient httpClient,
@RecordWithoutRequestBody
@ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
@MethodSource("com.azure.ai.documentintelligence.TestUtils#getTestParameters")
@Disabled("The parameter Output is invalid: parameter not supported.")
public void getAnalyzeFigures(HttpClient httpClient,
DocumentIntelligenceServiceVersion serviceVersion) {
client = getDocumentAnalysisClient(httpClient, serviceVersion);
String modelID = "prebuilt-read";
String modelID = "prebuilt-layout";
dataRunner((data, dataLength) -> {
SyncPoller<AnalyzeResultOperation, AnalyzeResult>
syncPoller
@@ -482,8 +485,35 @@ public void getAnalyzeFigures(HttpClient httpClient,
byte[] figuresHeader = { figuresBytes[0], figuresBytes[1], figuresBytes[2], figuresBytes[3], figuresBytes[4] };

// A PNG is expected to start with the signature bytes 0x89 'P' 'N' 'G' 0x0D (‰PNG followed by CR)
Assertions.assertArrayEquals(new byte[] { (byte) 0x89, 0x50, 0x4E, 0x47 }, figuresHeader);
Assertions.assertArrayEquals(new byte[] { (byte) -119, 80, 78, 71, 13 }, figuresHeader);
}, LAYOUT_SAMPLE);

}

@RecordWithoutRequestBody
@ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
@MethodSource("com.azure.ai.documentintelligence.TestUtils#getTestParameters")
@Disabled("Disabled until file available on main")
public void analyzeBatchDocuments(HttpClient httpClient,
DocumentIntelligenceServiceVersion serviceVersion) {
client = getDocumentAnalysisClient(httpClient, serviceVersion);
DocumentIntelligenceAdministrationClient adminClient = getDocumentModelAdminClient(httpClient, serviceVersion);
buildBatchModelRunner((trainingFilesUrl) -> {
SyncPoller<DocumentModelBuildOperationDetails, DocumentModelDetails> buildModelPoller =
adminClient
.beginBuildDocumentModel(new BuildDocumentModelRequest("modelID" + UUID.randomUUID(), DocumentBuildMode.GENERATIVE).setAzureBlobSource(new AzureBlobContentSource(trainingFilesUrl)))
.setPollInterval(durationTestMode);
buildModelPoller.waitForCompletion();

String modelId = buildModelPoller.getFinalResult().getModelId();

urlRunner((sourceUrl) -> {
SyncPoller<AnalyzeBatchResultOperation, AnalyzeBatchResult>
syncPoller
= client.beginAnalyzeBatchDocuments(modelId, null, null, null, null, null, null, Collections.singletonList(PDF), new AnalyzeBatchDocumentsRequest(trainingFilesUrl).setResultPrefix("trainingDocsResult/").setAzureBlobSource(new AzureBlobContentSource(sourceUrl)))
.setPollInterval(durationTestMode);
syncPoller.waitForCompletion();
}, BATCH_SAMPLE_PDF);
});
}
}
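
Stripped of test scaffolding, the batch-analysis flow exercised by the new `analyzeBatchDocuments` tests looks roughly like the sketch below. The client, model ID, and container URLs are placeholders; the request constructor is treated here as taking the result container URL, mirroring how the tests pass the training container, and the call signature mirrors the one used in the test:

```java
import com.azure.ai.documentintelligence.DocumentIntelligenceClient;
import com.azure.ai.documentintelligence.models.AnalyzeBatchDocumentsRequest;
import com.azure.ai.documentintelligence.models.AnalyzeBatchResult;
import com.azure.ai.documentintelligence.models.AnalyzeBatchResultOperation;
import com.azure.ai.documentintelligence.models.AnalyzeOutputOption;
import com.azure.ai.documentintelligence.models.AzureBlobContentSource;
import com.azure.core.util.polling.SyncPoller;

import java.util.Collections;

public final class BatchAnalysisSketch {
    static AnalyzeBatchResult analyzeBatch(DocumentIntelligenceClient client, String modelId,
                                           String sourceContainerUrl, String resultContainerUrl) {
        // Point the request at a blob container of input documents and a prefix under which
        // the per-document results will be written.
        AnalyzeBatchDocumentsRequest request = new AnalyzeBatchDocumentsRequest(resultContainerUrl)
            .setResultPrefix("batchResults/")
            .setAzureBlobSource(new AzureBlobContentSource(sourceContainerUrl));

        SyncPoller<AnalyzeBatchResultOperation, AnalyzeBatchResult> poller =
            client.beginAnalyzeBatchDocuments(modelId, null, null, null, null, null, null,
                Collections.singletonList(AnalyzeOutputOption.PDF), request);
        return poller.getFinalResult();
    }
}
```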
@@ -114,6 +114,10 @@ void buildModelRunner(Consumer<String> testRunner) {
TestUtils.getTrainingDataContainerHelper(testRunner, interceptorManager.isPlaybackMode());
}

void buildBatchModelRunner(Consumer<String> testRunner) {
TestUtils.getBatchTrainingDataContainerHelper(testRunner, interceptorManager.isPlaybackMode());
}

void beginClassifierRunner(Consumer<String> testRunner) {
TestUtils.getClassifierTrainingDataContainerHelper(testRunner, interceptorManager.isPlaybackMode());
}
@@ -50,6 +50,7 @@ public final class TestUtils {
static final String W2_JPG = "w2-single.png";
static final String IRS_1040 = "IRS-1040_3.pdf";
static final String LAYOUT_SAMPLE = "layout-pageobject.pdf";
static final String BATCH_SAMPLE_PDF = "Acord_27.pdf";

static final String EXPECTED_MERCHANT_NAME = "Contoso";
static final String URL_TEST_FILE_FORMAT = "https://raw.githubusercontent.com/Azure/azure-sdk-for-java/"
@@ -72,6 +73,8 @@ public final class TestUtils {
GLOBAL_CONFIGURATION.get("DOCUMENTINTELLIGENCE_SELECTION_MARK_DATA_CONTAINER_SAS_URL");
public static final String DOCUMENTINTELLIGENCE_CLASSIFIER_TRAINING_DATA_CONTAINER_SAS_URL_CONFIGURATION =
GLOBAL_CONFIGURATION.get("DOCUMENTINTELLIGENCE_CLASSIFIER_TRAINING_DATA_CONTAINER_SAS_URL");
public static final String DOCUMENTINTELLIGENCE_BATCH_TRAINING_DATA_CONTAINER_SAS_URL_CONFIGURATION =
GLOBAL_CONFIGURATION.get("DOCUMENTINTELLIGENCE_BATCH_TRAINING_DATA_CONTAINER_SAS_URL");
public static final Duration DEFAULT_POLL_INTERVAL = Duration.ofSeconds(5);
public static final Duration DEFAULT_TIMEOUT = Duration.ofSeconds(30);
private TestUtils() {
@@ -104,7 +107,9 @@ public static void getSelectionMarkTrainingContainerHelper(Consumer<String> test
public static void getClassifierTrainingDataContainerHelper(Consumer<String> testRunner, boolean isPlaybackMode) {
testRunner.accept(getClassifierTrainingFilesContainerUrl(isPlaybackMode));
}

public static void getBatchTrainingDataContainerHelper(Consumer<String> testRunner, boolean isPlaybackMode) {
testRunner.accept(getBatchTrainingFilesContainerUrl(isPlaybackMode));
}
/**
* Get the testing data set SAS Url value based on the test running mode.
*
@@ -153,6 +158,15 @@ private static String getClassifierTrainingFilesContainerUrl(boolean isPlaybackM
return isPlaybackMode ? "https://isPlaybackmode" : DOCUMENTINTELLIGENCE_CLASSIFIER_TRAINING_DATA_CONTAINER_SAS_URL_CONFIGURATION;
}

/**
* Get the batch training data set SAS Url value based on the test running mode.
*
* @return the batch training data set Url
*/
private static String getBatchTrainingFilesContainerUrl(boolean isPlaybackMode) {
return isPlaybackMode ? "https://isPlaybackmode" : DOCUMENTINTELLIGENCE_BATCH_TRAINING_DATA_CONTAINER_SAS_URL_CONFIGURATION;
}

/**
* Returns a stream of arguments that includes all combinations of eligible {@link HttpClient HttpClients} and
* service versions that should be tested.
Binary file not shown.
17 changes: 17 additions & 0 deletions sdk/documentintelligence/test-resources.json
@@ -30,6 +30,10 @@
"type": "string",
"defaultValue": "trainingdata-v3"
},
"batchTrainingDataContainer": {
"type": "string",
"defaultValue": "trainingdata-batch"
},
"selectionMarkTrainingDataContainer": {
"type": "string",
"defaultValue": "selectionmark-v3"
@@ -47,6 +51,15 @@
"signedResource": "c"
}
},
"batchTrainingSasProperties": {
"type": "object",
"defaultValue": {
"canonicalizedResource": "[concat('/blob/', parameters('blobStorageAccount'), '/', parameters('batchTrainingDataContainer'))]",
"signedExpiry": "[dateTimeAdd(utcNow('u'), 'P2M')]",
"signedPermission": "rwl",
"signedResource": "c"
}
},
"selectionMarkTrainingDataSasProperties": {
"type": "object",
"defaultValue": {
@@ -122,6 +135,10 @@
"type": "string",
"value": "[concat(reference(parameters('blobResourceId'), '2019-06-01').primaryEndpoints.blob, parameters('classifierTrainingDataContainer'), '?', listServiceSas(parameters('blobResourceId'), '2019-06-01', parameters('classifierTrainingSasProperties')).serviceSasToken)]"
},
"DOCUMENTINTELLIGENCE_BATCH_TRAINING_DATA_CONTAINER_SAS_URL": {
"type": "string",
"value": "[concat(reference(parameters('blobResourceId'), '2019-06-01').primaryEndpoints.blob, parameters('batchTrainingDataContainer'), '?', listServiceSas(parameters('blobResourceId'), '2019-06-01', parameters('batchTrainingSasProperties')).serviceSasToken)]"
},
"DOCUMENTINTELLIGENCE_MULTIPAGE_TRAINING_DATA_CONTAINER_SAS_URL": {
"type": "string",
"value": "[concat(reference(parameters('blobResourceId'), '2019-06-01').primaryEndpoints.blob, parameters('multiPageTestingDataContainer'), '?', listServiceSas(parameters('blobResourceId'), '2019-06-01', parameters('multiPageTestingDataSasProperties')).serviceSasToken)]"
