From aa494623a279d33325b51a01446e682a836a6821 Mon Sep 17 00:00:00 2001 From: qued <64741807+qued@users.noreply.github.com> Date: Tue, 14 Mar 2023 09:40:30 -0500 Subject: [PATCH] chore: bump versions (#352) Update versions of dependencies, including unpinning the unstructured-inference dependency that's causing conflicts in repos like pipeline-oer that want the newer version. --- requirements/base.txt | 2 +- requirements/dev.txt | 2 +- requirements/huggingface.txt | 2 +- requirements/ingest-github.txt | 2 +- requirements/ingest-gitlab.txt | 2 +- requirements/ingest-google-drive.txt | 2 +- requirements/ingest-reddit.txt | 2 +- requirements/ingest-s3.txt | 2 +- requirements/ingest-wikipedia.txt | 2 +- requirements/local-inference.txt | 14 +- requirements/test.txt | 2 +- setup.py | 2 +- .../65/11/main.PMC6312790.pdf.json | 28 ++-- .../75/29/main.PMC6312793.pdf.json | 20 +-- .../2023-Jan-economic-outlook.pdf.json | 132 ++++++------------ .../small-pdf-set/Silent-Giant-(1).pdf.json | 48 +++---- .../recalibrating-risk-report.pdf.json | 36 ++--- .../test-ingest-github.sh | 2 +- test_unstructured_ingest/test-ingest-s3.sh | 2 +- 19 files changed, 126 insertions(+), 178 deletions(-) diff --git a/requirements/base.txt b/requirements/base.txt index 34a47fa04c..36172b0809 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -111,7 +111,7 @@ wrapt==1.14.1 # via # argilla # deprecated -xlsxwriter==3.0.8 +xlsxwriter==3.0.9 # via python-pptx zipp==3.15.0 # via importlib-metadata diff --git a/requirements/dev.txt b/requirements/dev.txt index 586ff10594..548be76bce 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -199,7 +199,7 @@ pip-tools==6.12.3 # via -r requirements/dev.in pkgutil-resolve-name==1.3.10 # via jsonschema -platformdirs==3.1.0 +platformdirs==3.1.1 # via # jupyter-core # virtualenv diff --git a/requirements/huggingface.txt b/requirements/huggingface.txt index 6a94beada6..cde736dbe6 100644 --- a/requirements/huggingface.txt +++ b/requirements/huggingface.txt @@ -155,7 +155,7 @@ wrapt==1.14.1 # via # argilla # deprecated -xlsxwriter==3.0.8 +xlsxwriter==3.0.9 # via python-pptx zipp==3.15.0 # via importlib-metadata diff --git a/requirements/ingest-github.txt b/requirements/ingest-github.txt index 428932508f..a5761e3fb7 100644 --- a/requirements/ingest-github.txt +++ b/requirements/ingest-github.txt @@ -195,7 +195,7 @@ wrapt==1.14.1 # -r requirements/base.txt # argilla # deprecated -xlsxwriter==3.0.8 +xlsxwriter==3.0.9 # via # -r requirements/base.txt # python-pptx diff --git a/requirements/ingest-gitlab.txt b/requirements/ingest-gitlab.txt index 562dc6ed87..89bb088679 100644 --- a/requirements/ingest-gitlab.txt +++ b/requirements/ingest-gitlab.txt @@ -189,7 +189,7 @@ wrapt==1.14.1 # -r requirements/base.txt # argilla # deprecated -xlsxwriter==3.0.8 +xlsxwriter==3.0.9 # via # -r requirements/base.txt # python-pptx diff --git a/requirements/ingest-google-drive.txt b/requirements/ingest-google-drive.txt index f41070fe4d..1099c632aa 100644 --- a/requirements/ingest-google-drive.txt +++ b/requirements/ingest-google-drive.txt @@ -221,7 +221,7 @@ wrapt==1.14.1 # -r requirements/base.txt # argilla # deprecated -xlsxwriter==3.0.8 +xlsxwriter==3.0.9 # via # -r requirements/base.txt # python-pptx diff --git a/requirements/ingest-reddit.txt b/requirements/ingest-reddit.txt index b9498ce74b..0e6ef3abbe 100644 --- a/requirements/ingest-reddit.txt +++ b/requirements/ingest-reddit.txt @@ -193,7 +193,7 @@ wrapt==1.14.1 # -r requirements/base.txt # argilla # deprecated -xlsxwriter==3.0.8 +xlsxwriter==3.0.9 # via # -r requirements/base.txt # python-pptx diff --git a/requirements/ingest-s3.txt b/requirements/ingest-s3.txt index 3339af1008..10b755913b 100644 --- a/requirements/ingest-s3.txt +++ b/requirements/ingest-s3.txt @@ -221,7 +221,7 @@ wrapt==1.14.1 # aiobotocore # argilla # deprecated -xlsxwriter==3.0.8 +xlsxwriter==3.0.9 # via # -r requirements/base.txt # python-pptx diff --git a/requirements/ingest-wikipedia.txt b/requirements/ingest-wikipedia.txt index 81fad29d8d..f3b1c0f061 100644 --- a/requirements/ingest-wikipedia.txt +++ b/requirements/ingest-wikipedia.txt @@ -190,7 +190,7 @@ wrapt==1.14.1 # -r requirements/base.txt # argilla # deprecated -xlsxwriter==3.0.8 +xlsxwriter==3.0.9 # via # -r requirements/base.txt # python-pptx diff --git a/requirements/local-inference.txt b/requirements/local-inference.txt index 4bb5f140e8..9def8ca53b 100644 --- a/requirements/local-inference.txt +++ b/requirements/local-inference.txt @@ -46,7 +46,7 @@ effdet==0.3.0 # via layoutparser et-xmlfile==1.1.0 # via openpyxl -fastapi==0.93.0 +fastapi==0.94.0 # via unstructured-inference filelock==3.9.0 # via @@ -84,8 +84,6 @@ iopath==0.1.10 # via layoutparser joblib==1.2.0 # via nltk -jsons==1.6.3 - # via unstructured-inference kiwisolver==1.4.4 # via matplotlib layoutparser[layoutmodels,tesseract]==0.3.4 @@ -219,7 +217,7 @@ sniffio==1.3.0 # anyio # httpcore # httpx -starlette==0.25.0 +starlette==0.26.0.post1 # via fastapi sympy==1.11.1 # via onnxruntime @@ -256,13 +254,11 @@ typing-extensions==4.5.0 # starlette # torch # torchvision -typish==1.9.3 - # via jsons -unstructured-inference==0.2.7 +unstructured-inference==0.2.11 # via unstructured (setup.py) urllib3==1.26.14 # via requests -uvicorn==0.20.0 +uvicorn==0.21.0 # via unstructured-inference wand==0.6.11 # via pdfplumber @@ -270,7 +266,7 @@ wrapt==1.14.1 # via # argilla # deprecated -xlsxwriter==3.0.8 +xlsxwriter==3.0.9 # via python-pptx zipp==3.15.0 # via diff --git a/requirements/test.txt b/requirements/test.txt index de3fe722d5..ce11af7b9a 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -52,7 +52,7 @@ packaging==23.0 # pytest pathspec==0.11.0 # via black -platformdirs==3.1.0 +platformdirs==3.1.1 # via black pluggy==1.0.0 # via pytest diff --git a/setup.py b/setup.py index b2519862e4..c0b34bc563 100644 --- a/setup.py +++ b/setup.py @@ -75,7 +75,7 @@ ], "local-inference": [ # NOTE(robinson) - Upper bound is temporary due to a multithreading issue - "unstructured-inference>=0.2.4,<0.2.8", + "unstructured-inference~=0.2.4", ], "s3": ["s3fs", "fsspec"], "azure": ["adlfs", "fsspec"], diff --git a/test_unstructured_ingest/expected-structured-output/biomed-ingest-output-api/65/11/main.PMC6312790.pdf.json b/test_unstructured_ingest/expected-structured-output/biomed-ingest-output-api/65/11/main.PMC6312790.pdf.json index 0215335b2a..3dff6bfb8c 100644 --- a/test_unstructured_ingest/expected-structured-output/biomed-ingest-output-api/65/11/main.PMC6312790.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/biomed-ingest-output-api/65/11/main.PMC6312790.pdf.json @@ -216,16 +216,16 @@ } }, { - "element_id": "a6d9e195a423f52793f2671020bc0a90", - "text": "corrosion in sulphuric acid environment. \f The data obtained for the inhibition of waste product (egg shell powder) on stainless steel Type", + "element_id": "a46f9f30a7e99da10ee392358628f902", + "text": "corrosion in sulphuric acid environment. The data obtained for the inhibition of waste product (egg shell powder) on stainless steel Type", "type": "ListItem", "metadata": { "page_number": 2 } }, { - "element_id": "9c27a353d941bcc3cbf8596f744b1941", - "text": "can be used as basis in determining the inhibitive performance of the same inhibitor in other environments. \f The data can be used to examine the relationship between the process variable as it affect the nature of inhibition of metals.", + "element_id": "ca7b772bc6e34f631824b746c4525650", + "text": "can be used as basis in determining the inhibitive performance of the same inhibitor in other environments. The data can be used to examine the relationship between the process variable as it affect the nature of inhibition of metals.", "type": "ListItem", "metadata": { "page_number": 2 @@ -320,8 +320,8 @@ } }, { - "element_id": "e3b0c44298fc1c149afbf4c8996fb924", - "text": "", + "element_id": "a080eedc29ff995e00faf2b0341bfa18", + "text": "ottabitse.eeeeso—«—«", "type": "FigureCaption", "metadata": { "page_number": 4 @@ -352,8 +352,8 @@ } }, { - "element_id": "3382d82decb1e5e8754d8083f5b3a916", - "text": "Inhibitor concentration (g) bc (V/dec) ba (V/dec) Ecorr (V) icorr (A/cm 2 ) Polarization resistance ( Ω ) Corrosion rate (mm/year) 0 0.0335 0.0409 \f 0.9393 0.0003 24.0910 2.8163 2 1.9460 0.0596 \f 0.8276 0.0002 121.440 1.5054 4 0.0163 0.2369 \f 0.8825 0.0001 42.121 0.9476 6 0.3233 0.0540 \f 0.8027 5.39E-05 373.180 0.4318 8 0.1240 0.0556 \f 0.5896 5.46E-05 305.650 0.3772 10 0.0382 0.0086 \f 0.5356 1.24E-05 246.080 0.0919", + "element_id": "8b206a2157155bfd11874c55924f92ee", + "text": "Inhibitor concentration (g) bc (V/dec) ba (V/dec) Ecorr (V) icorr (A/cm 2 ) Polarization resistance ( Ω ) Corrosion rate (mm/year) 0 0.0335 0.0409 0.9393 0.0003 24.0910 2.8163 2 1.9460 0.0596 0.8276 0.0002 121.440 1.5054 4 0.0163 0.2369 0.8825 0.0001 42.121 0.9476 6 0.3233 0.0540 0.8027 5.39E-05 373.180 0.4318 8 0.1240 0.0556 0.5896 5.46E-05 305.650 0.3772 10 0.0382 0.0086 0.5356 1.24E-05 246.080 0.0919", "type": "UncategorizedText", "metadata": { "page_number": 4 @@ -384,8 +384,8 @@ } }, { - "element_id": "e3b0c44298fc1c149afbf4c8996fb924", - "text": "", + "element_id": "36a9e7f1c95b82ffb99743e0c5c4ce95", + "text": " ", "type": "FigureCaption", "metadata": { "page_number": 5 @@ -416,8 +416,8 @@ } }, { - "element_id": "e3b0c44298fc1c149afbf4c8996fb924", - "text": "", + "element_id": "36a9e7f1c95b82ffb99743e0c5c4ce95", + "text": " ", "type": "FigureCaption", "metadata": { "page_number": 5 @@ -520,8 +520,8 @@ } }, { - "element_id": "90d8b72f086a37fbdc4f2847699a558f", - "text": "steps of the linear polarization plot are substituted to get corrosion current. Nova software was used with linear polarization resistance (LPR) and the current was set to 10 mA (maximum) and 10 nA (minimum). LSV staircase parameter start potential \f 1.5 v, step potential 0.001 m/s and stop potential of þ 1.5 v set was used in this study.", + "element_id": "7d9d3615bab6ee55d73f76fac7b87756", + "text": "steps of the linear polarization plot are substituted to get corrosion current. Nova software was used with linear polarization resistance (LPR) and the current was set to 10 mA (maximum) and 10 nA (minimum). LSV staircase parameter start potential 1.5 v, step potential 0.001 m/s and stop potential of þ 1.5 v set was used in this study.", "type": "NarrativeText", "metadata": { "page_number": 7 diff --git a/test_unstructured_ingest/expected-structured-output/biomed-ingest-output-api/75/29/main.PMC6312793.pdf.json b/test_unstructured_ingest/expected-structured-output/biomed-ingest-output-api/75/29/main.PMC6312793.pdf.json index 1ac19d1443..ad897349b7 100644 --- a/test_unstructured_ingest/expected-structured-output/biomed-ingest-output-api/75/29/main.PMC6312793.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/biomed-ingest-output-api/75/29/main.PMC6312793.pdf.json @@ -216,16 +216,16 @@ } }, { - "element_id": "a9232cb0e8af5f01ec54c6a83d9dc77a", - "text": "different problem instances of the MDVSP that can be used to evaluate the performance of the algorithms for the MDVSP. \f The data provide all the information that is required to model the MDVSP by using the existing mathematical formulations. \f All the problem instances are available for use without any restrictions. \f The benchmark solutions and solution time for the problem instances are presented in [", + "element_id": "ae2ec2f9015a920ca777ed80f83f9625", + "text": "different problem instances of the MDVSP that can be used to evaluate the performance of the algorithms for the MDVSP. The data provide all the information that is required to model the MDVSP by using the existing mathematical formulations. All the problem instances are available for use without any restrictions. The benchmark solutions and solution time for the problem instances are presented in [", "type": "ListItem", "metadata": { "page_number": 2 } }, { - "element_id": "c50e21ba526d57294dd30c3a7d0de831", - "text": "] and can be used for the comparison. \f The dataset includes a program that can generate similar problem instances of different sizes.", + "element_id": "ca6a4e8ba5cd29f28afbf1be79bfe9f0", + "text": "] and can be used for the comparison. The dataset includes a program that can generate similar problem instances of different sizes.", "type": "ListItem", "metadata": { "page_number": 2 @@ -272,8 +272,8 @@ } }, { - "element_id": "bb713b49b45041e1177c53667bb84c09", - "text": "\f The travel time, δ ij , between any two locations i ; j A 1 ; … ; l", + "element_id": "f562440bfb37c791547df5aaf08eaf45", + "text": "The travel time, δ ij , between any two locations i ; j A 1 ; … ; l", "type": "NarrativeText", "metadata": { "page_number": 2 @@ -296,8 +296,8 @@ } }, { - "element_id": "068844cf2dbe972764a886ce25559a07", - "text": "A trip j can be covered after trip i by the same vehicle, if t sj t ei þ l ei l sj . If l ei l sj , the vehicle must travel empty from l ei to l sj , otherwise, the vehicle may require waiting at l ei for the duration of ð t sj \f t ei Þ . A schedule is given by the sequence in which a vehicle can cover the trips. The MDVSP is to determine the minimum number of schedules to cover all trips that minimizes total time in waiting and empty travel. The following requirements must be satis fi ed:", + "element_id": "b0db1cc87b21aa17409de37b2f6a29d8", + "text": "A trip j can be covered after trip i by the same vehicle, if t sj t ei þ l ei l sj . If l ei l sj , the vehicle must travel empty from l ei to l sj , otherwise, the vehicle may require waiting at l ei for the duration of ð t sj t ei Þ . A schedule is given by the sequence in which a vehicle can cover the trips. The MDVSP is to determine the minimum number of schedules to cover all trips that minimizes total time in waiting and empty travel. The following requirements must be satis fi ed:", "type": "NarrativeText", "metadata": { "page_number": 3 @@ -320,8 +320,8 @@ } }, { - "element_id": "f6a12c82b0cad873085545d9f03462aa", - "text": "The description of the fi le for each problem instance is presented in Table 2. The fi rst line in the fi le provides the number of depots ð m Þ , the number of trips, ð n Þ , and the number of locations ð l Þ , in the problem instance. The next n lines present the information for n trips. Each line corresponds to a trip, i A 1 ; … ; n f g , and provides the start location, the start time, the end location, and the end time of trip i The next l lines present the travel times between any two locations, i ; j A 1 ; … ; l \f \f .", + "element_id": "322b598a18278d3d39f6bf634841b89a", + "text": "The description of the fi le for each problem instance is presented in Table 2. The fi rst line in the fi le provides the number of depots ð m Þ , the number of trips, ð n Þ , and the number of locations ð l Þ , in the problem instance. The next n lines present the information for n trips. Each line corresponds to a trip, i A 1 ; … ; n f g , and provides the start location, the start time, the end location, and the end time of trip i The next l lines present the travel times between any two locations, i ; j A 1 ; … ; l .", "type": "NarrativeText", "metadata": { "page_number": 3 diff --git a/test_unstructured_ingest/expected-structured-output/s3-small-batch/small-pdf-set/2023-Jan-economic-outlook.pdf.json b/test_unstructured_ingest/expected-structured-output/s3-small-batch/small-pdf-set/2023-Jan-economic-outlook.pdf.json index 177707a4c6..69c34f8273 100644 --- a/test_unstructured_ingest/expected-structured-output/s3-small-batch/small-pdf-set/2023-Jan-economic-outlook.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/s3-small-batch/small-pdf-set/2023-Jan-economic-outlook.pdf.json @@ -8,8 +8,8 @@ } }, { - "element_id": "134b85dd5d5a6f2cba10c8d8be3014f8", - "text": " Global growth is projected to fall from an estimated", + "element_id": "5541540b0ecd19562b680b9a88d0ab10", + "text": "Global growth is projected to fall from an estimated", "type": "ListItem", "metadata": { "page_number": 2 @@ -192,16 +192,16 @@ } }, { - "element_id": "abffdd2fc3e6946f33c186230043598c", - "text": "percent.  The balance of risks remains tilted to the downside, but adverse risks have moderated since the October", + "element_id": "147cf252015c198005cba4f38b06ccb3", + "text": "percent. The balance of risks remains tilted to the downside, but adverse risks have moderated since the October", "type": "ListItem", "metadata": { "page_number": 2 } }, { - "element_id": "36eb32a5442921895e78ade5e942f237", - "text": "WEO. On the upside, a stronger boost from pent-up demand in numerous economies or a faster fall in inflation are plausible. On the downside, severe health outcomes in China could hold back the recovery, Russia’s war in Ukraine could escalate, and tighter global financing conditions could worsen debt distress. Financial markets could also suddenly reprice in response to adverse inflation news, while further geopolitical fragmentation could hamper economic progress.  In most economies, amid the cost-of-living crisis, the priority remains achieving sustained disinflation. With tighter monetary conditions and lower growth potentially affecting financial and debt stability, it is necessary to deploy macroprudential tools and strengthen debt restructuring frameworks. Accelerating COVID-", + "element_id": "71a3ce4447887cd72b61482a17cc7ce7", + "text": "WEO. On the upside, a stronger boost from pent-up demand in numerous economies or a faster fall in inflation are plausible. On the downside, severe health outcomes in China could hold back the recovery, Russia’s war in Ukraine could escalate, and tighter global financing conditions could worsen debt distress. Financial markets could also suddenly reprice in response to adverse inflation news, while further geopolitical fragmentation could hamper economic progress. In most economies, amid the cost-of-living crisis, the priority remains achieving sustained disinflation. With tighter monetary conditions and lower growth potentially affecting financial and debt stability, it is necessary to deploy macroprudential tools and strengthen debt restructuring frameworks. Accelerating COVID-", "type": "ListItem", "metadata": { "page_number": 2 @@ -424,8 +424,8 @@ } }, { - "element_id": "e3b0c44298fc1c149afbf4c8996fb924", - "text": "", + "element_id": "0953470500eb215048fd49263b8829a4", + "text": "Forces Shaping the Outlook", "type": "Title", "metadata": { "page_number": 2 @@ -552,16 +552,16 @@ } }, { - "element_id": "e3b0c44298fc1c149afbf4c8996fb924", - "text": "", + "element_id": "c189a87e484a82091b03bd9ef1e8c1cc", + "text": "‘The Forecast", "type": "Title", "metadata": { "page_number": 4 } }, { - "element_id": "e3b0c44298fc1c149afbf4c8996fb924", - "text": "", + "element_id": "5779b9b7d25794d3b4ed1fe4e61f6617", + "text": "Growth Bottoming Out", "type": "Title", "metadata": { "page_number": 4 @@ -792,8 +792,8 @@ } }, { - "element_id": "0b099813975972d635e8a0cf837d8465", - "text": " Growth in emerging and developing Asia is expected to rise in", + "element_id": "43a91ccbe4a0a23db08e3ae389a46968", + "text": "Growth in emerging and developing Asia is expected to rise in", "type": "ListItem", "metadata": { "page_number": 5 @@ -1008,8 +1008,8 @@ } }, { - "element_id": "7d35c70c1e8004a5d298345187fa4428", - "text": " Growth in emerging and developing Europe is projected to have bottomed out in", + "element_id": "70e68a1bf24a1a4823291986069364b4", + "text": "Growth in emerging and developing Europe is projected to have bottomed out in", "type": "ListItem", "metadata": { "page_number": 5 @@ -1080,8 +1080,8 @@ } }, { - "element_id": "f49f32599594b7a7ccd5489de2e973a8", - "text": "At the current oil price cap level of the Group of Seven, Russian crude oil export volumes are not expected to be significantly affected, with Russian trade continuing to be redirected from sanctioning to non-sanctioning countries.  In Latin America and the Caribbean , growth is projected to decline from", + "element_id": "87177070c193e2e9c4b8612832234e9f", + "text": "At the current oil price cap level of the Group of Seven, Russian crude oil export volumes are not expected to be significantly affected, with Russian trade continuing to be redirected from sanctioning to non-sanctioning countries. In Latin America and the Caribbean , growth is projected to decline from", "type": "ListItem", "metadata": { "page_number": 5 @@ -1192,8 +1192,8 @@ } }, { - "element_id": "465316ba8865c6d57991eda545eb3141", - "text": ", although with a downward revision of percentage point, reflecting tighter financial conditions, lower prices of exported commodities, and downward revisions to trading partner growth.  Growth in the Middle East and Central Asia is projected to decline from", + "element_id": "fc271d72923fe57910a8aa998fca99e2", + "text": ", although with a downward revision of percentage point, reflecting tighter financial conditions, lower prices of exported commodities, and downward revisions to trading partner growth. Growth in the Middle East and Central Asia is projected to decline from", "type": "ListItem", "metadata": { "page_number": 6 @@ -1288,8 +1288,8 @@ } }, { - "element_id": "2f7c343e3d306c442d89b48079771357", - "text": "reflects mainly lower oil production in line with an agreement through OPEC+ (Organization of the Petroleum Exporting Countries, including Russia and other non-OPEC oil exporters), while non-oil growth is expected to remain robust.  In sub-Saharan Africa , growth is projected to remain moderate at", + "element_id": "dabbab4e858d08eba5a2f134a2468eec", + "text": "reflects mainly lower oil production in line with an agreement through OPEC+ (Organization of the Petroleum Exporting Countries, including Russia and other non-OPEC oil exporters), while non-oil growth is expected to remain robust. In sub-Saharan Africa , growth is projected to remain moderate at", "type": "ListItem", "metadata": { "page_number": 6 @@ -1400,8 +1400,8 @@ } }, { - "element_id": "e3b0c44298fc1c149afbf4c8996fb924", - "text": "", + "element_id": "3dfc45d3333ae253d78008c8cde2d752", + "text": "Inflation Peaking", "type": "Title", "metadata": { "page_number": 6 @@ -1424,8 +1424,8 @@ } }, { - "element_id": "e3b0c44298fc1c149afbf4c8996fb924", - "text": "", + "element_id": "11ebd9f4c9a7cdbac41f8f7399d3950e", + "text": "Risks to the Outlook", "type": "Title", "metadata": { "page_number": 6 @@ -1536,8 +1536,8 @@ } }, { - "element_id": "c004ebe54d3a72bde0a491a0c6793c72", - "text": "up demand could also fuel a stronger rebound in China.", + "element_id": "0addd8bf2cac49052c8f892628ea7f48", + "text": "up demand could also fuel a stronger rebound in China. Faster disinflation: An easing in labor market pressures in some advanced economies due to falling vacancies could cool wage inflation without necessarily increasing unemployment. A sharp fall in the prices of goods, as consumers shift back to services, could further push down inflation. Such developments could imply a “softer” landing with less monetary tightening.", "type": "ListItem", "metadata": { "page_number": 8 @@ -1546,14 +1546,6 @@ { "element_id": "668cd3ea4f48a2f080b7b764c04ab011", "text": "Faster disinflation: An easing in labor market pressures in some advanced economies due to falling vacancies could cool wage inflation without necessarily increasing unemployment. A sharp fall in the prices of goods, as consumers shift back to services, could further push down inflation. Such developments could imply a “softer” landing with less monetary tightening.", - "type": "ListItem", - "metadata": { - "page_number": 8 - } - }, - { - "element_id": "aa10a3e36a453b783412d26db35979c9", - "text": " Faster disinflation: An easing in labor market pressures in some advanced economies due to falling vacancies could cool wage inflation without necessarily increasing unemployment. A sharp fall in the prices of goods, as consumers shift back to services, could further push down inflation. Such developments could imply a “softer” landing with less monetary tightening.", "type": "NarrativeText", "metadata": { "page_number": 8 @@ -1568,8 +1560,8 @@ } }, { - "element_id": "3cf43fcb56bd0edfd2f16912aeb9eb0b", - "text": " China’s recovery stalling: Amid still-low population immunity levels and insufficient hospital capacity, especially outside the major urban areas, significant health consequences could hamper the recovery. A deepening crisis in the real estate market remains a major source of vulnerability, with risks of widespread defaults by developers and resulting financial sector instability. Spillovers to the rest of the world would operate primarily through lower demand and potentially renewed supply chain problems.  War in Ukraine escalating: An escalation of the war in Ukraine remains a major source of vulnerability, particularly for Europe and lower-income countries. Europe is facing lower-than- anticipated gas prices, having stored enough gas to make shortages unlikely this winter. However, refilling storage with much-diminished Russian flows will be challenging ahead of next winter, particularly if it is a very cold one and China’s energy demand picks up, causing price spikes. A possible increase in food prices from a failed extension of the Black Sea grain initiative would put further pressure on lower-income countries that are experiencing food insecurity and have limited budgetary room to cushion the impact on households and businesses. With elevated food and fuel prices, social unrest may increase.  Debt distress: Since October, sovereign spreads for emerging market and developing economies have modestly declined on the back of an easing in global financial conditions (Box", + "element_id": "7a73a50423524a9a6961bb50ffc06ad1", + "text": "China’s recovery stalling: Amid still-low population immunity levels and insufficient hospital capacity, especially outside the major urban areas, significant health consequences could hamper the recovery. A deepening crisis in the real estate market remains a major source of vulnerability, with risks of widespread defaults by developers and resulting financial sector instability. Spillovers to the rest of the world would operate primarily through lower demand and potentially renewed supply chain problems. War in Ukraine escalating: An escalation of the war in Ukraine remains a major source of vulnerability, particularly for Europe and lower-income countries. Europe is facing lower-than- anticipated gas prices, having stored enough gas to make shortages unlikely this winter. However, refilling storage with much-diminished Russian flows will be challenging ahead of next winter, particularly if it is a very cold one and China’s energy demand picks up, causing price spikes. A possible increase in food prices from a failed extension of the Black Sea grain initiative would put further pressure on lower-income countries that are experiencing food insecurity and have limited budgetary room to cushion the impact on households and businesses. With elevated food and fuel prices, social unrest may increase. Debt distress: Since October, sovereign spreads for emerging market and developing economies have modestly declined on the back of an easing in global financial conditions (Box", "type": "ListItem", "metadata": { "page_number": 8 @@ -1600,8 +1592,8 @@ } }, { - "element_id": "ec05bff366662baf45af9370e774428b", - "text": "percent of emerging market economies also at high risk. The combination of high debt levels from the pandemic, lower growth, and higher borrowing costs exacerbates the vulnerability of these economies, especially those with significant near-term dollar financing needs.  Inflation persisting: Persistent labor market tightness could translate into stronger-than-expected wage growth. Higher-than-expected oil, gas, and food prices from the war in Ukraine or from a faster rebound in China’s growth could again raise headline inflation and pass through into underlying inflation. Such developments could cause inflation expectations to de-anchor and require an even tighter monetary policy.  Sudden financial market repricing: A premature easing in financial conditions in response to lower headline inflation data could complicate anti-inflation policies and necessitate additional monetary tightening. For the same reason, unfavorable inflation data releases could trigger sudden repricing of assets and increase volatility in financial markets. Such movements could strain liquidity and the functioning of critical markets, with ripple effects on the real economy.  Geopolitical fragmentation: The war in Ukraine and the related international sanctions aimed at  pressuring Russia to end hostilities are splitting the world economy into blocs and reinforcing earlier geopolitical tensions, such as those associated with the US-China trade dispute.", + "element_id": "41b02a0fc3e8115c7efd4ae8258aa325", + "text": "percent of emerging market economies also at high risk. The combination of high debt levels from the pandemic, lower growth, and higher borrowing costs exacerbates the vulnerability of these economies, especially those with significant near-term dollar financing needs. Inflation persisting: Persistent labor market tightness could translate into stronger-than-expected wage growth. Higher-than-expected oil, gas, and food prices from the war in Ukraine or from a faster rebound in China’s growth could again raise headline inflation and pass through into underlying inflation. Such developments could cause inflation expectations to de-anchor and require an even tighter monetary policy. Sudden financial market repricing: A premature easing in financial conditions in response to lower headline inflation data could complicate anti-inflation policies and necessitate additional monetary tightening. For the same reason, unfavorable inflation data releases could trigger sudden repricing of assets and increase volatility in financial markets. Such movements could strain liquidity and the functioning of critical markets, with ripple effects on the real economy. Geopolitical fragmentation: The war in Ukraine and the related international sanctions aimed at pressuring Russia to end hostilities are splitting the world economy into blocs and reinforcing earlier geopolitical tensions, such as those associated with the US-China trade dispute.", "type": "ListItem", "metadata": { "page_number": 8 @@ -1648,8 +1640,8 @@ } }, { - "element_id": "e3b0c44298fc1c149afbf4c8996fb924", - "text": "", + "element_id": "a81cc4e3ca23fd16254e2b858cdcb00a", + "text": "Policy Priorities", "type": "Title", "metadata": { "page_number": 9 @@ -1720,72 +1712,32 @@ } }, { - "element_id": "aecb059bbaecf040b2f1317e90122724", - "text": "Strengthening multilateral cooperation— Urgent action is needed to limit the risks stemming from geopolitical fragmentation and to ensure cooperation on fundamental areas of common interest:", - "type": "ListItem", - "metadata": { - "page_number": 10 - } - }, - { - "element_id": "5423fc3c57f44e1735ee34616a2fc673", - "text": "Restraining the pandemic: Global coordination is needed to resolve bottlenecks in the global distribution of vaccines and treatments. Public support for the development of new vaccine technologies and the design of systematic responses to future epidemics also remains essential.", - "type": "ListItem", - "metadata": { - "page_number": 10 - } - }, - { - "element_id": "836a0dc3c54dcef8746ecf1ba6491628", - "text": "Addressing debt distress: Progress has been made for countries that requested debt treatment under the Group of Twenty’s Common Framework initiative, and more will be needed to strengthen it. It is also necessary to agree on mechanisms to resolve debt in a broader set of economies, including middle", - "type": "ListItem", - "metadata": { - "page_number": 10 - } - }, - { - "element_id": "d151d8da2864037689f037e7acda17ca", - "text": "income countries that are not eligible under the Common Framework. Non– Paris Club and private creditors have a crucial role to play in ensuring coordinated, effective, and timely debt resolution processes.", - "type": "ListItem", - "metadata": { - "page_number": 10 - } - }, - { - "element_id": "4513f7bf7587e58d8497591c7c3336b5", - "text": "Strengthening global trade: Strengthening the global trading system would address risks associated with trade fragmentation. This can be achieved by rolling back restrictions on food exports and other essential items such as medicine, upgrading World Trade Organization (WTO) rules in critical areas such as agricultural and industrial subsidies, concluding and implementing new WTO", - "type": "ListItem", - "metadata": { - "page_number": 10 - } - }, - { - "element_id": "ec65a030ee4f9487e8202fc6d136acc5", - "text": "based agreements, and fully restoring the WTO dispute settlement system.", + "element_id": "7f60a0e660214aa453d8b468cd3e2405", + "text": "Strengthening multilateral cooperation— Urgent action is needed to limit the risks stemming from geopolitical fragmentation and to ensure cooperation on fundamental areas of common interest: Restraining the pandemic: Global coordination is needed to resolve bottlenecks in the global distribution of vaccines and treatments. Public support for the development of new vaccine technologies and the design of systematic responses to future epidemics also remains essential. Addressing debt distress: Progress has been made for countries that requested debt treatment under the Group of Twenty’s Common Framework initiative, and more will be needed to strengthen it. It is also necessary to agree on mechanisms to resolve debt in a broader set of economies, including middle", "type": "ListItem", "metadata": { "page_number": 10 } }, { - "element_id": "c4d133ff38976d20a71a04db1d3ab69b", - "text": "Using the global financial safety net: With the cascading of shocks to the global economy, using the global financial safety net to its fullest extent is appropriate, including by proactively utilizing the IMF’s precautionary financial arrangements and channeling aid from the international community to low", + "element_id": "f31ce37823276fee0e5d0f3ebd00a580", + "text": "income countries that are not eligible under the Common Framework. Non– Paris Club and private creditors have a crucial role to play in ensuring coordinated, effective, and timely debt resolution processes. Strengthening global trade: Strengthening the global trading system would address risks associated with trade fragmentation. This can be achieved by rolling back restrictions on food exports and other essential items such as medicine, upgrading World Trade Organization (WTO) rules in critical areas such as agricultural and industrial subsidies, concluding and implementing new WTO", "type": "ListItem", "metadata": { "page_number": 10 } }, { - "element_id": "79bccd9d37b403b6ece2c775c9a84f94", - "text": "income countries facing shocks.", + "element_id": "60764db91056a5c02ff8e73f06c49bbe", + "text": "based agreements, and fully restoring the WTO dispute settlement system. Using the global financial safety net: With the cascading of shocks to the global economy, using the global financial safety net to its fullest extent is appropriate, including by proactively utilizing the IMF’s precautionary financial arrangements and channeling aid from the international community to low", "type": "ListItem", "metadata": { "page_number": 10 } }, { - "element_id": "ad3c2f8c46cf3b298bcd23e60ce7162c", - "text": "Speeding the green transition: To meet governments’ climate change goals, it is necessary to swiftly implement credible mitigation policies. International coordination on carbon pricing or equivalent policies would facilitate faster decarbonization. Global cooperation is needed to build resilience to climate shocks, including through aid to vulnerable countries.", + "element_id": "7d31c3d270d70fed3f87e262eefc6793", + "text": "income countries facing shocks. Speeding the green transition: To meet governments’ climate change goals, it is necessary to swiftly implement credible mitigation policies. International coordination on carbon pricing or equivalent policies would facilitate faster decarbonization. Global cooperation is needed to build resilience to climate shocks, including through aid to vulnerable countries.", "type": "ListItem", "metadata": { "page_number": 10 diff --git a/test_unstructured_ingest/expected-structured-output/s3-small-batch/small-pdf-set/Silent-Giant-(1).pdf.json b/test_unstructured_ingest/expected-structured-output/s3-small-batch/small-pdf-set/Silent-Giant-(1).pdf.json index 758c38f43f..24f07d7f6c 100644 --- a/test_unstructured_ingest/expected-structured-output/s3-small-batch/small-pdf-set/Silent-Giant-(1).pdf.json +++ b/test_unstructured_ingest/expected-structured-output/s3-small-batch/small-pdf-set/Silent-Giant-(1).pdf.json @@ -1,7 +1,7 @@ [ { - "element_id": "e3b0c44298fc1c149afbf4c8996fb924", - "text": "", + "element_id": "36a9e7f1c95b82ffb99743e0c5c4ce95", + "text": " ", "type": "FigureCaption", "metadata": { "page_number": 1 @@ -104,8 +104,8 @@ } }, { - "element_id": "e3b0c44298fc1c149afbf4c8996fb924", - "text": "", + "element_id": "36a9e7f1c95b82ffb99743e0c5c4ce95", + "text": " ", "type": "FigureCaption", "metadata": { "page_number": 3 @@ -128,8 +128,8 @@ } }, { - "element_id": "622c83feb939057f8acb41b813d758e0", - "text": "T W h 2000 2010 2020 2030 2040 45,000 40,000 35,000 30,000 25,000 20,000 15,000 10,000 5,000 0  Marine  CSP  Solar PV  Geothermal  Wind  Bioenergy  Hydro  Nuclear  Gas  Oil  Coal", + "element_id": "d2614cf0acbae3b323a54da74e7138c3", + "text": "T W h 2000 2010 2020 2030 2040 45,000 40,000 35,000 30,000 25,000 20,000 15,000 10,000 5,000 0 Marine CSP Solar PV Geothermal Wind Bioenergy Hydro Nuclear Gas Oil Coal", "type": "FigureCaption", "metadata": { "page_number": 4 @@ -160,8 +160,8 @@ } }, { - "element_id": "d2d730d6d1d536548125f89f5862076d", - "text": "1990 1995 2000 2005 2010 2015 G W h 30,000,000 25,000,000 20,000,000 15,000,000 10,000,000 5,000,000 0  High-carbon  Low-carbon", + "element_id": "4f5794458e0082ff0f435cf6338dd1d4", + "text": "1990 1995 2000 2005 2010 2015 G W h 30,000,000 25,000,000 20,000,000 15,000,000 10,000,000 5,000,000 0 High-carbon Low-carbon", "type": "FigureCaption", "metadata": { "page_number": 4 @@ -176,8 +176,8 @@ } }, { - "element_id": "e3b0c44298fc1c149afbf4c8996fb924", - "text": "", + "element_id": "36a9e7f1c95b82ffb99743e0c5c4ce95", + "text": " ", "type": "FigureCaption", "metadata": { "page_number": 5 @@ -208,16 +208,16 @@ } }, { - "element_id": "e3b0c44298fc1c149afbf4c8996fb924", - "text": "", + "element_id": "36a9e7f1c95b82ffb99743e0c5c4ce95", + "text": " ", "type": "FigureCaption", "metadata": { "page_number": 5 } }, { - "element_id": "e3b0c44298fc1c149afbf4c8996fb924", - "text": "", + "element_id": "36a9e7f1c95b82ffb99743e0c5c4ce95", + "text": " ", "type": "FigureCaption", "metadata": { "page_number": 5 @@ -272,8 +272,8 @@ } }, { - "element_id": "e3b0c44298fc1c149afbf4c8996fb924", - "text": "", + "element_id": "36a9e7f1c95b82ffb99743e0c5c4ce95", + "text": " ", "type": "FigureCaption", "metadata": { "page_number": 6 @@ -328,8 +328,8 @@ } }, { - "element_id": "e3b0c44298fc1c149afbf4c8996fb924", - "text": "", + "element_id": "36a9e7f1c95b82ffb99743e0c5c4ce95", + "text": " ", "type": "FigureCaption", "metadata": { "page_number": 7 @@ -376,8 +376,8 @@ } }, { - "element_id": "bbaef205402a403fdb2bf9308d3f0c0a", - "text": "% France Sweden Switzerland 100 90 80 70 60 50 40 30 20 10 0  Coal  Gas/Oil  Biofuels/Waste  Wind/Solar  Hydro  Nuclear", + "element_id": "1b70d9207dfbb6ca0c03216eb2f35716", + "text": "% France Sweden Switzerland 100 90 80 70 60 50 40 30 20 10 0 Coal Gas/Oil Biofuels/Waste Wind/Solar Hydro Nuclear", "type": "FigureCaption", "metadata": { "page_number": 8 @@ -392,8 +392,8 @@ } }, { - "element_id": "c5aa0e91754d06be646b30795bab2e87", - "text": "T W h 600 500 400 300 200 100 0 1974 1980 1985 1990 1995 2000 2005 2010 2017  Non-hydro ren. & waste  Nuclear  Natural gas  Hydro  Oil  Coal", + "element_id": "536eba195911c26289e5179e02f16f84", + "text": "T W h 600 500 400 300 200 100 0 1974 1980 1985 1990 1995 2000 2005 2010 2017 Non-hydro ren. & waste Nuclear Natural gas Hydro Oil Coal", "type": "FigureCaption", "metadata": { "page_number": 9 @@ -456,8 +456,8 @@ } }, { - "element_id": "e3b0c44298fc1c149afbf4c8996fb924", - "text": "", + "element_id": "36a9e7f1c95b82ffb99743e0c5c4ce95", + "text": " ", "type": "FigureCaption", "metadata": { "page_number": 10 diff --git a/test_unstructured_ingest/expected-structured-output/s3-small-batch/small-pdf-set/recalibrating-risk-report.pdf.json b/test_unstructured_ingest/expected-structured-output/s3-small-batch/small-pdf-set/recalibrating-risk-report.pdf.json index 2dfebc6514..76b0910209 100644 --- a/test_unstructured_ingest/expected-structured-output/s3-small-batch/small-pdf-set/recalibrating-risk-report.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/s3-small-batch/small-pdf-set/recalibrating-risk-report.pdf.json @@ -1,7 +1,7 @@ [ { - "element_id": "e3b0c44298fc1c149afbf4c8996fb924", - "text": "", + "element_id": "36a9e7f1c95b82ffb99743e0c5c4ce95", + "text": " ", "type": "FigureCaption", "metadata": { "page_number": 1 @@ -72,8 +72,8 @@ } }, { - "element_id": "e3b0c44298fc1c149afbf4c8996fb924", - "text": "", + "element_id": "36a9e7f1c95b82ffb99743e0c5c4ce95", + "text": " ", "type": "FigureCaption", "metadata": { "page_number": 3 @@ -112,8 +112,8 @@ } }, { - "element_id": "69516d10744d3ef476952f1794e5f5aa", - "text": "Laypersons Experts 1 Nuclear power 20 2 Motor vehicles 1 3 Handguns 4 4 Smoking 2   17 Electric power (non-nuclear) 9   22 X-rays 7   30 Vaccinations 25", + "element_id": "36c400117de9c8e6e110381b8fee6d7a", + "text": "Laypersons Experts 1 Nuclear power 20 2 Motor vehicles 1 3 Handguns 4 4 Smoking 2 17 Electric power (non-nuclear) 9 22 X-rays 7 30 Vaccinations 25", "type": "FigureCaption", "metadata": { "page_number": 4 @@ -160,16 +160,16 @@ } }, { - "element_id": "ed7969125e9100f2a4c5b9b0892205be", - "text": "Artificial  11% Medicine  0.4% Fallout  0.4% Miscellaneous  0.2% Occupational  0.04% Nuclear Natural  48% Radon  14% Buildings & soil  12% Food & water  10% Cosmic  4% Thoron", + "element_id": "92d62677dba88955dc0cbc685a188166", + "text": "Artificial 11% Medicine 0.4% Fallout 0.4% Miscellaneous 0.2% Occupational 0.04% Nuclear Natural 48% Radon 14% Buildings & soil 12% Food & water 10% Cosmic 4% Thoron", "type": "FigureCaption", "metadata": { "page_number": 5 } }, { - "element_id": "6bf1b10bd14bdf9a954ee9b24b1f30c7", - "text": "Artificial  11% Medicine  0.4% Fallout  0.4% Miscellaneous  0.2% Occupational  0.04% Nuclear discharges Radon Buildings & soil Food & water Cosmic Thoron", + "element_id": "1433482ef2ed418e3bc8ac06e7acfe8f", + "text": "Artificial 11% Medicine 0.4% Fallout 0.4% Miscellaneous 0.2% Occupational 0.04% Nuclear discharges Radon Buildings & soil Food & water Cosmic Thoron", "type": "FigureCaption", "metadata": { "page_number": 5 @@ -272,8 +272,8 @@ } }, { - "element_id": "e3b0c44298fc1c149afbf4c8996fb924", - "text": "", + "element_id": "36a9e7f1c95b82ffb99743e0c5c4ce95", + "text": " ", "type": "FigureCaption", "metadata": { "page_number": 6 @@ -376,8 +376,8 @@ } }, { - "element_id": "e3b0c44298fc1c149afbf4c8996fb924", - "text": "", + "element_id": "36a9e7f1c95b82ffb99743e0c5c4ce95", + "text": " ", "type": "FigureCaption", "metadata": { "page_number": 8 @@ -392,8 +392,8 @@ } }, { - "element_id": "e3b0c44298fc1c149afbf4c8996fb924", - "text": "", + "element_id": "36a9e7f1c95b82ffb99743e0c5c4ce95", + "text": " ", "type": "FigureCaption", "metadata": { "page_number": 9 @@ -424,8 +424,8 @@ } }, { - "element_id": "e3b0c44298fc1c149afbf4c8996fb924", - "text": "", + "element_id": "36a9e7f1c95b82ffb99743e0c5c4ce95", + "text": " ", "type": "FigureCaption", "metadata": { "page_number": 9 diff --git a/test_unstructured_ingest/test-ingest-github.sh b/test_unstructured_ingest/test-ingest-github.sh index 515435ad5a..50ec3e5ce0 100755 --- a/test_unstructured_ingest/test-ingest-github.sh +++ b/test_unstructured_ingest/test-ingest-github.sh @@ -14,7 +14,7 @@ fi PYTHONPATH=. ./unstructured/ingest/main.py --github-url dcneiner/Downloadify --git-file-glob '*.html,*.txt' --structured-output-dir github-downloadify-output --verbose -if ! diff -ru github-downloadify-output test_unstructured_ingest/expected-structured-output/github-downloadify ; then +if ! diff -ru test_unstructured_ingest/expected-structured-output/github-downloadify github-downloadify-output ; then echo echo "There are differences from the previously checked-in structured outputs." echo diff --git a/test_unstructured_ingest/test-ingest-s3.sh b/test_unstructured_ingest/test-ingest-s3.sh index 4eed8b1d2d..47a39c85b3 100755 --- a/test_unstructured_ingest/test-ingest-s3.sh +++ b/test_unstructured_ingest/test-ingest-s3.sh @@ -11,7 +11,7 @@ fi PYTHONPATH=. ./unstructured/ingest/main.py --s3-url s3://utic-dev-tech-fixtures/small-pdf-set/ --s3-anonymous --structured-output-dir s3-small-batch-output -if ! diff -ru s3-small-batch-output test_unstructured_ingest/expected-structured-output/s3-small-batch ; then +if ! diff -ru test_unstructured_ingest/expected-structured-output/s3-small-batch s3-small-batch-output ; then echo echo "There are differences from the previously checked-in structured outputs." echo