Skip to content

Commit ac22294

Browse files
Merge pull request #39 from diffgram/add-import-sample
Add import EPRI dataset script sample
2 parents 9631f3e + a78736b commit ac22294

File tree

6 files changed

+349
-0
lines changed

6 files changed

+349
-0
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Overhead-Distribution-Labels.csv
2+
images/*.JPG
3+
.env
4+
.DS_Store
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
## Description
2+
3+
This is a simple script that imports the [EPRI Distribution Inspection Imagery](https://www.kaggle.com/datasets/dexterlewis/epri-distribution-inspection-imagery) dataset into Diffgram.
4+
5+
So far, it only imports annotations of type **polygon** and skips annotations of type **line** (a polyline instance type doesn't exist in Diffgram yet); this will be improved in a future version.
6+
7+
## Usage
8+
9+
1. Create and activate virtual environment
10+
11+
```
12+
virtualenv your-env-name
13+
source your-env-name/bin/activate
14+
```
15+
16+
2. Install dependencies from **requirements.txt**:
17+
18+
```
19+
pip install -r requirements.txt
20+
```
21+
22+
3. Download the [annotations file](https://publicstorageaccnt.blob.core.windows.net/drone-distribution-inspection-imagery/Overhead-Distribution-Labels.csv) and place it in the root folder
23+
24+
4. Download the images and unzip them into the **images** folder
25+
26+
5. Create a .env file and set the environment variables:
27+
28+
```
29+
touch .env
30+
```
31+
32+
```
33+
PROJECT_STRING_ID=project-string-id
34+
CLIENT_ID=client-id
35+
CLIENT_SECRET=client-secret
36+
HOST=https://example.com
37+
```
38+
39+
6. Run script:
40+
41+
```
42+
python import.py
43+
```
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
import os
2+
import ast
3+
import pandas as pd
4+
from dotenv import load_dotenv
5+
from diffgram import Project
6+
import time
7+
from azure.storage.blob import BlobServiceClient, __version__
8+
9+
# ---------------------------------------------------------------------------
# Import EPRI Distribution Inspection Imagery annotations into Diffgram.
#
# This variant takes the list of candidate image names from an Azure Blob
# Storage container, drops the ones already present in the project's
# "Default" directory, then uploads each remaining image together with its
# polygon annotations read from the annotation CSV.
#
# Environment (loaded from .env): AZURE_STORAGE_CONNECTION_STRING,
# BUCKET_NAME, PROJECT_STRING_ID, CLIENT_ID, CLIENT_SECRET, HOST.
#
# NOTE(review): image names come from the Azure container, but the upload
# itself reads the file from the local ./images/ folder, so the images must
# also exist locally. BLOCK_CONTAINER_URL used to be read here but was never
# used. Confirm whether uploading straight from blob storage was intended.
# ---------------------------------------------------------------------------

IMAGES_DIR = 'images'
DEFAULT_ANNOTATION_FILE = 'Overhead-Distribution-Labels.csv'
REQUIRED_COLUMNS = ('External ID', 'Label')


def diffgram_name_to_local(original_filename):
    """Map a Diffgram ``original_filename`` back to the source image name.

    Every ``_`` becomes `` (`` and every ``.`` becomes ``).``, e.g.
    ``"1_23.JPG"`` -> ``"1 (23).JPG"``.  Presumably this undoes the renaming
    applied on upload -- TODO confirm against the Diffgram SDK.
    """
    return original_filename.replace('_', ' (').replace('.', ').')


def pending_images(candidate_names, imported_files):
    """Return the candidate names that are not in the project yet.

    ``imported_files`` are the objects returned by ``list_files()``; entries
    without an ``original_filename`` attribute are ignored.  The order of
    ``candidate_names`` is preserved.
    """
    already_imported = set()
    for imported in imported_files:
        stored_name = getattr(imported, 'original_filename', None)
        if stored_name:
            already_imported.add(diffgram_name_to_local(stored_name))
    return [name for name in candidate_names if name not in already_imported]


def parse_image_count(raw, available):
    """Turn the answer to the "how many images" prompt into a count.

    A blank answer means "all" (``available``).  Raises ``ValueError`` for
    anything that is not a non-negative integer; a negative value would
    otherwise silently slice from the end of the image list.
    """
    raw = raw.strip()
    if raw == '':
        return available
    count = int(raw)
    if count < 0:
        raise ValueError('image count must not be negative')
    return count


def polygon_instances(label_cell, resolve_label_id):
    """Convert one ``Label`` CSV cell into Diffgram polygon instances.

    ``label_cell`` is a Python-literal string holding a dict with an
    ``objects`` list.  ``resolve_label_id`` is called with the ``value`` of
    every object (lines included, so their labels still get created) and must
    return the label file id.  Only objects with a non-empty ``polygon`` yield
    an instance; ``line`` objects are skipped.
    """
    parsed = ast.literal_eval(label_cell)
    instances = []
    for annotated in parsed['objects']:
        label_file_id = resolve_label_id(annotated['value'])
        points = annotated.get('polygon')
        if points:
            instances.append({
                "type": 'polygon',
                "points": points,
                "label_file_id": label_file_id,
            })
    return instances


def make_label_resolver(project, schema_id, labels_by_name):
    """Build a ``name -> label file id`` function that creates labels lazily.

    ``labels_by_name`` is the cache of label dicts already in the schema.  It
    is keyed by the name used in the annotation file, so a repeated name is
    always found again even if the server returns it in a different form.
    """
    def resolve(name):
        if name not in labels_by_name:
            labels_by_name[name] = project.label_new({'name': name}, schema_id)
        return labels_by_name[name]['id']
    return resolve


def list_container_images():
    """Return the names of all blobs in the configured Azure container."""
    connect_str = os.getenv('AZURE_STORAGE_CONNECTION_STRING')
    bucket_name = os.getenv('BUCKET_NAME')
    if not connect_str or not bucket_name:
        # Fail with a readable message instead of an opaque SDK error.
        raise SystemExit(
            "AZURE_STORAGE_CONNECTION_STRING and BUCKET_NAME must be set "
            "(for example in the .env file)"
        )
    blob_service_client = BlobServiceClient.from_connection_string(connect_str)
    container_client = blob_service_client.get_container_client(bucket_name)
    return [blob.name for blob in container_client.list_blobs()]


def ask_image_count(available):
    """Prompt until the user gives a valid number of images to import."""
    while True:
        # input() stays outside the try so Ctrl+C / EOF end the script
        # instead of being swallowed and re-prompting forever.
        answer = input("How many images do you want to import? (blank to import all) ")
        try:
            return parse_image_count(answer, available)
        except ValueError:
            print("Invalid input: please input positive number")


def ask_schema(project):
    """Prompt for a schema name and return ``(schema, labels_by_name)``.

    An existing schema with that name is reused and its labels are loaded;
    otherwise a new schema is created and the label cache starts empty.
    """
    while True:
        schema_name = input("Shema name (if shema with this name already exists - it will be used, otherwise new shema will be created): ")
        try:
            matches = [
                existing for existing in project.get_label_schema_list()
                if existing.get('name') == schema_name
            ]
            if not matches:
                schema = project.new_schema(name=schema_name)
                print("Schema successfully created")
                return schema, {}
            schema = matches[0]
            labels_by_name = {
                entry['label']['name']: entry
                for entry in project.get_label_list(schema.get('id'))
            }
            return schema, labels_by_name
        except Exception as exc:
            # The old message claimed the schema already existed, which is
            # the one case handled above; report the real failure instead.
            print(f"Could not load or create schema {schema_name!r}: {exc}")


def ask_annotations():
    """Prompt for the annotation CSV and return it as a DataFrame."""
    while True:
        file_name = input("What is the name of the file with annotations? (leave blank to use default Overhead-Distribution-Labels.csv)")
        path = file_name or DEFAULT_ANNOTATION_FILE
        try:
            frame = pd.read_csv(path)
        except (OSError, ValueError):
            # OSError covers a missing/unreadable file; pandas parser and
            # decoding errors derive from ValueError.
            print("Seems like annotation file is not here")
            continue
        missing = [column for column in REQUIRED_COLUMNS if column not in frame.columns]
        if missing:
            print(f"{path} is missing required column(s): {missing}")
            continue
        return frame


def main():
    """Run the interactive import."""
    load_dotenv()

    candidate_names = list_container_images()

    project = Project(
        project_string_id=os.getenv('PROJECT_STRING_ID'),
        client_id=os.getenv('CLIENT_ID'),
        client_secret=os.getenv('CLIENT_SECRET'),
        host=os.getenv('HOST'),
    )

    imported_files = project.directory.get(name="Default").list_files()
    image_list = pending_images(candidate_names, imported_files)
    image_list = image_list[:ask_image_count(len(image_list))]

    schema, labels_by_name = ask_schema(project)
    df = ask_annotations()
    resolve_label_id = make_label_resolver(project, schema.get('id'), labels_by_name)

    succeslully_imported = []
    import_errors = []

    for image in image_list:
        try:
            instance_list = []
            for label_cell in df.loc[df['External ID'] == image, 'Label']:
                instance_list.extend(polygon_instances(label_cell, resolve_label_id))

            project.file.from_local(
                path=f'./{IMAGES_DIR}/{image}',
                instance_list=instance_list,
                convert_names_to_label_files=False,
            )
        except Exception as exc:
            # One bad row or failed upload must not abort the whole run.
            import_errors.append(image)
            print(f'Error ocurred while importing {image}: {exc}')
        else:
            succeslully_imported.append(image)
            print(f'{image} has been imported with {len(instance_list)} annotation(s)')

    print(f"Successfully imported {len(succeslully_imported)} file(s): ", succeslully_imported)
    # Previously this line reported the number of successes as the error count.
    print(f"Errors while importing {len(import_errors)} file(s): ", import_errors)


if __name__ == "__main__":
    main()

sdk/samples/EPRI dataset import/images/.gitkeep

Whitespace-only changes.
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
import os
2+
import ast
3+
import pandas as pd
4+
from dotenv import load_dotenv
5+
from diffgram import Project
6+
import time
7+
8+
# ---------------------------------------------------------------------------
# Import EPRI Distribution Inspection Imagery annotations into Diffgram.
#
# Candidate images are the files in the local ./images/ folder.  Images that
# are already in the project's "Default" directory are skipped; every other
# image is uploaded together with its polygon annotations read from the
# annotation CSV.  Annotations of type "line" are skipped.
#
# Environment (loaded from .env): PROJECT_STRING_ID, CLIENT_ID,
# CLIENT_SECRET, HOST.
# ---------------------------------------------------------------------------

IMAGES_DIR = 'images'
DEFAULT_ANNOTATION_FILE = 'Overhead-Distribution-Labels.csv'
REQUIRED_COLUMNS = ('External ID', 'Label')


def diffgram_name_to_local(original_filename):
    """Map a Diffgram ``original_filename`` back to the local image name.

    Every ``_`` becomes `` (`` and every ``.`` becomes ``).``, e.g.
    ``"1_23.JPG"`` -> ``"1 (23).JPG"``.  Presumably this undoes the renaming
    applied on upload -- TODO confirm against the Diffgram SDK.
    """
    return original_filename.replace('_', ' (').replace('.', ').')


def pending_images(candidate_names, imported_files):
    """Return the candidate names that are not in the project yet.

    ``imported_files`` are the objects returned by ``list_files()``; entries
    without an ``original_filename`` attribute are ignored.  The order of
    ``candidate_names`` is preserved.
    """
    already_imported = set()
    for imported in imported_files:
        stored_name = getattr(imported, 'original_filename', None)
        if stored_name:
            already_imported.add(diffgram_name_to_local(stored_name))
    return [name for name in candidate_names if name not in already_imported]


def parse_image_count(raw, available):
    """Turn the answer to the "how many images" prompt into a count.

    A blank answer means "all" (``available``).  Raises ``ValueError`` for
    anything that is not a non-negative integer; a negative value would
    otherwise silently slice from the end of the image list.
    """
    raw = raw.strip()
    if raw == '':
        return available
    count = int(raw)
    if count < 0:
        raise ValueError('image count must not be negative')
    return count


def polygon_instances(label_cell, resolve_label_id):
    """Convert one ``Label`` CSV cell into Diffgram polygon instances.

    ``label_cell`` is a Python-literal string holding a dict with an
    ``objects`` list.  ``resolve_label_id`` is called with the ``value`` of
    every object (lines included, so their labels still get created) and must
    return the label file id.  Only objects with a non-empty ``polygon`` yield
    an instance; ``line`` objects are skipped.
    """
    parsed = ast.literal_eval(label_cell)
    instances = []
    for annotated in parsed['objects']:
        label_file_id = resolve_label_id(annotated['value'])
        points = annotated.get('polygon')
        if points:
            instances.append({
                "type": 'polygon',
                "points": points,
                "label_file_id": label_file_id,
            })
    return instances


def make_label_resolver(project, schema_id, labels_by_name):
    """Build a ``name -> label file id`` function that creates labels lazily.

    ``labels_by_name`` is the cache of label dicts already in the schema.  It
    is keyed by the name used in the annotation file, so a repeated name is
    always found again even if the server returns it in a different form.
    """
    def resolve(name):
        if name not in labels_by_name:
            labels_by_name[name] = project.label_new({'name': name}, schema_id)
        return labels_by_name[name]['id']
    return resolve


def local_image_names(directory):
    """List the files in ``directory``, sorted, without hidden entries.

    Hidden entries are dropped because the folder ships with a ``.gitkeep``
    placeholder that is not an image.  Sorting makes "import the first N"
    reproducible; ``os.listdir`` returns names in arbitrary order.
    """
    return sorted(name for name in os.listdir(directory) if not name.startswith('.'))


def ask_image_count(available):
    """Prompt until the user gives a valid number of images to import."""
    while True:
        # input() stays outside the try so Ctrl+C / EOF end the script
        # instead of being swallowed and re-prompting forever.
        answer = input("How many images do you want to import? (blank to import all) ")
        try:
            return parse_image_count(answer, available)
        except ValueError:
            print("Invalid input: please input positive number")


def ask_schema(project):
    """Prompt for a schema name and return ``(schema, labels_by_name)``.

    An existing schema with that name is reused and its labels are loaded;
    otherwise a new schema is created and the label cache starts empty.
    """
    while True:
        schema_name = input("Shema name (if shema with this name already exists - it will be used, otherwise new shema will be created): ")
        try:
            matches = [
                existing for existing in project.get_label_schema_list()
                if existing.get('name') == schema_name
            ]
            if not matches:
                schema = project.new_schema(name=schema_name)
                print("Schema successfully created")
                return schema, {}
            schema = matches[0]
            labels_by_name = {
                entry['label']['name']: entry
                for entry in project.get_label_list(schema.get('id'))
            }
            return schema, labels_by_name
        except Exception as exc:
            # The old message claimed the schema already existed, which is
            # the one case handled above; report the real failure instead.
            print(f"Could not load or create schema {schema_name!r}: {exc}")


def ask_annotations():
    """Prompt for the annotation CSV and return it as a DataFrame."""
    while True:
        file_name = input("What is the name of the file with annotations? (leave blank to use default Overhead-Distribution-Labels.csv)")
        path = file_name or DEFAULT_ANNOTATION_FILE
        try:
            frame = pd.read_csv(path)
        except (OSError, ValueError):
            # OSError covers a missing/unreadable file; pandas parser and
            # decoding errors derive from ValueError.
            print("Seems like annotation file is not here")
            continue
        missing = [column for column in REQUIRED_COLUMNS if column not in frame.columns]
        if missing:
            print(f"{path} is missing required column(s): {missing}")
            continue
        return frame


def main():
    """Run the interactive import and report the elapsed wall-clock time."""
    start_time = time.time()

    load_dotenv()

    candidate_names = local_image_names(IMAGES_DIR)

    project = Project(
        project_string_id=os.getenv('PROJECT_STRING_ID'),
        client_id=os.getenv('CLIENT_ID'),
        client_secret=os.getenv('CLIENT_SECRET'),
        host=os.getenv('HOST'),
    )

    imported_files = project.directory.get(name="Default").list_files()
    image_list = pending_images(candidate_names, imported_files)
    image_list = image_list[:ask_image_count(len(image_list))]

    schema, labels_by_name = ask_schema(project)
    df = ask_annotations()
    resolve_label_id = make_label_resolver(project, schema.get('id'), labels_by_name)

    succeslully_imported = []
    import_errors = []

    for image in image_list:
        try:
            instance_list = []
            for label_cell in df.loc[df['External ID'] == image, 'Label']:
                instance_list.extend(polygon_instances(label_cell, resolve_label_id))

            project.file.from_local(
                path=f'./{IMAGES_DIR}/{image}',
                instance_list=instance_list,
                convert_names_to_label_files=False,
            )
        except Exception as exc:
            # One bad row or failed upload must not abort the whole run.
            import_errors.append(image)
            print(f'Error ocurred while importing {image}: {exc}')
        else:
            succeslully_imported.append(image)
            print(f'{image} has been imported with {len(instance_list)} annotation(s)')

    print(f"Successfully imported {len(succeslully_imported)} file(s): ", succeslully_imported)
    # Previously this line reported the number of successes as the error count.
    print(f"Errors while importing {len(import_errors)} file(s): ", import_errors)

    # Elapsed time includes the time spent waiting at the prompts.
    print("--- %s seconds ---" % (time.time() - start_time))


if __name__ == "__main__":
    main()
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
azure-core==1.24.2
2+
azure-storage-blob==12.13.0
3+
certifi==2022.6.15
4+
cffi==1.15.1
5+
charset-normalizer==2.1.0
6+
cryptography==37.0.4
7+
diffgram==0.8.5
8+
idna==3.3
9+
imageio==2.19.5
10+
isodate==0.6.1
11+
msrest==0.7.1
12+
numpy==1.23.1
13+
oauthlib==3.2.0
14+
pandas==1.4.3
15+
Pillow==9.2.0
16+
pycparser==2.21
17+
python-dateutil==2.8.2
18+
python-dotenv==0.20.0
19+
pytz==2022.1
20+
requests==2.28.1
21+
requests-oauthlib==1.3.1
22+
scipy==1.8.1
23+
six==1.16.0
24+
typing_extensions==4.3.0
25+
urllib3==1.26.10

0 commit comments

Comments
 (0)