-
Notifications
You must be signed in to change notification settings - Fork 0
/
featureextraction.py
90 lines (81 loc) · 3.21 KB
/
featureextraction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import geopandas as gpd
import pandas as pd
# Step 1: Data Collection and Preprocessing
data = gpd.read_file('N2K_2018_3035_v010.gpkg')
# Step 2: Feature Extraction
# Assuming you want to extract features related to N2K habitat categories
# Create empty columns for each habitat category
habitat_categories = [
"Urban fabric",
"Industrial, commercial and military units",
"Road networks and associated land",
"Railways and associated land",
"Port areas and associated land",
"Airports and associated land",
"Mineral extraction, dump and construction sites",
"Land without current use",
"Green urban, sports and leisure facilities",
"Arable irrigated and non-irrigated land",
"Greenhouses",
"Vineyards, fruit trees and berry plantations",
"Olive groves",
"Annual crops associated with permanent crops",
"Complex cultivation patterns",
"Land principally occupied by agriculture with significant areas of natural vegetation",
"Agro-forestry",
"Natural & semi-natural broadleaved forest",
"Highly artificial broadleaved plantations",
"Natural & semi natural coniferous forest",
"Highly artificial coniferous plantations",
"Natural & semi natural mixed forest",
"Highly artificial mixed plantations",
"Transitional woodland and scrub",
"Lines of trees and scrub",
"Damaged forest",
"Managed grassland",
"Semi-natural grassland with woody plants (C.C.D. ≥ 30 %)",
"Semi-natural grassland without woody plants (C.C.D. < 30%)",
"Alpine and sub-alpine natural grassland",
"Heathland and moorland",
"Alpine scrub land",
"Sclerophyllous scrubs",
"Sparsely vegetated areas",
"Beaches and dunes",
"River banks",
"Bare rocks, outcrops, cliffs",
"Burnt areas (except burnt forest)",
"Glaciers and perpetual snow",
"Inland marshes",
"Exploited peat bogs",
"Unexploited peat bogs",
"Salt marshes",
"Salines",
"Intertidal flats",
"Natural & semi-natural water courses",
"Highly modified water courses and canals",
"Seasonally connected water courses (oxbows)",
"Natural lakes",
"Reservoirs",
"Aquaculture ponds",
"Standing water bodies of extractive industrial sites",
"Lagoons",
"Estuaries",
"Marine inlets and fjords",
"Open sea",
"Coastal waters"
]
for category in habitat_categories:
data[category] = 0
# Set 1 for each N2K habitat category present in the data
for category in habitat_categories:
data.loc[data['N2K_habitat_category'] == category, category] = 1
# Extract the habitat category features
features = data[habitat_categories]
# Step 3: Data Labeling
# Assuming you have labeled the data with the impacts on protected species and habitats and stored it in a column called 'impact'
# 1 represents the presence of impact, 0 represents the absence of impact
labels = data['impact']
# Further preprocessing and normalization of the features and labels can be done if required
# Save the extracted features and labels to a CSV file
extracted_features = pd.concat([features, labels], axis=1)
extracted_features.to_csv('extracted_features.csv', index=False)