Skip to content
This repository has been archived by the owner on Jul 23, 2023. It is now read-only.

Commit

Permalink
generated PIE chart for email provider(s) used by companies in India
Browse files Browse the repository at this point in the history
  • Loading branch information
itzmeanjan committed Sep 15, 2019
1 parent 9f196a1 commit 8576473
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 14 deletions.
21 changes: 13 additions & 8 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
try:
from model.corporateStat import CompaniesUnderState
from util import *
from utilMultiState import plotAllCompaniesByStateUsingStatus, extractAllCompanyEmailProvider
from utilMultiState import *
except ImportError as e:
print('[!]Module Unavailable: {}'.format(str(e)))
exit(1)
Expand Down Expand Up @@ -93,13 +93,18 @@ def __getAllPossibleCompanyStatus__(companyDataSet):
__getAllPossibleCompanyStatus__(allCompanyStatus))
))
'''
allCompanies = map(
lambda v: CompaniesUnderState.importFromCSV(
__extract_state__(v), targetPath=join(targetPath, v)).companies,
filter(
lambda v: v.endswith('csv'), listdir(targetPath)))
print(extractAllCompanyEmailProvider(allCompanies))
return 1.0
return __divide__(
*__calculateSuccess__(
[plotTopEmailProvidersShare(
*extractAllCompanyEmailProvider(
map(
lambda v: CompaniesUnderState.importFromCSV(
__extract_state__(v),
targetPath=join(targetPath, v)).companies,
filter(
lambda v: v.endswith('csv'), listdir(targetPath)))),
'Email Service used by Companies in India',
'./plots/mca_email_service_used_by_companies.png')]))
except Exception:
return 0.0

Expand Down
Binary file added plots/mca_email_service_used_by_companies.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
42 changes: 36 additions & 6 deletions utilMultiState.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,32 @@ def __calculatePercentageOfCompaniesOfSimilarStatusInState__(data: Dict) -> floa
return False


def plotTopEmailProvidersShare(dataSet: Dict[str, int], total: int, title: str, targetPath: str) -> bool:
    '''
        Draws a PIE chart showing how usage is shared among the
        email service providers present in `dataSet`, and stores
        it as an image at `targetPath`.

        dataSet    : provider name -> usage count; one wedge is drawn per entry
        total      : denominator used for the percentage shown in each legend label
        title      : title placed on the chart
        targetPath : file path handed over to `plt.savefig`

        Returns True when the chart was written. Any exception
        ( including division by zero when `total` is 0 ) is printed
        and reported by returning False.
    '''
    try:
        wedgeSizes = list(dataSet.values())
        # legend entry looks like `Gmail ( 42.00 % )`
        labels = ['{} ( {:.2f} % )'.format(name.capitalize(), count*100 / total)
                  for name, count in dataSet.items()]
        font = {
            'family': 'serif',
            'color': '#264040',
            'weight': 'normal',
            'size': 12
        }
        fig = plt.figure(figsize=(24, 12), dpi=100)
        try:
            patches, _ = plt.pie(wedgeSizes)
            plt.legend(patches, labels, loc='best', fontsize='medium')
            plt.title(title, fontdict=font)
            plt.axis('equal')
            plt.tight_layout()
            plt.savefig(targetPath, bbox_inches='tight',
                        pad_inches=.5)
        finally:
            # close exactly the figure created above, even when drawing or saving
            # fails, so that a failed run doesn't leave an open figure behind
            plt.close(fig)
        return True
    except Exception as e:
        print(e)
        return False


'''
expected to take a chain of generator(s),
each of them generating a stream of model.corporateStat.Company object(s),
Expand All @@ -87,11 +113,12 @@ def __calculatePercentageOfCompaniesOfSimilarStatusInState__(data: Dict) -> floa
'''


def extractAllCompanyEmailProvider(dataStream: map) -> Dict[str, int]:
def extractAllCompanyEmailProvider(dataStream: map) -> (Dict[str, int], int):
# Extracts email service provider's name using regular expression

def __getEmailProvider__(email: str) -> str:
matchObj = reg.search(email)
return matchObj.group().replace('@', '').lower() if(matchObj) else None
return matchObj.group().lower() if(matchObj) else None

# Increments usage count email service provider & returns updated Dictionary
def __updateCounter__(holder: Dict[str, int], email: str) -> Dict[str, int]:
Expand All @@ -104,9 +131,11 @@ def __updateCounter__(holder: Dict[str, int], email: str) -> Dict[str, int]:
'''

# Keeps only top 5 elements ( having highest usage count ) in dictionary
def __cleanupCounter__(holder: Dict[str, int]) -> Dict[str, int]:
def __cleanupCounter__(holder: Dict[str, int], count: int, findTotal: bool = True) -> Dict[str, int]:
nonlocal total
total += sum(holder.values()) if findTotal else 0
return dict(map(lambda v: (v, holder[v]), sorted(
holder, key=lambda v: holder[v], reverse=True)[:5]))
holder, key=lambda v: holder[v], reverse=True)[:count]))

# merges two usage count holder dictionaries (one holding everything calculated up to this point)
# and another one holding the record for a certain state (which we just processed)
Expand All @@ -118,14 +147,15 @@ def __mergeTwoDicts__(first: Dict[str, int], second: Dict[str, int]) -> Dict[str
second, first)

try:
reg = reg_compile(r'(@.+)')
total = 0
reg = reg_compile(r'(?<=@)[^.]+(?=\.)')
# processes each state of India at a time & extracts top 5
# email service providers, finally we calculate top 5
# email service providers used by companies spread across different states of India
return __cleanupCounter__(reduce(lambda acc, cur:
__mergeTwoDicts__(acc, __cleanupCounter__(
reduce(lambda acc, cur: __updateCounter__(
acc, __getEmailProvider__(cur.email)), cur, {}))), dataStream, {}))
acc, __getEmailProvider__(cur.email)), cur, {}), 10)), dataStream, {}), 10, findTotal=False), total
except Exception:
return None

Expand Down

0 comments on commit 8576473

Please sign in to comment.