- 실습 데이터 (영국 교통사고 데이터)
사고 이력 : https://github.com/longnym/lecture/raw/master/sample_data/DfTRoadSafety_Accidents.zip
사상자 정보 : https://github.com/longnym/lecture/raw/master/sample_data/DfTRoadSafety_Casualties.zip
지역 코드 : https://github.com/longnym/lecture/raw/master/sample_data/DfTRoadSafety_District.zip
데이터 활용 가이드 : http://github.com/longnym/lecture/raw/master/sample_data/Road-Accident-Safety-Data-Guide.xls
- MapReduce Application 다운로드
http://github.com/longnym/lecture/raw/master/build/hadoop_ex01.jar
http://github.com/longnym/lecture/raw/master/build/hadoop_ex02.jar
http://github.com/longnym/lecture/raw/master/build/hadoop_ex03.jar
- Driver Class의 실행 (실습 1)
hadoop jar hadoop_ex01.jar -D inputPath=/input/acc/Accidents_2005_2015.csv -D outputPath=/output/result01 -D numReduceTasks=3 skill.coach.TestDriver
- Driver Class의 실행 (실습 2)
hadoop jar hadoop_ex02.jar -D inputPath=/input/acc/Accidents_2005_2015.csv -D outputPath=/output/result02 -D numReduceTasks=3 skill.coach.TestDriver
- Driver Class의 실행 (실습 3)
hadoop jar hadoop_ex03.jar -D inputPath1=/input/acc/Accidents_2005_2015.csv -D inputPath2=/input/cas/Casualties_2005_2015.csv -D cachePath=/input/dis/District.txt -D tempPath=/output/result03_1 -D outputPath=/output/result03_2 -D numReduceTasks=3 skill.coach.TestDriver
- Yarn Application의 실행
yarn jar simple-yarn-app-1.1.0.jar com.hortonworks.simpleyarnapp.Client /bin/date 2 hdfs:///test/simple-yarn-app-1.1.0.jar
- Hive Table 생성 (Accidents)
CREATE EXTERNAL TABLE IF NOT EXISTS ACCIDENTS (
Accident_Index STRING,
Location_Easting_OSGR INT,
Location_Northing_OSGR INT,
Longitude DOUBLE,
Latitude DOUBLE,
Police_Force INT,
Accident_Severity INT,
Number_of_Vehicles INT,
Number_of_Casualties INT,
Date_Acc STRING,
Day_of_Week INT,
Time STRING,
District INT,
Highway STRING,
first_Road_Class INT,
first_Road_Number INT,
Road_Type INT,
Speed_limit INT,
Junction_Detail INT,
Junction_Control INT,
second_Road_Class INT,
second_Road_Number INT,
Pedestrian_Crossing_Human_Control INT,
Pedestrian_Crossing_Physical_Facilities INT,
Light_Conditions INT,
Weather_Conditions INT,
Road_Surface_Conditions INT,
Special_Conditions_at_Site INT,
Carriageway_Hazards INT,
Urban_or_Rural_Area INT,
Did_Police_Officer_Attend_Scene_of_Accident INT,
Lower_Super_Ouput_Area_of_Accident_Location STRING
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ","
LOCATION '/input/acc';
- Hive Table 생성 (Casualties)
CREATE EXTERNAL TABLE IF NOT EXISTS CASUALTIES (
Acc_Index STRING,
Vehicle_Reference INT,
Casualty_Reference INT,
Casualty_Class INT,
Sex_of_Casualty INT,
Age_Band_of_Casualty INT,
Casualty_Severity INT,
Pedestrian_Location INT,
Pedestrian_Movement INT,
Car_Passenger INT,
Bus_or_Coach_Passenger INT,
Pedestrian_Road_Maintenance_Worker INT,
Casualty_Type INT,
Casualty_Home_Area_Type INT
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ","
LOCATION '/input/cas';
- PIG Table 생성 (Accidents)
A = LOAD '/input/acc/Accidents_2005_2015.csv' USING PigStorage(',') AS (
Accident_Index:chararray, Location_Easting_OSGR:chararray, Location_Northing_OSGR:chararray,
Longitude:chararray,
Latitude:chararray,
Police_Force:chararray,
Accident_Severity:chararray,
Number_of_Vehicles:chararray,
Number_of_Casualties:chararray,
Date_Acc:chararray,
Day_of_Week:chararray,
Time:chararray,
District:chararray,
Highway:chararray,
first_Road_Class:chararray,
first_Road_Number:chararray,
Road_Type:chararray,
Speed_limit:chararray,
Junction_Detail:chararray,
Junction_Control:chararray,
second_Road_Class:chararray,
second_Road_Number:chararray,
Pedestrian_Crossing_Human_Control:chararray,
Pedestrian_Crossing_Physical_Facilities:chararray,
Light_Conditions:chararray,
Weather_Conditions:chararray,
Road_Surface_Conditions:chararray,
Special_Conditions_at_Site:chararray,
Carriageway_Hazards:chararray,
Urban_or_Rural_Area:chararray,
Did_Police_Officer_Attend_Scene_of_Accident:chararray,
Lower_Super_Ouput_Area_of_Accident_Location:chararray);
- Hive QL
set hive.execution.engine=mr
select a.Day_of_Week,c.Sex_of_Casualty, count(1) from accidents a join casualties c on a.accident_index = c.acc_index group by Day_of_Week,Sex_of_Casualty;
- MySQL 샘플 데이터
- Sqoop Import
sqoop import --driver com.mysql.jdbc.Driver --connect jdbc:mysql://localhost/employees --username root --table employees --target-dir /test/employees
- Sqoop Export
sqoop export --driver com.mysql.jdbc.Driver --connect jdbc:mysql://localhost/employees --username root --table employees --export-dir /test/employees