Skip to content

Commit 0447dc9

Browse files
author
hridayns
committed
initial commit
0 parents  commit 0447dc9

File tree

6 files changed

+316
-0
lines changed

6 files changed

+316
-0
lines changed

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
*.sublime-project
2+
*.sublime-workspace
3+
.vagrant
4+
*.txt

Vagrantfile

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
# -*- mode: ruby -*-
2+
# vi: set ft=ruby :
3+
4+
# All Vagrant configuration is done below. The "2" in Vagrant.configure
5+
# configures the configuration version (we support older styles for
6+
# backwards compatibility). Please don't change it unless you know what
7+
# you're doing.
8+
Vagrant.configure("2") do |config|
9+
# The most common configuration options are documented and commented below.
10+
# For a complete reference, please see the online documentation at
11+
# https://docs.vagrantup.com.
12+
13+
# Every Vagrant development environment requires a box. You can search for
14+
# boxes at https://vagrantcloud.com/search.
15+
config.vm.box = "hackstreet/hadoop"
16+
17+
# Disable automatic box update checking. If you disable this, then
18+
# boxes will only be checked for updates when the user runs
19+
# `vagrant box outdated`. This is not recommended.
20+
# config.vm.box_check_update = false
21+
22+
# Create a forwarded port mapping which allows access to a specific port
23+
# within the machine from a port on the host machine. In the example below,
24+
# accessing "localhost:8080" will access port 80 on the guest machine.
25+
# config.vm.network "forwarded_port", guest: 80, host: 8080
26+
27+
# Create a private network, which allows host-only access to the machine
28+
# using a specific IP.
29+
# config.vm.network "private_network", ip: "192.168.33.10"
30+
31+
# Create a public network, which is generally matched to a bridged network.
32+
# Bridged networks make the machine appear as another physical device on
33+
# your network.
34+
# config.vm.network "public_network"
35+
36+
# Share an additional folder to the guest VM. The first argument is
37+
# the path on the host to the actual folder. The second argument is
38+
# the path on the guest to mount the folder. And the optional third
39+
# argument is a set of non-required options.
40+
config.vm.synced_folder "./shareFiles", "/home/piggyBoi"
41+
42+
# Provider-specific configuration so you can fine-tune various
43+
# backing providers for Vagrant. These expose provider-specific options.
44+
# Example for VirtualBox:
45+
#
46+
# config.vm.provider "virtualbox" do |vb|
47+
# # Display the VirtualBox GUI when booting the machine
48+
# vb.gui = true
49+
#
50+
# # Customize the amount of memory on the VM:
51+
# vb.memory = "1024"
52+
# end
53+
#
54+
# View the documentation for the provider you are using for more
55+
# information on available options.
56+
57+
# Define a Vagrant Push strategy for pushing to Atlas. Other push strategies
58+
# such as FTP and Heroku are also available. See the documentation at
59+
# https://docs.vagrantup.com/v2/push/atlas.html for more information.
60+
# config.push.define "atlas" do |push|
61+
# push.app = "YOUR_ATLAS_USERNAME/YOUR_APPLICATION_NAME"
62+
# end
63+
64+
# Enable provisioning with a shell script. Additional provisioners such as
65+
# Puppet, Chef, Ansible, Salt, and Docker are also available. Please see the
66+
# documentation for more information about their specific syntax and use.
67+
# config.vm.provision "shell", inline: <<-SHELL
68+
# apt-get update
69+
# apt-get install -y apache2
70+
# SHELL
71+
end

shareFiles/pig_script.pig

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
REGISTER /opt/pig-0.17.0/lib/piggybank.jar;
2+
3+
raw_data = LOAD '/home/piggyBoi/sample_log' USING org.apache.pig.piggybank.storage.apachelog.CombinedLogLoader() AS (ipaddr: chararray, logname: chararray, temp0: chararray,time: chararray, temp1: chararray, temp3: chararray, temp4: chararray, temp5: int, temp6: int, page_link: chararray, temp7: chararray);
4+
5+
grpd_by_page = Group raw_data by page_link;
6+
7+
page_hits = FOREACH grpd_by_page GENERATE flatten($0), COUNT($1) as page_count;
8+
9+
STORE page_hits INTO 'parsed_log';
10+
11+
/*
12+
ordrd_list = ORDER page_hits BY page_count DESC;
13+
14+
Top_page = LIMIT ordrd_list 1;
15+
16+
store Top_page into 'top_page_res' using PigStorage(',');
17+
18+
DEFINE DATE_EXTRACT org.apache.pig.piggybank.evaluation.util.apachelogparser.DateExtractor('dd/MMM/yyyy:HH:mm:ss Z','d/M/y');
19+
20+
grpd = GROUP raw_data BY DATE_EXTRACT(time);
21+
22+
hits_per_day = foreach grpd generate flatten($0),COUNT($1);
23+
24+
store hits_per_day into 'hits_per_day' using PigStorage(',');*/

shareFiles/pig_test_script.pig

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
REGISTER /opt/pig-0.17.0/lib/piggybank.jar;
2+
REGISTER 'script.py' using jython as myfuncs;
3+
4+
raw_data = LOAD '/home/piggyBoi/chat1.txt' USING PigStorage('\n') AS (line:chararray);
5+
6+
data = FOREACH raw_data GENERATE FLATTEN(myfuncs.customParser(line));
7+
8+
/*grp_by_sender = GROUP data BY sender;*/
9+
10+
grp_by_word_id = GROUP data BY result::words.id;
11+
12+
STORE grp_by_word_id INTO 'parsed_chat';

0 commit comments

Comments
 (0)