From 0b67164d9673a98d2625b5c406796519e0c00136 Mon Sep 17 00:00:00 2001
From: Roger Barnes
Date: Wed, 18 Aug 2010 22:39:42 +1000
Subject: [PATCH 2/2] Added Lane Cove scraper

---
 scraper_factory.rb            |    1 +
 scrapers/lane_cove_scraper.rb |   77 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 78 insertions(+), 0 deletions(-)
 create mode 100644 scrapers/lane_cove_scraper.rb

diff --git a/scraper_factory.rb b/scraper_factory.rb
index 08ecf5d..a81fd3d 100644
--- a/scraper_factory.rb
+++ b/scraper_factory.rb
@@ -67,6 +67,7 @@ module Scrapers
       SydneyScraper.new("City of Sydney", "Sydney", "NSW"),
       LoganScraper.new("Logan City Council", "Logan", "QLD"),
       WoollahraScraper.new("Woollahra Municipal Council", "Woollahra", "NSW"),
+      LaneCoveScraper.new("Lane Cove Council", "Lane Cove", "NSW"),
       RandwickScraper.new("Randwick City Council", "Randwick", "NSW"),
       SutherlandScraper.new("Sutherland Shire Council", "Sutherland", "NSW"),
       ACTScraper.new("ACT Planning & Land Authority", "ACT", "ACT"),
diff --git a/scrapers/lane_cove_scraper.rb b/scrapers/lane_cove_scraper.rb
new file mode 100644
index 0000000..280707b
--- /dev/null
+++ b/scrapers/lane_cove_scraper.rb
@@ -0,0 +1,77 @@
+require 'scraper'
+
+# Scrapes development applications from the Lane Cove Council eCouncil
+# enquiry site by running a date search for a single day.
+class LaneCoveScraper < Scraper
+  # Doesn't seem to work without that nodeNum. I wonder what it is.
+  # We can't give a link directly to an application. Bummer. So, this search
+  # page is also used as the info link for every application.
+  SEARCH_URL = "http://ecouncil.lanecove.nsw.gov.au/eservice/daEnquiryInit.do?doc_type=8&nodeNum=328"
+  # Apparently comments should be submitted by email?
+  COMMENT_URL = "mailto:lccouncil@lanecove.nsw.gov.au"
+
+  # Returns an array with one DevelopmentApplication for each application
+  # listed in the search results for the given date.
+  #
+  # date:: any object responding to day, month and year
+  #
+  # Raises a RuntimeError if the search form or the results container can't
+  # be found, or if the results contain a node of an unexpected type.
+  def applications(date)
+    results = []
+    # The applications are headed by address. So, keep track of the address
+    address = nil
+    find_results_container(submit_date_search(date)).children.each do |block|
+      case block.name
+      when "text", "comment", "script", "p"
+        # Do nothing
+      when "h4"
+        address = block.inner_text.strip
+      when "div"
+        paragraphs = block.search('p')
+        # ignore divs not in the expected form (ie the last one containing the New Search link)
+        next if paragraphs.empty?
+
+        results << DevelopmentApplication.new(
+          :address => address,
+          :date_received => span_value(paragraphs[4]),
+          :description => span_value(paragraphs[0]),
+          :application_id => span_value(paragraphs[3]),
+          :info_url => SEARCH_URL,
+          :comment_url => COMMENT_URL)
+      else
+        raise "Unexpected type: #{block.name} #{block}"
+      end
+    end
+    results
+  end
+
+  private
+
+  # Fetches the search page, submits a search from date to date (a single
+  # day) and returns the resulting page.
+  def submit_date_search(date)
+    page = agent.get(SEARCH_URL)
+    form = page.form_with(:action => 'daEnquiry.do')
+    raise "Couldn't find the search form on #{SEARCH_URL}" if form.nil?
+
+    formatted_date = "#{date.day}/#{date.month}/#{date.year}"
+    form.field_with(:name => /dateFrom/).value = formatted_date
+    form.field_with(:name => /dateTo/).value = formatted_date
+    form.submit
+  end
+
+  # Returns the node whose children are the address headings and the
+  # application blocks. No id on the subdiv, so it's hard to get to
+  def find_results_container(page)
+    content = page.at('#fullcontent')
+    container = content && content.search('div')[5]
+    raise "Couldn't find the search results in the page" if container.nil?
+    container
+  end
+
+  # Returns the stripped text of the second span inside the given paragraph
+  def span_value(paragraph)
+    paragraph.search('span')[1].inner_text.strip
+  end
+end
-- 
1.7.0.4