#!/usr/bin/env ruby
#
#--
# Created by Adam Beguelin on 2007-03-27.
# Truveo Video Search Ruby API version 3.
# Copyright (c) 2007. AOL LLC.
# All rights reserved.
#++

# NOTE(review): every value other than nil/false is truthy in Ruby, so this
# assignment turns warnings ON ($VERBOSE reads back as true). Kept as-is
# because the error reporting in Truveo#get_videos_hash keys off it; confirm
# whether "quiet" was the intent.
$VERBOSE = 0

require "net/http"
require "uri"
require "rexml/document"
include REXML

=begin rdoc
TruveoResponse objects are returned from Truveo methods get_videos(), get_related_categories(),
get_related_channels(), get_related_tags(), and get_related_users().

For example, the following line of code creates a new TruveoResponse object as the result of a
call to Truveo.get_videos().

  res = t.get_videos("funny")

The video_set attribute is an array of the videos returned by the get_videos call.

  res.video_set.each{|v| ... }  # iterates through the videos, each video is a hash of the metadata for that video

The channel_set is a hash of the channels that match the query. The key is the name of the channel.
The value is the count of the number of videos in that channel that match the query.

  res.channel_set.each_pair{|key,val| ... }

The tag_set, category_set, and user_set members are similar to the channel_set member described above.

The TruveoResponse also implements the each method which supports iteration through all the
videos that can be returned by the get_videos() query that created the TruveoResponse, up to 1,000 videos.

  res = t.get_videos("funny")
  res.each{|v| puts v['title']}
=end
class TruveoResponse
  # Array of videos returned by the query.
  #   res.video_set   # <-- array of the videos that matched query
  attr_accessor :video_set

  # Hash of channels and their related counts for your query
  #   res.channel_set   # <-- hash of the matching channels
  attr_accessor :channel_set

  # Hash of categories and their related counts for your query
  #   res.category_set   # <-- hash of the matching categories
  attr_accessor :category_set

  # Hash of tags and their related counts for your query
  #   res.tag_set   # <-- hash of the matching tags
  attr_accessor :tag_set

  # Hash of users and their related counts for your query
  #   res.user_set   # <-- hash of the matching users
  attr_accessor :user_set

  # String containing method, i.e., 'truveo.videos.getVideos'
  #   res.method
  # Note: this accessor shadows Object#method on response objects.
  attr_accessor :method

  # String containing the query used to create this response object
  #   res.query
  attr_accessor :query

  # String containing the sorter used to create this response
  #   res.sortby
  attr_accessor :sortby

  # String containing the query suggestion, if any
  #   res.query_suggestion
  attr_accessor :query_suggestion

  # String indicating the number of total results that matched the query
  #   res.total_results_available
  attr_accessor :total_results_available

  # String indicating the number of results returned in this result set. For get_videos() the
  # following should be true: video_set.length == total_results_returned.to_i
  #   res.total_results_returned
  attr_accessor :total_results_returned

  # String representing the position of the first Video in the entire set of matching videos.
  #   res.first_result_position
  attr_accessor :first_result_position

  # String containing the URL which will return an RSS feed for the set of videos returned in
  # response to the submitted query.
  #   res.rss_url
  attr_accessor :rss_url

  # String containing a human-readable title for the set of videos returned in response to the
  # submitted request. For example, this field would return a string such as
  # "Most popular 'madonna' videos in Music on MTV" for the query
  # 'madonna category:Music channel:MTV sort:mostPopular'.
  #   res.video_set_title
  attr_accessor :video_set_title

  # String indicating the number of channel results returned
  #   res.channel_set.length == res.channel_results_returned.to_i   # <-- true
  attr_accessor :channel_results_returned

  # String indicating the number of category results returned
  #   res.category_set.length == res.category_results_returned.to_i   # <-- true
  attr_accessor :category_results_returned

  # String indicating the number of tag results returned
  #   res.tag_set.length == res.tag_results_returned.to_i   # <-- true
  attr_accessor :tag_results_returned

  # String indicating the number of user results returned
  #   res.user_set.length == res.user_results_returned.to_i   # <-- true
  attr_accessor :user_results_returned

  # String indicating the integer code for the error if one occurred
  attr_accessor :error_code

  # String containing text of error code if one occurred.
  attr_accessor :error_text

  # :stopdoc:

  def initialize
    # videos already handed out by each, kept so later iterations can replay them
    @vidlist = []
  end

  attr_accessor :sphinxquery   # sphinx query used
  attr_accessor :sphinxfilters # sphinx filters used
  attr_accessor :params        # parameters sent to truveo api call when TruveoResponse was created
  attr_accessor :truveo        # truveo object used to create the results, used for next_video

  # copy over state for next_video call
  def next_self(res)
    self.video_set = res.video_set
    self.channel_set = res.channel_set
    self.category_set = res.category_set
    self.tag_set = res.tag_set
    self.user_set = res.user_set
    self.query_suggestion = res.query_suggestion
    self.total_results_available = res.total_results_available
    self.total_results_returned = res.total_results_returned
    self.first_result_position = res.first_result_position
    self.rss_url = res.rss_url
    self.video_set_title = res.video_set_title
    self.channel_results_returned = res.channel_results_returned
    self.category_results_returned = res.category_results_returned
    self.tag_results_returned = res.tag_results_returned
    self.user_results_returned = res.user_results_returned
    self.sphinxquery = res.sphinxquery
    self.sphinxfilters = res.sphinxfilters
    # surface a failed follow-up request instead of silently ending the iteration
    self.error_code = res.error_code
    self.error_text = res.error_text
  end

  # :startdoc:

=begin rdoc
Iterate through all the videos in the response. Each video is a hash where the key is the
metadata field, like title, and the value is the actual metadata. The videos are returned in
whatever order was specified by the sorter, if any, used in the query that created the
TruveoResponse object.

The following goes through all the videos that match the query and prints the title.
If more than one thousand videos match the query, the each method will only iterate through
the first thousand.

  # create a Truveo object with my app id (apply for a free app id at http://developer.truveo.com/)
  t = Truveo.new("appid")
  res = t.get_videos("funny")
  # print lots of titles
  res.each{|vid| puts vid['title']}

Note that the each method will invoke another get_videos() method behind the scenes. These calls
will count against your daily limit. This means iterating through a thousand results using each
will result in 100 calls to get_videos() by default.

When called without a block, an Enumerator over the same videos is returned.
=end
  def each # :yields: video
    return enum_for(:each) unless block_given?

    # replay the results we've stored so far
    @vidlist.each { |v| yield v }

    # get any more videos, storing the results for later calls to self.each
    while (v = next_video)
      @vidlist << v
      yield v
    end
  end

  # :stopdoc:

  # get the next video, assumes a query has already been made and that video_set, params and
  # truveo were filled in by Truveo#get_videos_hash
  def next_video
    return nil if video_set.nil?

    # get more videos if video_set has been depleted
    if video_set.empty?
      next_start = first_result_position.to_i + total_results_returned.to_i
      # check for valid start, we never return more than 1000 results
      return nil if next_start >= 1000
      # without the original request there is nothing to page with
      return nil if params.nil? || truveo.nil?

      # try to get another video_set
      params[:start] = next_start.to_s
      next_self(truveo.get_videos_hash(params))
      return nil if video_set.nil? || video_set.empty?
    end

    video_set.shift
  end

  # :startdoc:
end

# The Truveo class implements a Ruby version of the Truveo API (see the
# {Truveo Developer Site}[http://developer.truveo.com] for details).
#
# A Truveo object is initialized with a developer id, which is free and can be obtained from the
# {Truveo Developer Site}[http://developer.truveo.com]. A developer id will allow you to call
# the Truveo API up to 10,000 times per day.
#
# Currently user functions, like ratings, are not implemented in the Ruby API.
class Truveo
  @@sorter = %w(sort:mostPopular sort:mostPopularNow sort:mostPopularThisWeek sort:mostPopularThisMonth sort:vrank sort:mostRecent sort:mostRelevant sort:topFavorites sort:highestRated)
  @@filter = %w(days_old: bitrate: type:free type:reg type:sub type:rent type:buy runtime: quality:poor quality:fair quality:good quality:excellent format:win format:real format:qt format:flash format:hi-q site: file_size:)
  # XXXX add these, should there be two types? One that takes a comparison and one that doesn't?
  @@modifier = %w(category: channel: tag: user: id: sim: title: description: artist: album: show: actor: director: writer: producer: distributor:)

  # array of Truveo supported sorters such as sort:vrank
  #
  # Example:
  #   Truveo.sorter.each{|s| puts s }  # print all the sorters
  #
  def self.sorter
    @@sorter
  end

  # array of Truveo supported filters such as type:free
  #
  # Example:
  #   Truveo.filter.each{|f| puts f }  # print all the filters
  #
  def self.filter
    @@filter
  end

  # array of Truveo supported modifiers such as tag:
  #
  # Example:
  #   Truveo.modifier.each{|m| puts m }  # print all the modifiers
  #
  def self.modifier
    @@modifier
  end

  # Create a new Truveo object for querying the Truveo video search engine.
  # The appid is required. For your free appid go to {My API Account}[http://developer.searchvideo.com/APIMyAccount.php]
  # where you easily sign up for your own appid, allowing you up to 10,000 Truveo queries per day.
  #
  # :call-seq: new(appid)
  #
  # Example:
  #   t = Truveo.new('my_appid')  # create a new Truveo object with your app_id
  #
  def initialize(appid, site = 'xml.searchvideo.com', path = "/apiv3?", port = 80)
    @appid = appid
    @site = site
    @path = path
    @port = port
  end

  # generic rest call
  #
  # Sends parms as a form-encoded query string appended to the configured path and returns the
  # parsed REXML::Document, or nil when the request or the XML parse fails (the failure is
  # reported on stdout).
  def rest(parms) # :nodoc:
    body = nil
    # URI.escape was removed in Ruby 3.0 and never escaped '&' or '=' inside values;
    # encode_www_form escapes every key and value individually.
    call = @path + URI.encode_www_form(parms)
    api_site = Net::HTTP.new(@site, @port)
    body = api_site.request_get(call).body
    REXML::Document.new(body)
  rescue REXML::ParseException
    # NOTE(review): the tail of this handler was lost in the mangled source; the closing
    # marker and the nil return are reconstructed - confirm against the upstream file.
    puts "rest parse rescue: "
    puts "zzz>>> #{body}<<<"
    nil
  rescue StandardError => e
    puts "rest rescue: #{e.to_s.split("\n")[0]}"
    puts e.inspect
    puts "site, path, parms: #{@site}, #{@path}, :#{call}:"
    nil
  end

  # return the error as a TruveoResponse (error_code / error_text set) or nil if no error
  def api_error(xml) #:nodoc:
    if xml.nil?
      res = TruveoResponse.new
      res.error_code = '69'
      res.error_text = 'bad xml'
      return res
    end

    # e.g. "Access Denied: invalid appid."
    elt = xml.elements['//Error']
    return nil if elt.nil?

    res = TruveoResponse.new
    res.error_code = elt.attributes["Code"]
    res.error_text = elt.text
    res
  end

  # text of the first element matching the xpath s, or nil when nothing matches
  def elt_text(xml, s) #:nodoc:
    elt = xml.elements[s]
    elt.nil? ? nil : elt.text
  end

  # convert xml to hash: one entry per child element, element name => element text
  def xml_to_hash(xml) #:nodoc:
    h = {}
    xml.elements.each { |e| h[e.name] = e.text }
    h
  end

  # performs the specified query and returns the results in the form of a TruveoResponse object.
  # query:: this is the search query to run. See {Building Search Queries}[http://developer.truveo.com/SearchQueryOverview.php] for details.
  # results:: number of results to return, the default is 10, the maximum is 50.
  # start:: where to start the result set being returned, defaults to 0. Can be used to page through results.
  # showRelatedItems:: indicates if the tag set, category set, and user set should be returned. Values should be 0 or 1, the default is 0.
  # tagResults:: number of tags requested in the tag_set response.
  # channelResults:: number of channels requested in the channel_set response.
  # categoryResults:: number of categories requested in the category_set response.
  # userResults:: number of users requested in the user_set response.
  # showAdult:: flag indicating whether or not adult content should be included in the result set.
  # All of the result sizes above are maximums. A query may return fewer than the requested number of results if the requested
  # number of results don't match.
  #
  # If the query string is empty, then the top results are returned in vrank order.
  #
  # If showRelatedItems is zero, the response will only include a video_set. The tag_set, channel_set, category_set, and user_set
  # parameters will be left out.
  def get_videos(query='', results=10, start=0, showRelatedItems=0, tagResults=10, channelResults=10, categoryResults=10, userResults=10, showAdult=0)
    params = {}
    params[:method] = 'truveo.videos.getVideos'
    params[:query] = query
    params[:showRelatedItems] = showRelatedItems
    params[:tagResults] = tagResults
    params[:channelResults] = channelResults
    params[:categoryResults] = categoryResults
    params[:userResults] = userResults
    params[:showAdult] = showAdult
    params[:appid] = @appid
    params[:results] = results
    params[:start] = start
    get_videos_hash(params)
  end

  # runs a getVideos request described by params and turns the XML reply into a TruveoResponse;
  # on failure the error response from api_error is returned instead
  def get_videos_hash(params) #:nodoc:
    xml = rest(params)

    # check for error codes
    err = api_error(xml)
    unless err.nil?
      puts "get_videos(#{params[:query]}) returned error: #{err.error_code} #{err.error_text}" if $VERBOSE
      return err
    end

    res = TruveoResponse.new
    # save params and the client for the response's next_video call
    res.params = params
    res.truveo = self

    fill_response_header(res, xml)
    res.total_results_available = elt_text(xml, '//VideoSet/totalResultsAvailable')
    res.total_results_returned = elt_text(xml, '//VideoSet/totalResultsReturned')
    res.first_result_position = elt_text(xml, '//VideoSet/firstResultPosition')
    res.rss_url = elt_text(xml, '//rssUrl')
    res.video_set_title = elt_text(xml, '//VideoSet/title')

    # store the video set
    res.video_set = []
    xml.elements.each('//Video') { |v| res.video_set << xml_to_hash(v) }

    if res.total_results_returned.to_i != res.video_set.length
      puts "Warning: results mismatch: res.total_results_returned (#{res.total_results_returned}) != res.video_set.length (#{res.video_set.length})"
    end

    # the channel_set is a hash, one entry per channel. the key is the channel name, the value is the count.
    res.channel_results_returned = elt_text(xml, '//ChannelSet/totalResultsReturned')
    res.channel_set = set_hash(xml, '//Channel')

    res.tag_results_returned = elt_text(xml, '//TagSet/totalResultsReturned')
    res.tag_set = set_hash(xml, '//Tag')

    res.category_results_returned = elt_text(xml, '//CategorySet/totalResultsReturned')
    res.category_set = set_hash(xml, '//Category')

    res.user_results_returned = elt_text(xml, '//UserSet/totalResultsReturned')
    res.user_set = set_hash(xml, '//User')

    res
  end

  # return the hash for the given set (category, channel, tag, or user): name => count
  def set_hash(xml, s) #:nodoc:
    h = {}
    xml.elements.each(s) do |v|
      name = v.elements['name']
      count = v.elements['count']
      # skip malformed entries rather than failing the whole response
      next if name.nil? || count.nil?
      h[name.text] = count.text.to_i
    end
    h
  end

  # generic get_related for each of the get_related type calls
  def get_related(type='Tags', query='', results=10, start=0) #:nodoc:
    params = {}
    params[:method] = "truveo.videos.getRelated#{type}"
    params[:query] = query
    params[:appid] = @appid
    params[:results] = results
    params[:start] = start

    xml = rest(params)

    # check for error codes
    err = api_error(xml)
    return err unless err.nil?

    res = TruveoResponse.new
    fill_response_header(res, xml)
    res.total_results_returned = elt_text(xml, '//totalResultsReturned')
    res.first_result_position = elt_text(xml, '//firstResultPosition')

    # e.g. type 'Tags' fills res.tag_set from the //Tag elements
    singular = singularize(type)
    res.send("#{singular.downcase}_set=", set_hash(xml, "//#{singular}"))
    res
  end

  # Return a hash of the tags and counts related to the query.
  #
  # The results and start parameters are used for paging through the result set.
  #
  # Example:
  #   t = Truveo.new('appid_goes_here')
  #   res = t.get_related_tags('funny')
  #   res.tag_set.each_pair{|k,v| puts "tag: #{k} count: #{v}" }
  def get_related_tags(query='', results=10, start=0)
    get_related('Tags', query, results, start)
  end

  # Return a hash of the channels and counts related to the query.
  #
  # The results and start parameters are used for paging through the result set.
  def get_related_channels(query='', results=10, start=0)
    get_related('Channels', query, results, start)
  end

  # Return a hash of the users and counts related to the query.
  #
  # The results and start parameters are used for paging through the result set.
  def get_related_users(query='', results=10, start=0)
    get_related('Users', query, results, start)
  end

  # Return a hash of the categories and counts related to the query.
  #
  # The results and start parameters are used for paging through the result set.
  def get_related_categories(query='', results=10, start=0)
    get_related('Categories', query, results, start)
  end

  # map a plural related-type name ('Tags', 'Categories', ...) to its singular element name
  def singularize(s) #:nodoc:
    return s.chop if s =~ /(Tags|Channels|Users)/
    return 'Category' if s =~ /Categories/
    s
  end

  # copy the <Response> header fields shared by every API reply onto res
  def fill_response_header(res, xml) #:nodoc:
    res.method = elt_text(xml, '//Response/method')
    res.query = elt_text(xml, '//Response/query')
    res.sortby = elt_text(xml, '//Response/sortby')
    res.query_suggestion = elt_text(xml, '//Response/querySuggestion')
    res.sphinxquery = elt_text(xml, '//Response/sphinxquery')
    res.sphinxfilters = elt_text(xml, '//Response/sphinxfilters')
  end

  private :singularize, :fill_response_header
end