#!/usr/bin/ruby
#
# CGI script that categorizes saved validator messages for one site.
#
# It reads a saved validator report from the `validator-output` directory next
# to this file, extracts the report's list items, assigns each item to the
# first rule (in definition order) whose pattern matches it, and renders the
# items grouped by category, highest weight first.  Items that match no rule
# are reported as "unknown" with weight 100.
#
# NOTE(review): this file appears to have passed through an HTML-to-text
# conversion at some point (entities decoded, tag-like text dropped).  Three
# places below were unparseable and have been reconstructed; each carries its
# own NOTE(review).  Other patterns containing `&`, `<` or `>` are reproduced
# exactly as found but should be compared against a pristine copy.

require 'rubygems'
require 'cgi-spa'
require 'htmlentities'

# http://www.quotationspage.com/quote/692.html

source = File.join(File.dirname(__FILE__), 'validator-output')

# File.read closes the handle, unlike the previous open(...).read.
# NOTE(review): $param.site is interpolated into a filesystem path unchecked;
# presumably it is a request parameter -- confirm it is constrained upstream.
data = File.read("#{source}/index.html?doc=http:%2F%2F#{$param.site}%2F")

# NOTE(review): the opening of this pattern was lost in the source (only
# `.*?<\/li>` survived).  Reconstructed to match an <li> start tag with or
# without attributes -- verify against the saved validator output.
notes = data.scan(/<li\b[^>]*>.*?<\/li>/m)

# Ordered rule table: each entry is [pattern, code, weight].
$rules = []

# Registers one rule per pattern, all sharing a category code and weight.
#
# args - one or more Regexp patterns followed by a trailing Hash with
#        :code (Symbol category name) and :weight (Integer sort weight).
#
# Rules are consulted in the order they are registered; the first matching
# pattern decides a note's category.
def rule(*args)
  keywords = args.pop
  args.each do |pattern|
    $rules << [pattern, keywords[:code], keywords[:weight]]
  end
end

# Returns the [weight, code-as-String] pair of the first rule matching +note+,
# or [100, 'unknown'] when no rule matches.
def categorize(note)
  hit = $rules.find { |pattern, _code, _weight| note =~ pattern }
  return [100, 'unknown'] unless hit

  [hit[2], hit[1].to_s]
end

rule %r{Using windows-1252 instead of the declared encoding},
  :code => :encoding_warning, :weight => 0

rule %r{Changing character encoding utf-8 and reparsing},
  :code => :encoding_warning, :weight => 0

rule %r{Bad value \d+\.\d+ for attribute content on element .*meta},
  %r{Bad value \d+;url=.* for attribute content on element .*meta},
  :code => :meta_refresh, :weight => 10

rule %r{Obsolete doctype\.},
  %r{Legacy doctype\.},
  %r{Quirky doctype\.},
  %r{skipping entity: \[dtd\]},
  %r{Almost standards mode doctype\.},
  :code => :versioned_doctype, :weight => 15

rule %r{Self-closing syntax \(/\>\) used on a non-void HTML element},
  :code => :self_closing_where_closing_is_optional, :weight => 16

rule %r{Start tag seen without seeing a doctype first\.},
  :code => :missing_doctype, :weight => 19

# NOTE(review): the end of this pattern was lost in the source (it broke off
# after `.*`).  `span<` is a guess based on the category name and on the
# `name<` convention used by the other element patterns -- verify.
rule %r{not allowed as child of element .*span<},
  :code => :span_semantics, :weight => 20

rule %r{End of file seen and there were open elements},
  :code => :optional, :weight => 25

rule %r{[Cc]onsecutive hyphens},
  %r{trailing hyphen in a comment},
  :code => :consecutive_hyphens, :weight => 28

rule %r{= in an unquoted attribute value},
  %r{No space between the doctype public and system identifiers},
  :code => :unquoted_equals, :weight => 28

rule %r{No space between attributes},
  :code => :no_space, :weight => 29

rule %r{Attribute y-},
  %r{Attribute _sp},
  :code => :extensibility_prefix, :weight => 35

rule %r{Attribute xmlns:.* not allowed here},
  %r{local name xmlns:.* is not serializable as XML 1.0},
  :code => :extensibility_xmlns, :weight => 36

rule %r{Bad value .* for attribute http-equiv},
  :code => :extensibility_meta, :weight => 38

rule %r{Element .*script<.* not allowed as child of element <},
  %r{Stray script start tag},
  :code => :script_anywhere, :weight => 39

rule %r{unnamed script},
  :code => :unnamed_script, :weight => 40

rule %r{Presentational_elements_and_attributes},
  :code => :css, :weight => 45

rule %r{Required attributes missing on element .*link},
  %r{Required attributes missing on element .*area},
  %r{Required attributes missing on element .*style},
  %r{Required attributes missing on element .*script},
  :code => :optional_attrs, :weight => 46

rule %r{center},
  :code => :css, :weight => 45

rule %r{attribute on the .* element is obsolete},
  %r{The Content-Language state is obsolete},
  %r{The .* attribute is obsolete},
  %r{The .* element is obsolete},
  :code => :versioned, :weight => 45

# http://www.aptana.com/reference/html/api/HTML.field.vspace.html
rule %r{Attribute border not allowed on element},
  %r{Attribute color not allowed on element .*hr<},
  %r{Attribute background not allowed on element},
  %r{Attribute hspace not allowed on element},
  %r{Attribute vspace not allowed on element},
  %r{Attribute height not allowed on element},
  :code => :versioned, :weight => 45

# http://www.aptana.com/reference/html/api/HTML.field.allowtransparency.html
rule %r{Attribute allowtransparency not allowed on element},
  :code => :browser_specific_markup, :weight => 45

rule %r{Attribute marginheight not allowed on element},
  %r{Attribute leftmargin not allowed on element},
  %r{Attribute marginwidth not allowed on element},
  %r{Attribute topmargin not allowed on element},
  %r{Attribute width not allowed on element},
  :code => :nonstd_attribute, :weight => 46

# http://msdn.microsoft.com/en-us/library/ms533486(VS.85).aspx
# https://developer.mozilla.org/en/How_to_Turn_Off_Form_Autocompletion
# The code here used to be misspelled :nonnstd_element, which put these notes
# under a separate heading from the `nobr` rule that follows.
rule %r{Attribute autocomplete not allowed on element},
  :code => :nonstd_element, :weight => 49

rule %r{nobr},
  :code => :nonstd_element, :weight => 49

rule %r{& did not start a character reference},
  :code => :escape, :weight => 55

rule %r{No .* element in list scope but a .* end tag seen},
  %r{Stray end tag },
  %r{No element font to close},
  %r{No element a to close},
  %r{violates nesting rules},
  %r{End tag .* seen but there were unclosed elements},
  %r{End tag for .* seen, but there were unclosed elements},
  :code => :unmatched_close, :weight => 65

rule %r{A table row was \d+ columns wide, which is less than the column count established by the first row},
  %r{A table row was \d+ columns wide and exceeded the column count established by the first row},
  %r{Table column \d+ .* has no cells beginning in it},
  :code => :table_width, :weight => 71

rule %r{COMPATIBILITY_CHARACTER in PATH\.},
  :code => :compat_char, :weight => 74

rule %r{The character encoding .* is not widely supported},
  %r{Using gbk instead of the declared encoding},
  :code => :char_encoding, :weight => 73

rule %r{Bad value +http:.*? for attribute href},
  %r{Bad value +http:.*? for attribute src},
  %r{href on element .*a<.*: DOUBLE_WHITESPACE},
  %r{href on element .*area<.*: DOUBLE_WHITESPACE},
  %r{src on element .*img<.*: DOUBLE_WHITESPACE},
  %r{href on element .*a<.*: CONTROL_CHARACTER},
  %r{href on element .*a<.*: WHITESPACE in QUERY},
  %r{src on element .*img<.*: WHITESPACE in PATH},
  :code => :urispace, :weight => 75

rule %r{Zero is not a positive integer},
  :code => :nonpositive, :weight => 79

rule %r{An ID must not be the empty string},
  :code => :empty_id, :weight => 81

rule %r{Duplicate ID .*},
  %r{The first occurrence of ID .* was here},
  :code => :dup_id, :weight => 82

rule %r{Browsing context name must be at least one character long},
  :code => :empty_target, :weight => 83

rule %r{An ID must not contain whitespace},
  :code => :id_whitespace, :weight => 83

rule %r{Attribute modid},
  %r{Attribute data},
  %r{Attribute image not allowed on element .*img<},
  %r{Attribute defaulturl not allowed on element .*form<},
  %r{Attribute url not allowed on element .*img<},
  %r{Attribute pos not allowed on element .*a<},
  %r{Attribute qlicon not allowed on element .*img<},
  %r{Attribute thumb not allowed on element .*img<},
  %r{Attribute ql not allowed on element .*button<},
  %r{Attribute smartpid not allowed on element .*input<},
  %r{Attribute articleid not allowed on element .*span<},
  %r{Attribute overflowurl not allowed on element .*span<},
  %r{Attribute src not allowed on element .*span<},
  %r{Attribute alt not allowed on element .*span<},
  %r{Bad character .* Probable cause: Unescaped},
  :code => :private_extension, :weight => 84

rule %r{An object element must have a data attribute or a type attribute},
  :code => :nonstd_object, :weight => 84

rule %r{Element .*div<.* not allowed as child of element <},
  %r{Element .*n<.* not allowed as child of element <},
  %r{Element .*p<.* not allowed as child of element <},
  %r{Element .*form<.* not allowed as child of element <},
  %r{Element .*style<.* not allowed as child of element <},
  %r{Start tag p seen in table},
  %r{Start tag div seen in table},
  %r{ must not appear as a descendant of the },
  %r{An a start tag seen with already an active a element},
  %r{must have an ID value that matches that for attribute},
  %r{The for attribute of the label element must refer to a form control},
  %r{there is no map element with a name attribute with that value},
  :code => :schema, :weight => 85

# rule %r{Required attributes missing on element},
rule %r{An img element must have a src attribute},
  :code => :required_attrs, :weight => 86

rule %r{Attribute name not allowed on element .*},
  :code => :name_attr, :weight => 87

rule %r{Attribute widh not allowed on element},
  :code => :nonstd_attribute_mispelled, :weight => 88

rule %r{Attribute \w+:[^<]*;[^<]*},
  :code => :style_as_attribute, :weight => 90

rule %r{Attribute ;},
  :code => :stray_semicolon, :weight => 91

rule %r{Stray doctype},
  :code => :stray_doctype, :weight => 91

rule %r{Bogus comment},
  :code => :bogus_comment, :weight => 92

rule %r{Forbidden code point},
  :code => :forbidden_code_point, :weight => 93

rule %r{src on element .*img<.*: CONTROL_CHARACTER in PATH},
  :code => :space_in_uri_path, :weight => 94

rule %r{Attribute http:<},
  %r{" in an unquoted attribute value},
  %r{Attribute www\.},
  %r{A slash was not immediate followed by ><},
  :code => :syntax_attr, :weight => 95

rule %r{Internal encoding declaration .* disagrees with the actual encoding of the document},
  :code => :encoding_error, :weight => 98

# The five XML-predefined entities.  They are escaped one extra level before
# decoding so that they are still entities afterwards, while every other
# entity in a note is turned into its literal character.
#
# NOTE(review): the original chain of gsub calls was unparseable in the source
# (its literals had been entity-decoded, ending in `gsub(''','&apos')`).  This
# is a reconstruction of the apparent intent; it also restores the `;` that
# was missing from the apos replacement.  Verify against a pristine copy.
XML_ENTITY = /&(amp|lt|gt|quot|apos);/

# main output
$cgi.html do |x|
  x.header do
    x.title 'Categorizing Validator Messages'
    x.link :rel => 'stylesheet', :href => 'style.css'
    x.meta :charset => 'utf-8'
  end

  x.body do
    x.h1 "Categorizing Validator Messages - #{$param.site}"

    # [weight, code] => notes in that category, in their original order.
    counters = notes.group_by { |note| categorize(note) }

    htmlentities = HTMLEntities.new

    # Highest weight first; ties are ordered by code, descending.
    counters.keys.sort.reverse_each do |weight, code|
      matching = counters[[weight, code]]

      x.h2 "#{code} (#{matching.length})"
      x.ul do
        matching.each do |note|
          x << htmlentities.decode(note.gsub(XML_ENTITY, '&amp;\1;')) + "\n"
        end
      end
    end
  end
end