src/lang-css.js

/**
 * @license
 * Copyright (C) 2009 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * @fileoverview
 * Registers a language handler for CSS.
 *
 *
 * To use, include prettify.js and this file in your HTML page.
 * Then put your code in an HTML tag like
 *      <pre class="prettyprint lang-css"></pre>
 *
 *
 * http://www.w3.org/TR/CSS21/grammar.html Section G2 defines the lexical
 * grammar.  This scheme does not recognize keywords containing escapes.
 *
 * @author mikesamuel@gmail.com
 */

// This file is a call to a function defined in prettify.js which defines a
// lexical scanner for CSS and maps tokens to styles.

// The call to PR['registerLangHandler'] is quoted so that Closure Compiler
// will not rename the call so that this language extensions can be
// compiled/minified separately from one another.  Other symbols defined in
// prettify.js are similarly quoted.

// The call is structured thus:
// PR['registerLangHandler'](
//    PR['createSimpleLexer'](
//        shortcutPatterns,
//        fallThroughPatterns),
//    [languageId0, ..., languageIdN])

// Langugage IDs
// =============
// The language IDs are typically the file extensions of source files for
// that language so that users can syntax highlight arbitrary files based
// on just the extension.  This is heuristic, but works pretty well in
// practice.

// Patterns
// ========
// Lexers are typically implemented as a set of regular expressions.
// The SimpleLexer function takes regular expressions, styles, and some
// pragma-info and produces a lexer.  A token description looks like
//   [STYLE_NAME, /regular-expression/, pragmas]

// Initially, simple lexer's inner loop looked like:

//    while sourceCode is not empty:
//      try each regular expression in order until one matches
//      remove the matched portion from sourceCode

// This was really slow for large files because some JS interpreters
// do a buffer copy on the matched portion which is O(n*n)

// The current loop now looks like

//    1. use js-modules/combinePrefixPatterns.js to 
//       combine all regular expressions into one 
//    2. use a single global regular expresion match to extract all tokens
//    3. for each token try regular expressions in order until one matches it
//       and classify it using the associated style

// This is a lot more efficient but it does mean that lookahead and lookbehind
// can't be used across boundaries to classify tokens.

// Sometimes we need lookahead and lookbehind and sometimes we want to handle
// embedded language -- JavaScript or CSS embedded in HTML, or inline assembly
// in C.

// If a particular pattern has a numbered group, and its style pattern starts
// with "lang-" as in
//    ['lang-js', /<script>(.*?)<\/script>/]
// then the token classification step breaks the token into pieces.
// Group 1 is re-parsed using the language handler for "lang-js", and the
// surrounding portions are reclassified using the current language handler.
// This mechanism gives us both lookahead, lookbehind, and language embedding.

// Shortcut Patterns
// =================
// A shortcut pattern is one that is tried before other patterns if the first
// character in the token is in the string of characters.
// This very effectively lets us make quick correct decisions for common token
// types.

// All other patterns are fall-through patterns.


// The comments inline below refer to productions in the CSS specification's
// lexical grammar.  See link above.
PR['registerLangHandler'](
    PR['createSimpleLexer'](
        // Shortcut patterns.
        [
         // The space production <s>
         [PR['PR_PLAIN'],       /^[ \t\r\n\f]+/, null, ' \t\r\n\f']
        ],
        // Fall-through patterns.
        [
         // Quoted strings.  <string1> and <string2>
         [PR['PR_STRING'],
          /^\"(?:[^\n\r\f\\\"]|\\(?:\r\n?|\n|\f)|\\[\s\S])*\"/, null],
         [PR['PR_STRING'],
          /^\'(?:[^\n\r\f\\\']|\\(?:\r\n?|\n|\f)|\\[\s\S])*\'/, null],
         ['lang-css-str', /^url\(([^\)\"\']+)\)/i],
         [PR['PR_KEYWORD'],
          /^(?:url|rgb|\!important|@import|@page|@media|@charset|inherit)(?=[^\-\w]|$)/i,
          null],
         // A property name -- an identifier followed by a colon.
         ['lang-css-kw', /^(-?(?:[_a-z]|(?:\\[0-9a-f]+ ?))(?:[_a-z0-9\-]|\\(?:\\[0-9a-f]+ ?))*)\s*:/i],
         // A C style block comment.  The <comment> production.
         [PR['PR_COMMENT'], /^\/\*[^*]*\*+(?:[^\/*][^*]*\*+)*\//],
         // Escaping text spans
         [PR['PR_COMMENT'], /^(?:<!--|-->)/],
         // A number possibly containing a suffix.
         [PR['PR_LITERAL'], /^(?:\d+|\d*\.\d+)(?:%|[a-z]+)?/i],
         // A hex color
         [PR['PR_LITERAL'], /^#(?:[0-9a-f]{3}){1,2}\b/i],
         // An identifier
         [PR['PR_PLAIN'],
          /^-?(?:[_a-z]|(?:\\[\da-f]+ ?))(?:[_a-z\d\-]|\\(?:\\[\da-f]+ ?))*/i],
         // A run of punctuation
         [PR['PR_PUNCTUATION'], /^[^\s\w\'\"]+/]
        ]),
    ['css']);
// Above we use embedded languages to highlight property names (identifiers
// followed by a colon) differently from identifiers in values.
PR['registerLangHandler'](
    PR['createSimpleLexer']([],
        [
         [PR['PR_KEYWORD'],
          /^-?(?:[_a-z]|(?:\\[\da-f]+ ?))(?:[_a-z\d\-]|\\(?:\\[\da-f]+ ?))*/i]
        ]),
    ['css-kw']);
// The content of an unquoted URL literal like url(http://foo/img.png) should
// be colored as string content.  This language handler is used above in the
// URL production to do so.
PR['registerLangHandler'](
    PR['createSimpleLexer']([],
        [
         [PR['PR_STRING'], /^[^\)\"\']+/]
        ]),
    ['css-str']);