JSPの文字コード関連のメモ

Webブラウザが文字コードを判定する基準は?:Javaの文字化け対策FAQ(1) - @IT
JSPコンパイラはまずpageディレクティブのpageEncoding属性を参照し、指定が無ければcontentType属性のcharset値を参照する。

確かにTomcatの実装でもそうなっているようだ。ただし4系と6系では実装がかなり異なる(6系はJSPコメントを読み飛ばし、pageディレクティブに加えてtagディレクティブやXML形式の<jsp:directive.…>も対象にしている)。

  • Tomcat6系
    /*
     * Scans a page or tag file written in JSP syntax for its source
     * encoding. A 'pageEncoding' attribute on a page/tag directive wins
     * immediately; otherwise the charset taken from the most recently
     * seen 'contentType' attribute is used as a fallback.
     *
     * @param jspReader reader over the JSP source; it is rewound to
     *                  startMark before scanning begins
     * @param startMark position in the source from which to start scanning
     * @return The page encoding, or null if not found
     */
    private String getPageEncodingForJspSyntax(JspReader jspReader,
                                               Mark startMark)
            throws JasperException {

        // Charset from the latest contentType attribute; only returned when
        // no directive supplies a pageEncoding.
        String contentTypeEncoding = null;

        jspReader.reset(startMark);

        /*
         * Look at every directive of the form <%@ page %>, <%@ tag %>,
         * <jsp:directive.page > or <jsp:directive.tag >.
         */
        while (jspReader.skipUntil("<") != null) {
            // JSP comment: jump past its terminator and keep scanning
            if (jspReader.matches("%--")) {
                if (jspReader.skipUntil("--%>") == null) {
                    // unterminated comment; the Parser reports the error
                    break;
                }
                continue;
            }

            boolean directiveFound;
            if (jspReader.matches("%@")) {
                jspReader.skipSpaces();
                directiveFound = true;
            } else {
                directiveFound = jspReader.matches("jsp:directive.");
            }
            if (!directiveFound) {
                continue;
            }

            // "tag " carries a trailing space so that "taglib" is not matched
            boolean pageOrTag =
                jspReader.matches("tag ") || jspReader.matches("page");
            if (!pageOrTag) {
                continue;
            }

            jspReader.skipSpaces();
            Attributes directiveAttrs =
                Parser.parseAttributes(this, jspReader);

            String fromPageEncoding =
                getPageEncodingFromDirective(directiveAttrs, "pageEncoding");
            if (fromPageEncoding != null) {
                // an explicit pageEncoding ends the search
                return fromPageEncoding;
            }

            String fromContentType =
                getPageEncodingFromDirective(directiveAttrs, "contentType");
            if (fromContentType != null) {
                contentTypeEncoding = fromContentType;
            }
        }

        return contentTypeEncoding;
    }

http://svn.apache.org/repos/asf/tomcat/jasper//tc6.0.x/src/share/org/apache/jasper/compiler/ParserController.java

  • Tomcat4系
    //*********************************************************************
    // Figure out input Document
    //
    // Opens the given file with a JspReader, records in isXml whether the
    // jsp:root tag occurs in it, and, for pages in JSP syntax, stores the
    // declared encoding in newEncoding: the first pageEncoding attribute of
    // a page directive if there is one, otherwise whatever follows
    // "charset=" in the last contentType attribute that contains it.

    private void figureOutJspDocument(String file, 
                                      String encoding,
                                      InputStreamReader reader)
         throws JasperException
    {
        final JspReader jspReader;
        try {
            jspReader = new JspReader(ctxt, file, encoding, reader,
                                      compiler.getErrorDispatcher());
        } catch (FileNotFoundException ex) {
            throw new JasperException(ex);
        }
        jspReader.setSingleFile(true);
        Mark startMark = jspReader.mark();

        // Check for the jsp:root tag
        // The xml prolog is not checked, since a page may output XML
        // and still be written in JSP syntax.
        jspReader.reset(startMark);
        boolean rootTagFound = jspReader.skipUntil(JSP_ROOT_TAG) != null;
        isXml = rootTagFound;

        newEncoding = null;

        // Pages in XML syntax are handed to the xml parser as a stream,
        // which takes care of the encoding itself.
        if (rootTagFound) {
            return;
        }

        // Figure out the encoding of a page in JSP syntax
        jspReader.reset(startMark);
        while (jspReader.skipUntil("<%@") != null) {
            jspReader.skipSpaces();
            if (!jspReader.matches("page")) {
                continue;
            }
            jspReader.skipSpaces();
            Attributes attrs = Parser.parseAttributes(this, jspReader);

            String declaredEncoding = attrs.getValue("pageEncoding");
            if (declaredEncoding != null) {
                // an explicit pageEncoding ends the search
                newEncoding = declaredEncoding;
                return;
            }

            String contentType = attrs.getValue("contentType");
            if (contentType == null) {
                continue;
            }
            int charsetAt = contentType.indexOf("charset=");
            if (charsetAt != -1) {
                // keep everything after "charset=" (8 characters)
                newEncoding = contentType.substring(charsetAt + 8);
            }
        }
    }

http://svn.apache.org/repos/asf/tomcat/jasper/branches/tc4.1.x/src/share/org/apache/jasper/compiler/ParserController.java