|
| 1 | +/* |
| 2 | + * Licensed to the Apache Software Foundation (ASF) under one or more |
| 3 | + * contributor license agreements. See the NOTICE file distributed with |
| 4 | + * this work for additional information regarding copyright ownership. |
| 5 | + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| 6 | + * (the "License"); you may not use this file except in compliance with |
| 7 | + * the License. You may obtain a copy of the License at |
| 8 | + * |
| 9 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | + * |
| 11 | + * Unless required by applicable law or agreed to in writing, software |
| 12 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | + * See the License for the specific language governing permissions and |
| 15 | + * limitations under the License. |
| 16 | + */ |
| 17 | +package org.apache.tomcat.util.buf; |
| 18 | + |
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.UnsupportedCharsetException;
import java.util.Locale;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
| 25 | + |
/**
 * Cache of {@link Charset} instances keyed by lower-case charset name.
 * <p>
 * The charsets named in {@code INITIAL_CHARSETS} are resolved when the cache
 * is constructed. The names in {@code LAZY_CHARSETS} are registered against a
 * shared placeholder and are only resolved via {@link Charset#forName(String)}
 * the first time they are requested. Names that are in neither list (and are
 * not an alias of an already resolved charset) are reported as unknown.
 * <p>
 * Aliases of a charset are only added to the cache once that charset has been
 * resolved, so an alias of a lazily loaded charset is not recognised until the
 * charset has been requested by the name listed in {@code LAZY_CHARSETS}.
 */
public class CharsetCache {

    /* Charsets that are resolved and cached eagerly by the constructor. */
    private static final String[] INITIAL_CHARSETS = new String[] { "iso-8859-1", "utf-8" };

    /*
     * Names that are registered with a placeholder and resolved on first use.
     *
     * Tested with:
     *  - Oracle JDK 8 u192
     *  - OpenJDK 13 EA 4
     */
    private static final String[] LAZY_CHARSETS = new String[] {
            "big5", "big5-hkscs", "cesu-8", "euc-jp", "euc-kr", "gb18030", "gb2312", "gbk", "ibm-thai", "ibm00858",
            "ibm01140", "ibm01141", "ibm01142", "ibm01143", "ibm01144", "ibm01145", "ibm01146", "ibm01147", "ibm01148",
            "ibm01149", "ibm037", "ibm1026", "ibm1047", "ibm273", "ibm277", "ibm278", "ibm280", "ibm284", "ibm285",
            "ibm290", "ibm297", "ibm420", "ibm424", "ibm437", "ibm500", "ibm775", "ibm850", "ibm852", "ibm855",
            "ibm857", "ibm860", "ibm861", "ibm862", "ibm863", "ibm864", "ibm865", "ibm866", "ibm868", "ibm869",
            "ibm870", "ibm871", "ibm918", "iso-2022-cn", "iso-2022-jp", "iso-2022-jp-2", "iso-2022-kr", "iso-8859-13",
            "iso-8859-15", "iso-8859-2", "iso-8859-3", "iso-8859-4", "iso-8859-5", "iso-8859-6", "iso-8859-7",
            "iso-8859-8", "iso-8859-9", "iso-8859-16", "jis_x0201", "jis_x0212-1990", "koi8-r", "koi8-u", "shift_jis",
            "tis-620", "us-ascii", "utf-16", "utf-16be", "utf-16le", "utf-32", "utf-32be", "utf-32le", "x-utf-32be-bom",
            "x-utf-32le-bom", "windows-1250", "windows-1251", "windows-1252", "windows-1253", "windows-1254",
            "windows-1255", "windows-1256", "windows-1257", "windows-1258", "windows-31j", "x-big5-hkscs-2001",
            "x-big5-solaris", "x-compound_text", "x-euc-tw", "x-ibm1006", "x-ibm1025", "x-ibm1046", "x-ibm1097",
            "x-ibm1098", "x-ibm1112", "x-ibm1122", "x-ibm1123", "x-ibm1124", "x-ibm1129", "x-ibm1166", "x-ibm1364",
            "x-ibm1381", "x-ibm1383", "x-ibm300", "x-ibm33722", "x-ibm737", "x-ibm833", "x-ibm834", "x-ibm856",
            "x-ibm874", "x-ibm875", "x-ibm921", "x-ibm922", "x-ibm930", "x-ibm933", "x-ibm935", "x-ibm937", "x-ibm939",
            "x-ibm942", "x-ibm942c", "x-ibm943", "x-ibm943c", "x-ibm948", "x-ibm949", "x-ibm949c", "x-ibm950",
            "x-ibm964", "x-ibm970", "x-iscii91", "x-iso-2022-cn-cns", "x-iso-2022-cn-gb", "x-jis0208",
            "x-jisautodetect", "x-johab", "x-ms932_0213", "x-ms950-hkscs", "x-ms950-hkscs-xp", "x-macarabic",
            "x-maccentraleurope", "x-maccroatian", "x-maccyrillic", "x-macdingbat", "x-macgreek", "x-machebrew",
            "x-maciceland", "x-macroman", "x-macromania", "x-macsymbol", "x-macthai", "x-macturkish", "x-macukraine",
            "x-pck", "x-sjis_0213", "x-utf-16le-bom", "x-euc-jp-linux", "x-eucjp-open", "x-iso-8859-11", "x-mswin-936",
            "x-windows-50220", "x-windows-50221", "x-windows-874", "x-windows-949", "x-windows-950",
            "x-windows-iso2022jp"
    };

    /*
     * Shared placeholder stored against every lazily loaded name. It is
     * recognised by identity (==) in getCharset() and never handed to callers.
     */
    private static final Charset DUMMY_CHARSET = new DummyCharset("Dummy", null);

    /* Lower-case charset name or alias -> resolved Charset or DUMMY_CHARSET. */
    private final ConcurrentMap<String,Charset> cache = new ConcurrentHashMap<>();

    /**
     * Creates a cache that holds the initial charsets (and their aliases) and
     * a placeholder entry for every lazily loaded charset name.
     */
    public CharsetCache() {
        // Pre-populate the cache
        for (String charsetName : INITIAL_CHARSETS) {
            Charset charset = Charset.forName(charsetName);
            addToCache(charsetName, charset);
        }

        for (String charsetName : LAZY_CHARSETS) {
            addToCache(charsetName, DUMMY_CHARSET);
        }
    }


    /*
     * Stores the charset under the given name and under each of its aliases
     * (converted to lower case). The placeholder has no aliases so only the
     * given name is registered for it.
     */
    private void addToCache(String name, Charset charset) {
        cache.put(name, charset);
        for (String alias : charset.aliases()) {
            cache.put(alias.toLowerCase(Locale.ENGLISH), charset);
        }
    }


    /**
     * Looks up a charset by name, ignoring case.
     *
     * @param charsetName the name (or an already cached alias) of the charset
     *
     * @return the matching charset, or {@code null} if the name is not known
     *         to this cache or the charset is not available in this JVM
     *
     * @throws NullPointerException if {@code charsetName} is {@code null}
     */
    public Charset getCharset(String charsetName) {
        String lcCharsetName = charsetName.toLowerCase(Locale.ENGLISH);

        Charset result = cache.get(lcCharsetName);

        if (result == DUMMY_CHARSET) {
            // Name is known but the Charset is not in the cache
            try {
                // Charset.forName() never returns null. It signals a charset
                // that is missing from this JVM by throwing.
                result = Charset.forName(lcCharsetName);
                // Charset is available - populate cache entry
                addToCache(lcCharsetName, result);
            } catch (UnsupportedCharsetException e) {
                // Charset not available in this JVM - remove cache entry so
                // that later look-ups return null without retrying
                cache.remove(lcCharsetName);
                result = null;
            }
        }

        return result;
    }


    /*
     * Placeholder Charset implementation for entries that will be loaded lazily
     * into the cache. It is only ever used as a marker: it contains no other
     * charset and provides neither a decoder nor an encoder.
     */
    private static class DummyCharset extends Charset {

        protected DummyCharset(String canonicalName, String[] aliases) {
            super(canonicalName, aliases);
        }

        @Override
        public boolean contains(Charset cs) {
            return false;
        }

        @Override
        public CharsetDecoder newDecoder() {
            return null;
        }

        @Override
        public CharsetEncoder newEncoder() {
            return null;
        }
    }
}
0 commit comments