001 /*
002 * GeoAPI - Java interfaces for OGC/ISO standards
003 * http://www.geoapi.org
004 *
005 * Copyright (C) 2007-2012 Open Geospatial Consortium, Inc.
006 * All Rights Reserved. http://www.opengeospatial.org/ogc/legal
007 *
008 * Permission to use, copy, and modify this software and its documentation, with
009 * or without modification, for any purpose and without fee or royalty is hereby
010 * granted, provided that you include the following on ALL copies of the software
011 * and documentation or portions thereof, including modifications, that you make:
012 *
013 * 1. The full text of this NOTICE in a location viewable to users of the
014 * redistributed or derivative work.
015 * 2. Notice of any changes or modifications to the OGC files, including the
016 * date changes were made.
017 *
018 * THIS SOFTWARE AND DOCUMENTATION IS PROVIDED "AS IS," AND COPYRIGHT HOLDERS MAKE
019 * NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
020 * TO, WARRANTIES OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT
021 * THE USE OF THE SOFTWARE OR DOCUMENTATION WILL NOT INFRINGE ANY THIRD PARTY
022 * PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS.
023 *
024 * COPYRIGHT HOLDERS WILL NOT BE LIABLE FOR ANY DIRECT, INDIRECT, SPECIAL OR
025 * CONSEQUENTIAL DAMAGES ARISING OUT OF ANY USE OF THE SOFTWARE OR DOCUMENTATION.
026 *
027 * The name and trademarks of copyright holders may NOT be used in advertising or
028 * publicity pertaining to the software without specific, written prior permission.
029 * Title to copyright in this software and any associated documentation will at all
030 * times remain with copyright holders.
031 */
032 package org.opengis.metadata.identification;
033
034 import java.nio.charset.Charset;
035 import java.nio.charset.UnsupportedCharsetException;
036 import java.util.List;
037 import java.util.ArrayList;
038
039 import org.opengis.util.CodeList;
040 import org.opengis.annotation.UML;
041
042 import static org.opengis.annotation.Specification.ISO_19115;
043 import static org.opengis.annotation.Obligation.CONDITIONAL;
044
045
046 /**
047 * Name of the character coding standard used for the resource.
048 *
049 * @author Ely Conn (Leica Geosystems Geospatial Imaging, LLC)
050 * @version 3.0
051 * @since 2.1
052 */
053 @UML(identifier="MD_CharacterSetCode", specification=ISO_19115)
054 public final class CharacterSet extends CodeList<CharacterSet> {
055 /**
056 * Serial number for compatibility with different versions.
057 */
058 private static final long serialVersionUID = -4726629268456735927L;
059
060 /**
061 * List of all enumerations of this type.
062 * Must be declared before any enum declaration.
063 */
064 private static final List<CharacterSet> VALUES = new ArrayList<CharacterSet>(29);
065
066 /**
067 * 16-bit fixed size Universal Character Set, based on ISO/IEC 10646.
068 */
069 @UML(identifier="ucs2", obligation=CONDITIONAL, specification=ISO_19115)
070 public static final CharacterSet UCS_2 = new CharacterSet("UCS_2", "UCS-2");
071
072 /**
073 * 32-bit fixed size Universal Character Set, based on ISO/IEC 10646.
074 */
075 @UML(identifier="ucs4", obligation=CONDITIONAL, specification=ISO_19115)
076 public static final CharacterSet UCS_4 = new CharacterSet("UCS_4", "UCS-4");
077
078 /**
079 * 7-bit variable size UCS Transfer Format, based on ISO/IEC 10646.
080 */
081 @UML(identifier="utf7", obligation=CONDITIONAL, specification=ISO_19115)
082 public static final CharacterSet UTF_7 = new CharacterSet("UTF_7", "UTF-7");
083
084 /**
085 * 8-bit variable size UCS Transfer Format, based on ISO/IEC 10646.
086 */
087 @UML(identifier="utf8", obligation=CONDITIONAL, specification=ISO_19115)
088 public static final CharacterSet UTF_8 = new CharacterSet("UTF_8", "UTF-8");
089
090 /**
091 * 16-bit variable size UCS Transfer Format, based on ISO/IEC 10646.
092 */
093 @UML(identifier="utf16", obligation=CONDITIONAL, specification=ISO_19115)
094 public static final CharacterSet UTF_16 = new CharacterSet("UTF_16", "UTF-16");
095
096 /**
097 * ISO/IEC 8859-1, Information technology.
098 * 8-bit single-byte coded graphic character sets - Part 1: Latin alphabet No. 1.
099 */
100 @UML(identifier="8859part1", obligation=CONDITIONAL, specification=ISO_19115)
101 public static final CharacterSet ISO_8859_1 = new CharacterSet("ISO_8859_1", "ISO-8859-1");
102
103 /**
104 * ISO/IEC 8859-2, Information technology.
105 * 8-bit single-byte coded graphic character sets - Part 2: Latin alphabet No. 2.
106 */
107 @UML(identifier="8859part2", obligation=CONDITIONAL, specification=ISO_19115)
108 public static final CharacterSet ISO_8859_2 = new CharacterSet("ISO_8859_2", "ISO-8859-2");
109
110 /**
111 * ISO/IEC 8859-3, Information technology.
112 * 8-bit single-byte coded graphic character sets - Part 3: Latin alphabet No. 3.
113 */
114 @UML(identifier="8859part3", obligation=CONDITIONAL, specification=ISO_19115)
115 public static final CharacterSet ISO_8859_3 = new CharacterSet("ISO_8859_3", "ISO-8859-3");
116
117 /**
118 * ISO/IEC 8859-4, Information technology.
119 * 8-bit single-byte coded graphic character sets - Part 4: Latin alphabet No. 4.
120 */
121 @UML(identifier="8859part4", obligation=CONDITIONAL, specification=ISO_19115)
122 public static final CharacterSet ISO_8859_4 = new CharacterSet("ISO_8859_4", "ISO-8859-4");
123
124 /**
125 * ISO/IEC 8859-5, Information technology.
126 * 8-bit single-byte coded graphic character sets - Part 5: Latin/Cyrillic alphabet.
127 */
128 @UML(identifier="8859part5", obligation=CONDITIONAL, specification=ISO_19115)
129 public static final CharacterSet ISO_8859_5 = new CharacterSet("ISO_8859_5", "ISO-8859-5");
130
131 /**
132 * ISO/IEC 8859-6, Information technology.
133 * 8-bit single-byte coded graphic character sets - Part 6: Latin/Arabic alphabet.
134 */
135 @UML(identifier="8859part6", obligation=CONDITIONAL, specification=ISO_19115)
136 public static final CharacterSet ISO_8859_6 = new CharacterSet("ISO_8859_6", "ISO-8859-6");
137
138 /**
139 * ISO/IEC 8859-7, Information technology.
140 * 8-bit single-byte coded graphic character sets - Part 7: Latin/Greek alphabet.
141 */
142 @UML(identifier="8859part7", obligation=CONDITIONAL, specification=ISO_19115)
143 public static final CharacterSet ISO_8859_7 = new CharacterSet("ISO_8859_7", "ISO-8859-7");
144
145 /**
146 * ISO/IEC 8859-8, Information technology.
147 * 8-bit single-byte coded graphic character sets - Part 8: Latin/Hebrew alphabet.
148 */
149 @UML(identifier="8859part8", obligation=CONDITIONAL, specification=ISO_19115)
150 public static final CharacterSet ISO_8859_8 = new CharacterSet("ISO_8859_8", "ISO-8859-8");
151
152 /**
153 * ISO/IEC 8859-9, Information technology.
154 * 8-bit single-byte coded graphic character sets - Part 9: Latin alphabet No. 5.
155 */
156 @UML(identifier="8859part9", obligation=CONDITIONAL, specification=ISO_19115)
157 public static final CharacterSet ISO_8859_9 = new CharacterSet("ISO_8859_9", "ISO-8859-9");
158
159 /**
160 * ISO/IEC 8859-10, Information technology.
161 * 8-bit single-byte coded graphic character sets - Part 10: Latin alphabet No. 6.
162 */
163 @UML(identifier="8859part10", obligation=CONDITIONAL, specification=ISO_19115)
164 public static final CharacterSet ISO_8859_10 = new CharacterSet("ISO_8859_10", "ISO-8859-10");
165
166 /**
167 * ISO/IEC 8859-11, Information technology.
168 * 8-bit single-byte coded graphic character sets - Part 11: Latin/Thai alphabet.
169 */
170 @UML(identifier="8859part11", obligation=CONDITIONAL, specification=ISO_19115)
171 public static final CharacterSet ISO_8859_11 = new CharacterSet("ISO_8859_11", "ISO-8859-11");
172
173 /**
174 * A future ISO/IEC 8-bit single-byte coded graphic character set.
175 */
176 @UML(identifier="8859part12", obligation=CONDITIONAL, specification=ISO_19115)
177 public static final CharacterSet ISO_8859_12 = new CharacterSet("ISO_8859_12", "ISO-8859-12");
178
179 /**
180 * ISO/IEC 8859-13, Information technology.
181 * 8-bit single-byte coded graphic character sets - Part 13: Latin alphabet No. 7.
182 */
183 @UML(identifier="8859part13", obligation=CONDITIONAL, specification=ISO_19115)
184 public static final CharacterSet ISO_8859_13 = new CharacterSet("ISO_8859_13", "ISO-8859-13");
185
186 /**
187 * ISO/IEC 8859-14, Information technology.
188 * 8-bit single-byte coded graphic character sets - Part 14: Latin alphabet No. 8 (Celtic).
189 */
190 @UML(identifier="8859part14", obligation=CONDITIONAL, specification=ISO_19115)
191 public static final CharacterSet ISO_8859_14 = new CharacterSet("ISO_8859_14", "ISO-8859-14");
192
193 /**
194 * ISO/IEC 8859-15, Information technology.
195 * 8-bit single-byte coded graphic character sets - Part 15: Latin alphabet No. 9.
196 */
197 @UML(identifier="8859part15", obligation=CONDITIONAL, specification=ISO_19115)
198 public static final CharacterSet ISO_8859_15 = new CharacterSet("ISO_8859_15", "ISO-8859-15");
199
200 /**
201 * ISO/IEC 8859-16, Information technology.
202 * 8-bit single-byte coded graphic character sets - Part 16: Latin alphabet No. 10.
203 */
204 @UML(identifier="8859part16", obligation=CONDITIONAL, specification=ISO_19115)
205 public static final CharacterSet ISO_8859_16 = new CharacterSet("ISO_8859_16", "ISO-8859-16");
206
207 /**
208 * Japanese code set used for electronic transmission.
209 */
210 @UML(identifier="jis", obligation=CONDITIONAL, specification=ISO_19115)
211 public static final CharacterSet JIS = new CharacterSet("JIS", "JIS_X0201");
212
213 /**
214 * Japanese code set used on MS-DOS based machines.
215 */
216 @UML(identifier="shiftJIS", obligation=CONDITIONAL, specification=ISO_19115)
217 public static final CharacterSet SHIFT_JIS = new CharacterSet("SHIFT_JIS", "Shift_JIS");
218
219 /**
220 * Japanese code set used on UNIX based machines.
221 */
222 @UML(identifier="eucJP", obligation=CONDITIONAL, specification=ISO_19115)
223 public static final CharacterSet EUC_JP = new CharacterSet("EUC_JP", "EUC-JP");
224
225 /**
226 * United States ASCII code set (ISO 646 US).
227 */
228 @UML(identifier="usAscii", obligation=CONDITIONAL, specification=ISO_19115)
229 public static final CharacterSet US_ASCII = new CharacterSet("US_ASCII", "US-ASCII");
230
231 /**
232 * IBM mainframe code set.
233 */
234 @UML(identifier="ebcdic", obligation=CONDITIONAL, specification=ISO_19115)
235 public static final CharacterSet EBCDIC = new CharacterSet("EBCDIC", null);
236
237 /**
238 * Korean code set.
239 */
240 @UML(identifier="eucKR", obligation=CONDITIONAL, specification=ISO_19115)
241 public static final CharacterSet EUC_KR = new CharacterSet("EUC_KR", "EUC-KR");
242
243 /**
244 * Traditional Chinese code set used in Taiwan, Hong Kong, and other areas.
245 */
246 @UML(identifier="big5", obligation=CONDITIONAL, specification=ISO_19115)
247 public static final CharacterSet BIG_5 = new CharacterSet("BIG_5", "Big5");
248
249 /**
250 * Simplified Chinese code set.
251 */
252 @UML(identifier="GB2312", obligation=CONDITIONAL, specification=ISO_19115)
253 public static final CharacterSet GB2312 = new CharacterSet("GB2312", "GB2312");
254
255 /**
256 * The Java {@link Charset} name (never {@code null}).
257 */
258 private final String charset;
259
260 /**
261 * Constructs an enum with the given name. The new enum is
262 * automatically added to the list returned by {@link #values}.
263 *
264 * @param name The enum name. This name must not be in use by an other enum of this type.
265 * @param charset The Java {@link Charset} name, or {@code null} if none.
266 */
267 private CharacterSet(final String name, final String charset) {
268 super(name, VALUES);
269 this.charset = (charset != null) ? charset : name;
270 }
271
272 /**
273 * Constructs an enum with identical name and charset.
274 * This is needed for {@link CodeList#valueOf(Class, String)} reflection.
275 */
276 private CharacterSet(final String name) {
277 this(name, name);
278 }
279
280 /**
281 * Converts the Character Set to a java Charset, if it can.
282 *
283 * @return The Java Charset.
284 * @throws UnsupportedCharsetException If no support for the charset is available.
285 *
286 * @see <A HREF="http://download.oracle.com/javase/6/docs/technotes/guides/intl/encoding.doc.html">Supported encodings</A>
287 */
288 public Charset toCharset() throws UnsupportedCharsetException {
289 return Charset.forName(charset);
290 }
291
292 /**
293 * Returns all the names of this code. The returned array contains the
294 * following elements, with duplicated values and null values removed:
295 * <p>
296 * <ul>
297 * <li>The programmatic {@linkplain #name() name}</li>
298 * <li>The UML {@linkplain #identifier() identifier}</li>
299 * <li>The {@linkplain #toCharset() charset} name</li>
300 * </ul>
301 * <p>
302 * Those names are typically equal except for the case (programmatic names are upper case
303 * while UML names are lower case) and special characters like {@code '-'}.
304 *
305 * @return All names of this code constant. This array is never null and never empty.
306 *
307 * @since 2.3
308 */
309 @Override
310 public String[] names() {
311 final String name = name();
312 final String charset = this.charset;
313 if (charset.equals(name)) {
314 return super.names();
315 }
316 final String identifier = identifier();
317 if (identifier != null && !identifier.equals(name)) {
318 return new String[] {name, identifier, charset};
319 } else {
320 return new String[] {name, charset};
321 }
322 }
323
324 /**
325 * Returns the list of {@code CharacterSet}s.
326 *
327 * @return The list of codes declared in the current JVM.
328 */
329 public static CharacterSet[] values() {
330 synchronized (VALUES) {
331 return VALUES.toArray(new CharacterSet[VALUES.size()]);
332 }
333 }
334
335 /**
336 * Returns the list of enumerations of the same kind than this enum.
337 */
338 @Override
339 public CharacterSet[] family() {
340 return values();
341 }
342
343 /**
344 * Returns the character set that matches the given string, or returns a
345 * new one if none match it. More specifically, this methods returns the first instance for
346 * which <code>{@linkplain #name() name()}.{@linkplain String#equals equals}(code)</code>
347 * returns {@code true}. If no existing instance is found, then a new one is created for
348 * the given name.
349 *
350 * @param code The name of the code to fetch or to create.
351 * @return A code matching the given name.
352 */
353 public static CharacterSet valueOf(String code) {
354 return valueOf(CharacterSet.class, code);
355 }
356 }