Libav 0.7.1
|
00001 /* 00002 * ID3v2 header parser 00003 * Copyright (c) 2003 Fabrice Bellard 00004 * 00005 * This file is part of Libav. 00006 * 00007 * Libav is free software; you can redistribute it and/or 00008 * modify it under the terms of the GNU Lesser General Public 00009 * License as published by the Free Software Foundation; either 00010 * version 2.1 of the License, or (at your option) any later version. 00011 * 00012 * Libav is distributed in the hope that it will be useful, 00013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00015 * Lesser General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU Lesser General Public 00018 * License along with Libav; if not, write to the Free Software 00019 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 00020 */ 00021 00022 #include "id3v2.h" 00023 #include "id3v1.h" 00024 #include "libavutil/avstring.h" 00025 #include "libavutil/intreadwrite.h" 00026 #include "libavutil/dict.h" 00027 #include "avio_internal.h" 00028 00029 int ff_id3v2_match(const uint8_t *buf, const char * magic) 00030 { 00031 return buf[0] == magic[0] && 00032 buf[1] == magic[1] && 00033 buf[2] == magic[2] && 00034 buf[3] != 0xff && 00035 buf[4] != 0xff && 00036 (buf[6] & 0x80) == 0 && 00037 (buf[7] & 0x80) == 0 && 00038 (buf[8] & 0x80) == 0 && 00039 (buf[9] & 0x80) == 0; 00040 } 00041 00042 int ff_id3v2_tag_len(const uint8_t * buf) 00043 { 00044 int len = ((buf[6] & 0x7f) << 21) + 00045 ((buf[7] & 0x7f) << 14) + 00046 ((buf[8] & 0x7f) << 7) + 00047 (buf[9] & 0x7f) + 00048 ID3v2_HEADER_SIZE; 00049 if (buf[5] & 0x10) 00050 len += ID3v2_HEADER_SIZE; 00051 return len; 00052 } 00053 00054 static unsigned int get_size(AVIOContext *s, int len) 00055 { 00056 int v = 0; 00057 while (len--) 00058 v = (v << 7) + (avio_r8(s) & 0x7F); 00059 return v; 00060 } 00061 00062 static void read_ttag(AVFormatContext *s, AVIOContext *pb, int taglen, const char *key) 00063 { 00064 char *q, dst[512]; 00065 const char *val = NULL; 00066 int len, dstlen = sizeof(dst) - 1; 00067 unsigned genre; 00068 unsigned int (*get)(AVIOContext*) = avio_rb16; 00069 00070 dst[0] = 0; 00071 if (taglen < 1) 00072 return; 00073 00074 taglen--; /* account for encoding type byte */ 00075 00076 switch (avio_r8(pb)) { /* encoding type */ 00077 00078 case ID3v2_ENCODING_ISO8859: 00079 q = dst; 00080 while (taglen-- && q - dst < dstlen - 7) { 00081 uint8_t tmp; 00082 PUT_UTF8(avio_r8(pb), tmp, *q++ = tmp;) 00083 } 00084 *q = 0; 00085 break; 00086 00087 case ID3v2_ENCODING_UTF16BOM: 00088 taglen -= 2; 00089 switch (avio_rb16(pb)) { 00090 case 0xfffe: 00091 get = avio_rl16; 00092 case 0xfeff: 00093 break; 00094 default: 00095 av_log(s, AV_LOG_ERROR, "Incorrect BOM value in tag %s.\n", key); 00096 return; 00097 } 00098 // fall-through 00099 00100 case ID3v2_ENCODING_UTF16BE: 00101 q = dst; 00102 while (taglen > 1 && q - dst < dstlen - 7) { 00103 uint32_t ch; 00104 uint8_t tmp; 00105 00106 GET_UTF16(ch, ((taglen -= 2) >= 0 ? get(pb) : 0), break;) 00107 PUT_UTF8(ch, tmp, *q++ = tmp;) 00108 } 00109 *q = 0; 00110 break; 00111 00112 case ID3v2_ENCODING_UTF8: 00113 len = FFMIN(taglen, dstlen); 00114 avio_read(pb, dst, len); 00115 dst[len] = 0; 00116 break; 00117 default: 00118 av_log(s, AV_LOG_WARNING, "Unknown encoding in tag %s.\n", key); 00119 } 00120 00121 if (!(strcmp(key, "TCON") && strcmp(key, "TCO")) 00122 && (sscanf(dst, "(%d)", &genre) == 1 || sscanf(dst, "%d", &genre) == 1) 00123 && genre <= ID3v1_GENRE_MAX) 00124 val = ff_id3v1_genre_str[genre]; 00125 else if (!(strcmp(key, "TXXX") && strcmp(key, "TXX"))) { 00126 /* dst now contains two 0-terminated strings */ 00127 dst[dstlen] = 0; 00128 len = strlen(dst); 00129 key = dst; 00130 val = dst + FFMIN(len + 1, dstlen); 00131 } 00132 else if (*dst) 00133 val = dst; 00134 00135 if (val) 00136 av_dict_set(&s->metadata, key, val, AV_DICT_DONT_OVERWRITE); 00137 } 00138 00139 static int is_number(const char *str) 00140 { 00141 while (*str >= '0' && *str <= '9') str++; 00142 return !*str; 00143 } 00144 00145 static AVDictionaryEntry* get_date_tag(AVDictionary *m, const char *tag) 00146 { 00147 AVDictionaryEntry *t; 00148 if ((t = av_dict_get(m, tag, NULL, AV_DICT_MATCH_CASE)) && 00149 strlen(t->value) == 4 && is_number(t->value)) 00150 return t; 00151 return NULL; 00152 } 00153 00154 static void merge_date(AVDictionary **m) 00155 { 00156 AVDictionaryEntry *t; 00157 char date[17] = {0}; // YYYY-MM-DD hh:mm 00158 00159 if (!(t = get_date_tag(*m, "TYER")) && 00160 !(t = get_date_tag(*m, "TYE"))) 00161 return; 00162 av_strlcpy(date, t->value, 5); 00163 av_dict_set(m, "TYER", NULL, 0); 00164 av_dict_set(m, "TYE", NULL, 0); 00165 00166 if (!(t = get_date_tag(*m, "TDAT")) && 00167 !(t = get_date_tag(*m, "TDA"))) 00168 goto finish; 00169 snprintf(date + 4, sizeof(date) - 4, "-%.2s-%.2s", t->value + 2, t->value); 00170 av_dict_set(m, "TDAT", NULL, 0); 00171 av_dict_set(m, "TDA", NULL, 0); 00172 00173 if (!(t = get_date_tag(*m, "TIME")) && 00174 !(t = get_date_tag(*m, "TIM"))) 00175 goto finish; 00176 snprintf(date + 10, sizeof(date) - 10, " %.2s:%.2s", t->value, t->value + 2); 00177 av_dict_set(m, "TIME", NULL, 0); 00178 av_dict_set(m, "TIM", NULL, 0); 00179 00180 finish: 00181 if (date[0]) 00182 av_dict_set(m, "date", date, 0); 00183 } 00184 00185 static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t flags) 00186 { 00187 int isv34, tlen, unsync; 00188 char tag[5]; 00189 int64_t next, end = avio_tell(s->pb) + len; 00190 int taghdrlen; 00191 const char *reason = NULL; 00192 AVIOContext pb; 00193 unsigned char *buffer = NULL; 00194 int buffer_size = 0; 00195 00196 switch (version) { 00197 case 2: 00198 if (flags & 0x40) { 00199 reason = "compression"; 00200 goto error; 00201 } 00202 isv34 = 0; 00203 taghdrlen = 6; 00204 break; 00205 00206 case 3: 00207 case 4: 00208 isv34 = 1; 00209 taghdrlen = 10; 00210 break; 00211 00212 default: 00213 reason = "version"; 00214 goto error; 00215 } 00216 00217 unsync = flags & 0x80; 00218 00219 if (isv34 && flags & 0x40) /* Extended header present, just skip over it */ 00220 avio_skip(s->pb, get_size(s->pb, 4)); 00221 00222 while (len >= taghdrlen) { 00223 unsigned int tflags = 0; 00224 int tunsync = 0; 00225 00226 if (isv34) { 00227 avio_read(s->pb, tag, 4); 00228 tag[4] = 0; 00229 if(version==3){ 00230 tlen = avio_rb32(s->pb); 00231 }else 00232 tlen = get_size(s->pb, 4); 00233 tflags = avio_rb16(s->pb); 00234 tunsync = tflags & ID3v2_FLAG_UNSYNCH; 00235 } else { 00236 avio_read(s->pb, tag, 3); 00237 tag[3] = 0; 00238 tlen = avio_rb24(s->pb); 00239 } 00240 if (tlen <= 0 || tlen > len - taghdrlen) { 00241 av_log(s, AV_LOG_WARNING, "Invalid size in frame %s, skipping the rest of tag.\n", tag); 00242 break; 00243 } 00244 len -= taghdrlen + tlen; 00245 next = avio_tell(s->pb) + tlen; 00246 00247 if (tflags & ID3v2_FLAG_DATALEN) { 00248 avio_rb32(s->pb); 00249 tlen -= 4; 00250 } 00251 00252 if (tflags & (ID3v2_FLAG_ENCRYPTION | ID3v2_FLAG_COMPRESSION)) { 00253 av_log(s, AV_LOG_WARNING, "Skipping encrypted/compressed ID3v2 frame %s.\n", tag); 00254 avio_skip(s->pb, tlen); 00255 } else if (tag[0] == 'T') { 00256 if (unsync || tunsync) { 00257 int i, j; 00258 av_fast_malloc(&buffer, &buffer_size, tlen); 00259 if (!buffer) { 00260 av_log(s, AV_LOG_ERROR, "Failed to alloc %d bytes\n", tlen); 00261 goto seek; 00262 } 00263 for (i = 0, j = 0; i < tlen; i++, j++) { 00264 buffer[j] = avio_r8(s->pb); 00265 if (j > 0 && !buffer[j] && buffer[j - 1] == 0xff) { 00266 /* Unsynchronised byte, skip it */ 00267 j--; 00268 } 00269 } 00270 ffio_init_context(&pb, buffer, j, 0, NULL, NULL, NULL, NULL); 00271 read_ttag(s, &pb, j, tag); 00272 } else { 00273 read_ttag(s, s->pb, tlen, tag); 00274 } 00275 } 00276 else if (!tag[0]) { 00277 if (tag[1]) 00278 av_log(s, AV_LOG_WARNING, "invalid frame id, assuming padding"); 00279 avio_skip(s->pb, tlen); 00280 break; 00281 } 00282 /* Skip to end of tag */ 00283 seek: 00284 avio_seek(s->pb, next, SEEK_SET); 00285 } 00286 00287 if (version == 4 && flags & 0x10) /* Footer preset, always 10 bytes, skip over it */ 00288 end += 10; 00289 00290 error: 00291 if (reason) 00292 av_log(s, AV_LOG_INFO, "ID3v2.%d tag skipped, cannot handle %s\n", version, reason); 00293 avio_seek(s->pb, end, SEEK_SET); 00294 av_free(buffer); 00295 return; 00296 } 00297 00298 void ff_id3v2_read(AVFormatContext *s, const char *magic) 00299 { 00300 int len, ret; 00301 uint8_t buf[ID3v2_HEADER_SIZE]; 00302 int found_header; 00303 int64_t off; 00304 00305 do { 00306 /* save the current offset in case there's nothing to read/skip */ 00307 off = avio_tell(s->pb); 00308 ret = avio_read(s->pb, buf, ID3v2_HEADER_SIZE); 00309 if (ret != ID3v2_HEADER_SIZE) 00310 break; 00311 found_header = ff_id3v2_match(buf, magic); 00312 if (found_header) { 00313 /* parse ID3v2 header */ 00314 len = ((buf[6] & 0x7f) << 21) | 00315 ((buf[7] & 0x7f) << 14) | 00316 ((buf[8] & 0x7f) << 7) | 00317 (buf[9] & 0x7f); 00318 ff_id3v2_parse(s, len, buf[3], buf[5]); 00319 } else { 00320 avio_seek(s->pb, off, SEEK_SET); 00321 } 00322 } while (found_header); 00323 ff_metadata_conv(&s->metadata, NULL, ff_id3v2_34_metadata_conv); 00324 ff_metadata_conv(&s->metadata, NULL, ff_id3v2_2_metadata_conv); 00325 ff_metadata_conv(&s->metadata, NULL, ff_id3v2_4_metadata_conv); 00326 merge_date(&s->metadata); 00327 } 00328 00329 const AVMetadataConv ff_id3v2_34_metadata_conv[] = { 00330 { "TALB", "album"}, 00331 { "TCOM", "composer"}, 00332 { "TCON", "genre"}, 00333 { "TCOP", "copyright"}, 00334 { "TENC", "encoded_by"}, 00335 { "TIT2", "title"}, 00336 { "TLAN", "language"}, 00337 { "TPE1", "artist"}, 00338 { "TPE2", "album_artist"}, 00339 { "TPE3", "performer"}, 00340 { "TPOS", "disc"}, 00341 { "TPUB", "publisher"}, 00342 { "TRCK", "track"}, 00343 { "TSSE", "encoder"}, 00344 { 0 } 00345 }; 00346 00347 const AVMetadataConv ff_id3v2_4_metadata_conv[] = { 00348 { "TDRL", "date"}, 00349 { "TDRC", "date"}, 00350 { "TDEN", "creation_time"}, 00351 { "TSOA", "album-sort"}, 00352 { "TSOP", "artist-sort"}, 00353 { "TSOT", "title-sort"}, 00354 { 0 } 00355 }; 00356 00357 const AVMetadataConv ff_id3v2_2_metadata_conv[] = { 00358 { "TAL", "album"}, 00359 { "TCO", "genre"}, 00360 { "TT2", "title"}, 00361 { "TEN", "encoded_by"}, 00362 { "TP1", "artist"}, 00363 { "TP2", "album_artist"}, 00364 { "TP3", "performer"}, 00365 { "TRK", "track"}, 00366 { 0 } 00367 }; 00368 00369 00370 const char ff_id3v2_tags[][4] = { 00371 "TALB", "TBPM", "TCOM", "TCON", "TCOP", "TDLY", "TENC", "TEXT", 00372 "TFLT", "TIT1", "TIT2", "TIT3", "TKEY", "TLAN", "TLEN", "TMED", 00373 "TOAL", "TOFN", "TOLY", "TOPE", "TOWN", "TPE1", "TPE2", "TPE3", 00374 "TPE4", "TPOS", "TPUB", "TRCK", "TRSN", "TRSO", "TSRC", "TSSE", 00375 { 0 }, 00376 }; 00377 00378 const char ff_id3v2_4_tags[][4] = { 00379 "TDEN", "TDOR", "TDRC", "TDRL", "TDTG", "TIPL", "TMCL", "TMOO", 00380 "TPRO", "TSOA", "TSOP", "TSOT", "TSST", 00381 { 0 }, 00382 }; 00383 00384 const char ff_id3v2_3_tags[][4] = { 00385 "TDAT", "TIME", "TORY", "TRDA", "TSIZ", "TYER", 00386 { 0 }, 00387 };