Difference between revisions of "Andlabs/Sega of Japan Dreamcast Master List"
From Sega Retro
(→1998) |
(→1998) |
||
(4 intermediate revisions by the same user not shown) | |||
Line 1: | Line 1: | ||
because I've had it now | because I've had it now | ||
<br> ok more explanation: Guardiana (where our DC master list appears to come from) prioritizes JP name over US name so I kept getting caught off-guard when making lists (what tipped me over the edge was seeing [[HuneX]]'s website list the JP and US versions of games separately) or just has wrong info (wonders never cease); sometimes the Sega pages have wrong serial numbers (happens more often with Saturn) or randomly combine versions or something, and again I want to make sure everything here is correct | <br> ok more explanation: Guardiana (where our DC master list appears to come from) prioritizes JP name over US name so I kept getting caught off-guard when making lists (what tipped me over the edge was seeing [[HuneX]]'s website list the JP and US versions of games separately) or just has wrong info (wonders never cease); sometimes the Sega pages have wrong serial numbers (happens more often with Saturn) or randomly combine versions or something, and again I want to make sure everything here is correct | ||
+ | |||
+ | Sometimes there will be a translated name on the left spine that differs from the Japanese name; that will be listed second, after a slash. I personally consider this to be a secondary name :/ | ||
{| class="prettytable" | {| class="prettytable" | ||
Line 26: | Line 28: | ||
==1998== | ==1998== | ||
{| class="prettytable" | {| class="prettytable" | ||
− | + | {{wipdclist_head}} | |
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
{{wipdclist | {{wipdclist | ||
| genrelist=ADV | | genrelist=ADV | ||
| genrebox=ADV | | genrebox=ADV | ||
| publisher=[[45XLV]] | | publisher=[[45XLV]] | ||
− | | title=[[July]] | + | | title=[[July]]/? |
| jptitle=July | | jptitle=July | ||
| overseas={{no}} | | overseas={{no}} | ||
Line 54: | Line 43: | ||
}} | }} | ||
{{wipdclist | {{wipdclist | ||
− | | verified=good | + | | verified=not yet good |
| genrelist=RCG | | genrelist=RCG | ||
| genrebox=RCG | | genrebox=RCG | ||
| publisher=[[GE]] | | publisher=[[GE]] | ||
− | | title=[[Pen Pen TriIcelon]] | + | | title=[[Pen Pen TriIcelon]]/(check) |
| jptitle=ペンペン トライアイスロン | | jptitle=ペンペン トライアイスロン | ||
| overseas={{yes|Same}} | | overseas={{yes|Same}} | ||
Line 158: | Line 147: | ||
}} | }} | ||
|} | |} | ||
+ | |||
+ | ==generator== | ||
+ | <pre><nowiki># 27-28 oct 2011 | ||
+ | # NOTE: PYTHON 3 | ||
+ | |||
+ | import sys | ||
+ | import urllib.request | ||
+ | import io | ||
+ | from lxml import etree | ||
+ | |||
def pageURLGen(p_hw=10, p_sr=640, p_rpp=20):
    """Yield sega.jp catalogue search URLs from the last results page to the first.

    Parameters:
        p_hw:  value of the ``hw`` (hardware) query parameter; 10 is Dreamcast.
        p_sr:  start record of the last results page (640 was found by
               clicking on the last page of results for Dreamcast).
        p_rpp: records per page.  sega seems to ignore this query parameter,
               so it is also used here as the step by which the start record
               is decremented between pages.

    Yields:
        One URL string per page, with ``sr`` counting down from ``p_sr`` to
        the smallest non-negative value reachable in steps of ``p_rpp``.

    Raises:
        ValueError: if ``p_rpp`` is not positive (the countdown would never
        terminate).
    """
    if p_rpp <= 0:
        raise ValueError("p_rpp must be positive, got {}".format(p_rpp))
    url = "http://sega.jp/cgi-bin/csgame.cgi?rpp={}&sr={}&tt=&sy=&gr=&hw={}&fw=&sort=2"
    # Count the start record down to (and including) zero when it divides evenly.
    for start in range(p_sr, -1, -p_rpp):
        yield url.format(p_rpp, start, p_hw)
+ | |||
# constants
# Every icon the scraper recognises lives under this directory on sega.jp.
_ICON_DIR = "/shared/images/icons/"

# Maps a genre icon path to the genre abbreviation written into the template.
genreIcons = {
    "{}genre_{}.gif".format(_ICON_DIR, code.lower()): code
    for code in (
        "ACT", "FTG", "STG", "TBL", "RCG", "SLG", "SPG",
        "ETC", "RPG", "ADV", "PZL", "ARPG", "SRPG", "AADV",
    )
}

# Maps a CERO rating icon path to the value written into the "cero" field.
ceroIcons = {
    "{}icon_cero-{}_xsmall.gif".format(_ICON_DIR, grade): grade
    for grade in ("a", "b", "c", "d", "z")
}
ceroIcons.update({
    # NOTE(review): the "?? ..." keys look like placeholders for icon paths
    # that were not known when this was written -- confirm before relying on them.
    "?? free": "free",
    "?? 12": "12",
    "?? 15": "15",
    _ICON_DIR + "icon_cero-h_xsmall.gif": "18",
})

# Icon shown in the CERO cell when no rating applies.
noCEROIcon = _ICON_DIR + "icon_cero-_xsmall.gif"
# Icons that can appear in a <div class="icon"> of a result row.
partnerIcon = _ICON_DIR + "partners.gif"
dreamcastIcon = _ICON_DIR + "hard_DC.gif"
networkIcon = _ICON_DIR + "function_network.gif"
# TODO other icons that may or may not be skipped
# Images used as link buttons inside <a> tags.
homepageLinkIcon = _ICON_DIR + "btn_official.gif"
cataloguePageLinkIcon = _ICON_DIR + "btn_product.gif"
+ | |||
def isComment(e):
    """Return True when the tag of node *e* compares equal to ``etree.Comment``."""
    is_comment_node = e.tag == etree.Comment
    return is_comment_node
+ | |||
def getImgTag(element):
    """Return the first direct child of *element* whose tag is ``img``.

    Comment children are skipped.  Only direct children are examined.

    Raises:
        Exception: if no ``img`` child is found.
    """
    for child in element:
        if not isComment(child) and child.tag.lower() == "img":
            return child
    raise Exception("expected img element; found none")
+ | |||
def handleDivClassIcon(element):
    """Translate the icon inside a ``<div class="icon">`` into template fields.

    Returns the template text for the icon (possibly empty), each field
    prefixed with a newline.

    Raises:
        Exception: if the icon is not one of the known icons, or if the div
        has no ``img`` child (raised by getImgTag).
    """
    icon = getImgTag(element).attrib["src"]
    if icon == dreamcastIcon:
        # skip this icon
        return ""
    if icon == partnerIcon:
        # note that this is third party
        return "\n| publisher=THIRD PARTY"
    if icon == networkIcon:
        return "\n| network={{yes}}"
    if icon not in genreIcons:
        raise Exception("unknown icon {}".format(icon))
    # genrebox is emitted empty; nothing in this function fills it in.
    return "\n| genrelist={}\n| genrebox=".format(genreIcons[icon])
+ | |||
def handleDivClassTitleText(element):
    """Return the title fields for a ``<div class="titleText">``.

    The element's ``text`` is written to ``jptitle``; ``title`` and
    ``overseas`` are emitted empty.
    """
    fields = (
        "\n| title=",
        "\n| jptitle={}".format(element.text),
        "\n| overseas=",
    )
    return "".join(fields)
+ | |||
def handleATag(element):
    """Translate an image link (``<a>`` wrapping an ``<img>``) into a template field.

    The link's ``href`` becomes the ``homepage`` field when the image is the
    official-site button, or the ``catalogue`` field (prefixed with
    ``http://sega.jp``) when it is the product-page button.

    Raises:
        KeyError: if the ``<a>`` has no ``href`` or the image has no ``src``.
        Exception: if the link has no ``img`` child (raised by getImgTag) or
        the image is neither of the two known buttons.
    """
    url = element.attrib["href"]
    imgsrc = getImgTag(element).attrib["src"]
    if imgsrc == homepageLinkIcon:
        return "\n| homepage={}".format(url)
    if imgsrc == cataloguePageLinkIcon:
        return "\n| catalogue=http://sega.jp{}".format(url)
    # The original formatted an undefined name (`src`) here, which raised
    # NameError instead of this diagnostic.
    raise Exception("unknown image link {} -> {}".format(imgsrc, url))
+ | |||
def handleDivRightColumn(element):
    """Collect the template fields found inside a ``<div class="rightColumn">``.

    Walks *element* and all of its descendants in document order.  ``<a>``
    tags are handled by handleATag, ``<div class="icon">`` by
    handleDivClassIcon and ``<div class="titleText">`` by
    handleDivClassTitleText; everything else is ignored.

    Returns:
        The concatenated field text, in the order the tags were encountered.
    """
    parts = []
    # iter() replaces the deprecated getiterator(); both include the element itself.
    for e in element.iter():
        if isComment(e):
            continue
        tag = e.tag.lower()
        if tag == "a":
            parts.append(handleATag(e))
        elif tag == "div":
            divclass = e.attrib.get("class")
            if divclass == "icon":
                parts.append(handleDivClassIcon(e))
            elif divclass == "titleText":
                parts.append(handleDivClassTitleText(e))
    return "".join(parts)
+ | |||
def handleFirstColumn(element):
    """Return the template fields for the first cell of a result row.

    Searches *element* and its descendants for the first
    ``<div class="rightColumn">`` and delegates to handleDivRightColumn.
    Returns an empty string when no such div exists.
    """
    # iter() replaces the deprecated getiterator(); traversal order is the same.
    for e in element.iter():
        if isComment(e):
            continue
        if e.tag.lower() == "div" and e.attrib.get("class") == "rightColumn":
            return handleDivRightColumn(e)
    return ""
+ | |||
def handleCEROCell(element):
    """Return the ``cero`` template field for the rating cell of a result row.

    Only the first ``<img>`` found in *element* (or its descendants) is
    considered.  Returns an empty string when that image is the "no rating"
    icon or when the cell contains no image at all.

    Raises:
        Exception: if the first image is not a known CERO icon.
    """
    # iter() replaces the deprecated getiterator(); traversal order is the same.
    for e in element.iter():
        if isComment(e):
            continue
        if e.tag.lower() != "img":
            continue
        # Fetch once with .get() so an <img> without src reaches the
        # diagnostic below instead of raising KeyError.
        src = e.attrib.get("src")
        if src in ceroIcons:
            return "\n| cero={}".format(ceroIcons[src])
        if src == noCEROIcon:
            return ""
        raise Exception("unknown CERO icon {}".format(src))
    return ""
+ | |||
def handleRow(element):
    """Convert one table row of the results page into a ``{{wipdclist}}`` template.

    *element* is indexed by cell: cell 0 holds the title/icon/link column,
    cell 1 the release date, cell 2 the price and cell 3 the CERO rating.

    Returns:
        The template text, or an empty string for the table header row
        (first cell's class starts with ``th1``).
    """
    # A first cell with no class attribute is treated as a data row rather
    # than crashing on None.startswith.
    first_cell_class = element[0].attrib.get("class") or ""
    if first_cell_class.startswith("th1"):  # skip table header
        return ""
    parts = [
        "{{wipdclist",
        handleFirstColumn(element[0]),
        "\n| release={}".format(element[1].text),
        "\n| price={}".format(element[2].text),
        handleCEROCell(element[3]),
        # Serial fields are emitted empty; this function does not fill them in.
        "\n| seriallist=",
        "\n| serialbox=",
        "\n}}",
    ]
    return "".join(parts)
+ | |||
+ | from lxml import html | ||
+ | |||
# Fetch every results page (pageURLGen starts at the last page and works
# back to the first) and print one {{wipdclist}} template per game.
# Exceptions are left uncaught on purpose: the interpreter's default handler
# already prints a full traceback and stops the run.
for page in pageURLGen():
    tree = html.parse(page)
    glist = []
    for e in tree.iter():  # search through all tags for the one we want
        if isComment(e):
            continue
        if e.tag.lower() == "div" and e.attrib.get("id") == "resultTable":
            for tr in e[0]:  # we assume this is a <table>
                if isComment(tr):
                    # comments between rows have no cells to index
                    continue
                game = handleRow(tr)
                if game != "":
                    glist.append(game)
            break  # only the first resultTable div on a page is used
    glist.reverse()  # pages store in reverse chronological order
    for game in glist:
        print(game)
+ | </nowiki></pre> |
Latest revision as of 11:37, 20 February 2012
because I've had it now
ok more explanation: Guardiana (where our DC master list appears to come from) prioritizes JP name over US name so I kept getting caught off-guard when making lists (what tipped me over the edge was seeing HuneX's website list the JP and US versions of games separately) or just has wrong info (wonders never cease); sometimes the Sega pages have wrong serial numbers (happens more often with Saturn) or randomly combine versions or something, and again I want to make sure everything here is correct
Sometimes there will be a translated name on the left spine that differs from the Japanese name; that will be listed second, after a slash. I personally consider this to be a secondary name :/
Legend |
---|
Everything (all fields in table + page categories) verified |
Everything verified, but box/master list genre mismatch |
Problem with verification; cell(s) left white explains what (everything else is fine) |
Unverified (either I didn't get to it yet or the page doesn't exist) |
1998 1999 2000 2001 2002 2003 2004 2005-2007
1998
Romanized Name | Japanese Name | Publisher | Genre on Master List | Genre on Box | Release Date | Price | Catalogue Page | Homepage | Serial on Master List | Serial on Box | CERO | Overseas Names |
---|---|---|---|---|---|---|---|---|---|---|---|---|
July/? | July | 45XLV | ADV | ADV | 1998/11/27 | 5,800円 | [1] | T-35401M | No | |||
Pen Pen TriIcelon/(check) | ペンペン トライアイスロン | GE | RCG | RCG | 1998/11/27 | 5,800円 | [2] | T-17001M | T-17001M | Same | ||
バーチャファイター3tb リピート版 | Sega | ACT | 1998/11/27 | 5,800円 | [3] | |||||||
Virtua Fighter 3tb | バーチャファイター3tb | Sega | ACT | 1998/11/27 | 5,800円 | [4] | ||||||
Godzilla Generations | GODZILLA GENERATIONS | Sega | ACT | ACT | 1998/11/27 | 5,800円 | [5] | HDR-0004 | HDR-0004 | No | ||
?/Incoming Humanity Last Battle | インカミング 人類最終決戦 | Imagineer | STG | STG | 1998/12/17 | 5,800円 | [6] | T-15001M | T-15001M | Incoming | ||
Sonic Adventure | SONIC ADVENTURE | Sega | ACT | ACT | 1998/12/23 | 5,800円 | [7] | HDR-000 | HDR-0001 | Same | ||
Tetris 4D | TETRIS 4D(テトリス フォーディー) | BPS | PZL | PZL | 1998/12/23 | 4,800円 | [8] | T-20801M | T-20801M | No | ||
Seventh Cross | SEVENTH CROSS | NEC | SRPG | SRPG | 1998/12/23 | 5,800円 | [9] | T-38802M | US: Seventh Cross Evolution EU: none |
generator
# 27-28 oct 2011
# NOTE: PYTHON 3
"""Scrape sega.jp's catalogue search results and print them as {{wipdclist}} templates."""

import sys
import urllib.request
import io

from lxml import etree
from lxml import html


def pageURLGen(p_hw=10, p_sr=640, p_rpp=20):
    """Yield sega.jp catalogue search URLs from the last results page to the first.

    Parameters:
        p_hw:  value of the ``hw`` (hardware) query parameter; 10 is Dreamcast.
        p_sr:  start record of the last results page (640 was found by
               clicking on the last page of results for Dreamcast).
        p_rpp: records per page.  sega seems to ignore this query parameter,
               so it is also used here as the step by which the start record
               is decremented between pages.

    Raises:
        ValueError: if ``p_rpp`` is not positive.
    """
    if p_rpp <= 0:
        raise ValueError("p_rpp must be positive, got {}".format(p_rpp))
    url = "http://sega.jp/cgi-bin/csgame.cgi?rpp={}&sr={}&tt=&sy=&gr=&hw={}&fw=&sort=2"
    for start in range(p_sr, -1, -p_rpp):
        yield url.format(p_rpp, start, p_hw)


# constants
# Maps a genre icon path to the genre abbreviation written into the template.
genreIcons = {
    "/shared/images/icons/genre_act.gif": "ACT",
    "/shared/images/icons/genre_ftg.gif": "FTG",
    "/shared/images/icons/genre_stg.gif": "STG",
    "/shared/images/icons/genre_tbl.gif": "TBL",
    "/shared/images/icons/genre_rcg.gif": "RCG",
    "/shared/images/icons/genre_slg.gif": "SLG",
    "/shared/images/icons/genre_spg.gif": "SPG",
    "/shared/images/icons/genre_etc.gif": "ETC",
    "/shared/images/icons/genre_rpg.gif": "RPG",
    "/shared/images/icons/genre_adv.gif": "ADV",
    "/shared/images/icons/genre_pzl.gif": "PZL",
    "/shared/images/icons/genre_arpg.gif": "ARPG",
    "/shared/images/icons/genre_srpg.gif": "SRPG",
    "/shared/images/icons/genre_aadv.gif": "AADV",
}
# Maps a CERO rating icon path to the value written into the "cero" field.
# NOTE(review): the "?? ..." keys look like placeholders for icon paths that
# were not known when this was written -- confirm before relying on them.
ceroIcons = {
    "/shared/images/icons/icon_cero-a_xsmall.gif": "a",
    "/shared/images/icons/icon_cero-b_xsmall.gif": "b",
    "/shared/images/icons/icon_cero-c_xsmall.gif": "c",
    "/shared/images/icons/icon_cero-d_xsmall.gif": "d",
    "/shared/images/icons/icon_cero-z_xsmall.gif": "z",
    "?? free": "free",
    "?? 12": "12",
    "?? 15": "15",
    "/shared/images/icons/icon_cero-h_xsmall.gif": "18",
}
noCEROIcon = "/shared/images/icons/icon_cero-_xsmall.gif"
partnerIcon = "/shared/images/icons/partners.gif"
dreamcastIcon = "/shared/images/icons/hard_DC.gif"
networkIcon = "/shared/images/icons/function_network.gif"
# TODO other icons that may or may not be skipped
homepageLinkIcon = "/shared/images/icons/btn_official.gif"
cataloguePageLinkIcon = "/shared/images/icons/btn_product.gif"


def isComment(e):
    """Return True when the tag of node *e* compares equal to ``etree.Comment``."""
    return e.tag == etree.Comment


def getImgTag(element):
    """Return the first direct ``img`` child of *element*, skipping comments.

    Raises:
        Exception: if no ``img`` child is found.
    """
    for e in element:
        if isComment(e):
            continue
        if e.tag.lower() == "img":
            return e
    raise Exception("expected img element; found none")


def handleDivClassIcon(element):
    """Translate the icon inside a ``<div class="icon">`` into template fields.

    Raises:
        Exception: if the icon is not one of the known icons.
    """
    src = getImgTag(element).attrib["src"]
    if src == dreamcastIcon:  # skip this icon
        return ""
    if src == partnerIcon:  # note that this is third party
        return "\n| publisher=THIRD PARTY"
    if src == networkIcon:
        return "\n| network={{yes}}"
    if src in genreIcons:
        return "\n| genrelist={}".format(genreIcons[src]) + "\n| genrebox="
    raise Exception("unknown icon {}".format(src))


def handleDivClassTitleText(element):
    """Return the title fields; the element's text is written to ``jptitle``."""
    return (
        "\n| title="
        + "\n| jptitle={}".format(element.text)
        + "\n| overseas="
    )


def handleATag(element):
    """Translate an image link into a ``homepage`` or ``catalogue`` field.

    Raises:
        Exception: if the link's image is neither of the two known buttons.
    """
    url = element.attrib["href"]
    imgsrc = getImgTag(element).attrib["src"]
    if imgsrc == homepageLinkIcon:
        return "\n| homepage={}".format(url)
    if imgsrc == cataloguePageLinkIcon:
        return "\n| catalogue=http://sega.jp{}".format(url)
    # The original formatted an undefined name (`src`) here -> NameError.
    raise Exception("unknown image link {} -> {}".format(imgsrc, url))


def handleDivRightColumn(element):
    """Collect the template fields found inside a ``<div class="rightColumn">``."""
    parts = []
    # iter() replaces the deprecated getiterator().
    for e in element.iter():
        if isComment(e):
            continue
        tag = e.tag.lower()
        if tag == "a":
            parts.append(handleATag(e))
        elif tag == "div":
            divclass = e.attrib.get("class")
            if divclass == "icon":
                parts.append(handleDivClassIcon(e))
            elif divclass == "titleText":
                parts.append(handleDivClassTitleText(e))
    return "".join(parts)


def handleFirstColumn(element):
    """Return the fields of the first ``<div class="rightColumn">`` under *element*."""
    for e in element.iter():
        if isComment(e):
            continue
        if e.tag.lower() == "div" and e.attrib.get("class") == "rightColumn":
            return handleDivRightColumn(e)
    return ""


def handleCEROCell(element):
    """Return the ``cero`` field for the first ``<img>`` under *element*.

    Returns an empty string for the "no rating" icon or when there is no image.

    Raises:
        Exception: if the image is not a known CERO icon.
    """
    for e in element.iter():
        if isComment(e):
            continue
        if e.tag.lower() != "img":
            continue
        # .get() so an <img> without src reaches the diagnostic, not KeyError.
        src = e.attrib.get("src")
        if src in ceroIcons:
            return "\n| cero={}".format(ceroIcons[src])
        if src == noCEROIcon:
            return ""
        raise Exception("unknown CERO icon {}".format(src))
    return ""


def handleRow(element):
    """Convert one result-table row into a ``{{wipdclist}}`` template.

    Returns an empty string for the header row (first cell's class starts
    with ``th1``).
    """
    # Treat a missing class attribute as "not a header" instead of crashing.
    first_cell_class = element[0].attrib.get("class") or ""
    if first_cell_class.startswith("th1"):  # skip table header
        return ""
    s = "{{wipdclist"
    s += handleFirstColumn(element[0])
    s += "\n| release={}".format(element[1].text)
    s += "\n| price={}".format(element[2].text)
    s += handleCEROCell(element[3])
    s += "\n| seriallist="
    s += "\n| serialbox="
    return s + "\n}}"


# Exceptions are left uncaught on purpose: the interpreter's default handler
# already prints a full traceback and stops the run.
for page in pageURLGen():
    tree = html.parse(page)
    glist = []
    for e in tree.iter():  # search through all tags for the one we want
        if isComment(e):
            continue
        if e.tag.lower() == "div" and e.attrib.get("id") == "resultTable":
            for tr in e[0]:  # we assume this is a <table>
                if isComment(tr):
                    continue
                game = handleRow(tr)
                if game != "":
                    glist.append(game)
            break
    glist.reverse()  # pages store in reverse chronological order
    for game in glist:
        print(game)