Difference between revisions of "Andlabs/Sega of Japan Dreamcast Master List"

From Sega Retro

 
(43 intermediate revisions by the same user not shown)
Line 1: Line 1:
 
because I've had it now
 
because I've had it now
 +
<br> ok more explanation: Guardiana (where our DC master list appears to come from) prioritizes JP name over US name so I kept getting caught off-guard when making lists (what tipped me over the edge was seeing [[HuneX]]'s website list the JP and US versions of games separately) or just has wrong info (wonders never cease); sometimes the Sega pages have wrong serial numbers (happens more often with Saturn) or randomly combine versions or something, and again I want to make sure everything here is correct
  
TODO actually make this page
+
Sometimes there will be a translated name on the left spine that differs from the Japanese name; that will be listed second, after a slash. I personally consider this to be a secondary name :/
<pre>
 
notes for writing data extraction script
 
  
results are in a table in a div id="resultTable"
+
{| class="prettytable"
table has four columns: title date price CERO
+
|-
 +
! Legend
 +
|- bgcolor="#44FF00"
 +
| Everything (all fields in table + page categories) verified
 +
|- bgcolor="#CCCC00"
 +
| Everything verified, but box/master list genre mismatch
 +
|- bgcolor="#FF4400"
 +
| Problem with verification; cell(s) left white explains what (everything else is fine)
 +
|-
 +
| Unverified (either I didn't get to it yet or the page doesn't exist)
 +
|}
 +
 
 +
[[User:Andlabs/Sega of Japan Dreamcast Master List|1998]]
 +
[[User:Andlabs/Sega of Japan Dreamcast Master List/1999|1999]]
 +
[[User:Andlabs/Sega of Japan Dreamcast Master List/2000|2000]]
 +
[[User:Andlabs/Sega of Japan Dreamcast Master List/2001|2001]]
 +
[[User:Andlabs/Sega of Japan Dreamcast Master List/2002|2002]]
 +
[[User:Andlabs/Sega of Japan Dreamcast Master List/2003|2003]]
 +
[[User:Andlabs/Sega of Japan Dreamcast Master List/2004|2004]]
 +
[[User:Andlabs/Sega of Japan Dreamcast Master List/2005-2007|2005-2007]]
 +
 
 +
==1998==
 +
{| class="prettytable"
 +
{{wipdclist_head}}
 +
{{wipdclist
 +
| genrelist=ADV
 +
| genrebox=ADV
 +
| publisher=[[45XLV]]
 +
| title=[[July]]/?
 +
| jptitle=July
 +
| overseas={{no}}
 +
| catalogue=http://sega.jp/dc/981002/
 +
| release=1998/11/27
 +
| price=5,800円
 +
| seriallist=T-35401M
 +
| serialbox=
 +
}}
 +
{{wipdclist
 +
| verified=not yet good
 +
| genrelist=RCG
 +
| genrebox=RCG
 +
| publisher=[[GE]]
 +
| title=[[Pen Pen TriIcelon]]/(check)
 +
| jptitle=ペンペン トライアイスロン
 +
| overseas={{yes|Same}}
 +
| catalogue=http://sega.jp/dc/981005/
 +
| release=1998/11/27
 +
| price=5,800円
 +
| seriallist=T-17001M
 +
| serialbox=T-17001M
 +
}}
 +
{{wipdclist
 +
| genrelist=ACT
 +
| genrebox=
 +
| title=
 +
| jptitle=バーチャファイター3tb リピート版
 +
| overseas=
 +
| catalogue=http://sega.jp/dc/981004/
 +
| release=1998/11/27
 +
| price=5,800円
 +
| seriallist=
 +
| serialbox=
 +
}}
 +
{{wipdclist
 +
| genrelist=ACT
 +
| genrebox=
 +
| title=[[Virtua Fighter 3tb]]
 +
| jptitle=バーチャファイター3tb
 +
| overseas=
 +
| catalogue=http://sega.jp/dc/981004/
 +
| release=1998/11/27
 +
| price=5,800円
 +
| seriallist=
 +
| serialbox=
 +
}}
 +
{{wipdclist
 +
| verified=good
 +
| genrelist=ACT
 +
| genrebox=ACT
 +
| title=[[Godzilla Generations]]
 +
| jptitle=GODZILLA GENERATIONS
 +
| overseas={{no}}
 +
| catalogue=http://sega.jp/dc/981001/
 +
| release=1998/11/27
 +
| price=5,800円
 +
| seriallist=HDR-0004
 +
| serialbox=HDR-0004
 +
}}
 +
{{wipdclist
 +
| genrelist=STG
 +
| genrebox=STG
 +
| publisher=[[Imagineer]]
 +
| title=?/[[Incoming Humanity Last Battle]]
 +
| jptitle=インカミング 人類最終決戦
 +
| overseas={{yes|[[Incoming]]}}
 +
| catalogue=http://sega.jp/dc/981009/
 +
| release=1998/12/17
 +
| price=5,800円
 +
| seriallist=T-15001M
 +
| serialbox=T-15001M
 +
}}
 +
{{wipdclist
 +
| verified=almost
 +
| genrelist=ACT
 +
| genrebox=ACT
 +
| title=[[Sonic Adventure]]
 +
| jptitle=SONIC ADVENTURE
 +
| overseas={{yes|Same}}
 +
| catalogue=http://sega.jp/dc/981008/
 +
| release=1998/12/23
 +
| price=5,800円
 +
| seriallist=HDR-000
 +
| serialbox=HDR-0001
 +
}}
 +
{{wipdclist
 +
| verified=good
 +
| genrelist=PZL
 +
| genrebox=PZL
 +
| publisher=[[BPS]]
 +
| title=[[Tetris 4D]]
 +
| jptitle=TETRIS 4D(テトリス フォーディー)
 +
| overseas={{no}}
 +
| catalogue=http://sega.jp/dc/981101/
 +
| release=1998/12/23
 +
| price=4,800円
 +
| seriallist=T-20801M
 +
| serialbox=T-20801M
 +
}}
 +
{{wipdclist
 +
| genrelist=SRPG
 +
| genrebox=SRPG
 +
| publisher=[[NEC]]
 +
| title=[[Seventh Cross]]
 +
| jptitle=SEVENTH CROSS
 +
| overseas={{yes|US: [[Seventh Cross Evolution]]<br>EU: none}}
 +
| catalogue=http://sega.jp/dc/981012/
 +
| release=1998/12/23
 +
| price=5,800円
 +
| seriallist=T-38802M
 +
| serialbox=
 +
}}
 +
|}
 +
 
 +
==generator==
 +
<pre><nowiki># 27-28 oct 2011
 +
# NOTE: PYTHON 3
 +
 
 +
import sys
 +
import urllib.request
 +
import io
 +
from lxml import etree
  
each successive tr has
+
def pageURLGen():
- td with div class="twoColumnsSoftTitle"
+
p_hw  = 10  # Dreamcast
  - div class="leftColumn"
+
p_sr  = 640 # start record according to clicking on the last page of results
    - div class="imageLeft" with icon
+
p_rpp = 20  # decrement p_sr each time; sega seems to ignore this parameter so we have to deal with it ourselves
  - div class="rightColumn"
+
while p_sr >= 0:
    - div class="softTitle" with 2 or 3 div class="icon" containing the DC icon, genre icon, and optional 3rd-party flag icon in that order, a div class="titleText" with the game's Japanese name, and up to two image links: /shared/images/icons/btn_official.gif for homepage, /shared/images/icons/btn_product.gif for catalogue page
+
yield "http://sega.jp/cgi-bin/csgame.cgi?rpp={}&sr={}&tt=&sy=&gr=&hw={}&fw=&sort=2".format(p_rpp, p_sr, p_hw)
- three tds, with the date, price, and optional CERO icon (image)
+
p_sr -= p_rpp
  
there's no guarantee that URLs will be identical or that this is the same for all entries
+
# constants
 +
genreIcons = {
 +
"/shared/images/icons/genre_act.gif": "ACT",
 +
"/shared/images/icons/genre_ftg.gif": "FTG",
 +
"/shared/images/icons/genre_stg.gif": "STG",
 +
"/shared/images/icons/genre_tbl.gif": "TBL",
 +
"/shared/images/icons/genre_rcg.gif": "RCG",
 +
"/shared/images/icons/genre_slg.gif": "SLG",
 +
"/shared/images/icons/genre_spg.gif": "SPG",
 +
"/shared/images/icons/genre_etc.gif": "ETC",
 +
"/shared/images/icons/genre_rpg.gif": "RPG",
 +
"/shared/images/icons/genre_adv.gif": "ADV",
 +
"/shared/images/icons/genre_pzl.gif": "PZL",
 +
"/shared/images/icons/genre_arpg.gif": "ARPG",
 +
"/shared/images/icons/genre_srpg.gif": "SRPG",
 +
"/shared/images/icons/genre_aadv.gif": "AADV"
 +
}
 +
ceroIcons = {
 +
"/shared/images/icons/icon_cero-a_xsmall.gif": "a",
 +
"/shared/images/icons/icon_cero-b_xsmall.gif": "b",
 +
"/shared/images/icons/icon_cero-c_xsmall.gif": "c",
 +
"/shared/images/icons/icon_cero-d_xsmall.gif": "d",
 +
"/shared/images/icons/icon_cero-z_xsmall.gif": "z",
 +
"?? free": "free",
 +
"?? 12": "12",
 +
"?? 15": "15",
 +
"/shared/images/icons/icon_cero-h_xsmall.gif": "18"
 +
}
 +
noCEROIcon = "/shared/images/icons/icon_cero-_xsmall.gif"
 +
partnerIcon = "/shared/images/icons/partners.gif"
 +
dreamcastIcon = "/shared/images/icons/hard_DC.gif"
 +
networkIcon = "/shared/images/icons/function_network.gif"
 +
# TODO other icons that may or may not be skipped
 +
homepageLinkIcon = "/shared/images/icons/btn_official.gif"
 +
cataloguePageLinkIcon = "/shared/images/icons/btn_product.gif"
  
TODO figure out how to get all 652 entries to show up on one page
+
def isComment(e):
</pre>
+
return e.tag == etree.Comment
  
{| class="prettytable"
+
def getImgTag(element):
|-
+
for e in element:
!Romanized Name
+
if isComment(e):
!Japanese Name
+
continue
!Publisher
+
if e.tag.lower() == "img":
!Genre on Master List
+
return e
!Genre on Box
+
raise Exception("expected img element; found none")
!Release Date
+
 
!Price
+
def handleDivClassIcon(element):
!Serial on Master List
+
imgTag = getImgTag(element)
!Serial on Box
+
src = imgTag.attrib["src"]
!Catalogue Page
+
if src == dreamcastIcon: # skip this icon
!Homepage
+
return ""
!CERO
+
elif src == partnerIcon: # note that this is third party
!Overseas Names
+
return "\n| publisher=THIRD PARTY"
|-
+
elif src == networkIcon:
|}
+
return "\n| network={{yes}}"
 +
elif src in genreIcons:
 +
return "\n| genrelist={}".format(genreIcons[src]) + \
 +
"\n| genrebox="
 +
else:
 +
raise Exception("unknown icon {}".format(src))
 +
 
 +
def handleDivClassTitleText(element):
 +
return "\n| title=" + \
 +
"\n| jptitle={}".format(element.text) + \
 +
"\n| overseas="
 +
 
 +
def handleATag(element):
 +
url = element.attrib["href"]
 +
img = getImgTag(element)
 +
imgsrc = img.attrib["src"]
 +
if imgsrc == homepageLinkIcon:
 +
return "\n| homepage={}".format(url)
 +
elif imgsrc == cataloguePageLinkIcon:
 +
return "\n| catalogue=http://sega.jp{}".format(url)
 +
else:
 +
raise Exception("unknown image link {} -> {}".format(src, url))
 +
 
 +
def handleDivRightColumn(element):
 +
s = ""
 +
for e in element.getiterator():
 +
if isComment(e):
 +
continue
 +
if e.tag.lower() == "a":
 +
s += handleATag(e)
 +
elif e.tag.lower() == "div":
 +
divclass = e.attrib.get("class")
 +
if divclass == "icon":
 +
s += handleDivClassIcon(e)
 +
elif divclass == "titleText":
 +
s += handleDivClassTitleText(e)
 +
return s
 +
 
 +
def handleFirstColumn(element):
 +
for e in element.getiterator():
 +
if isComment(e):
 +
continue
 +
if e.tag.lower() == "div" and e.attrib.get("class") == "rightColumn":
 +
return handleDivRightColumn(e)
 +
return ""
 +
 
 +
def handleCEROCell(element):
 +
for e in element.getiterator():
 +
if isComment(e):
 +
continue
 +
if e.tag.lower() == "img":
 +
if e.attrib.get("src") in ceroIcons:
 +
return "\n| cero={}".format(ceroIcons[e.attrib["src"]])
 +
elif e.attrib.get("src") == noCEROIcon:
 +
return ""
 +
else:
 +
raise Exception("unknown CERO icon {}".format(e.attrib["src"]))
 +
return ""
 +
 
 +
def handleRow(element):
 +
if element[0].attrib.get("class").startswith("th1"): # skip table header
 +
return ""
 +
s = "{{wipdclist"
 +
s += handleFirstColumn(element[0])
 +
s += "\n| release={}".format(element[1].text)
 +
s += "\n| price={}".format(element[2].text)
 +
s += handleCEROCell(element[3])
 +
s += "\n| seriallist="
 +
s += "\n| serialbox="
 +
return s + "\n}}"
 +
 
 +
from lxml import html
 +
 
 +
#try:
 +
for no in [1]:
 +
for page in pageURLGen():
 +
tree = html.parse(page)
 +
glist = []
 +
for e in tree.getiterator(): # search through all tags for the one we want
 +
if isComment(e):
 +
continue
 +
if e.tag.lower() == "div" and e.attrib.get("id") == "resultTable":
 +
for tr in e[0]: # we assume this is a <table>
 +
game = handleRow(tr)
 +
if game != "":
 +
glist += [game]
 +
break
 +
glist.reverse() # pages store in reverse chronological order
 +
for game in glist:
 +
print(game)
 +
# TODO figure out the cleanest way to get a traceback
 +
</nowiki></pre>

Latest revision as of 11:37, 20 February 2012

because I've had it now
ok more explanation: Guardiana (where our DC master list appears to come from) prioritizes JP name over US name so I kept getting caught off-guard when making lists (what tipped me over the edge was seeing HuneX's website list the JP and US versions of games separately) or just has wrong info (wonders never cease); sometimes the Sega pages have wrong serial numbers (happens more often with Saturn) or randomly combine versions or something, and again I want to make sure everything here is correct

Sometimes there will be a translated name on the left spine that differs from the Japanese name; that will be listed second, after a slash. I personally consider this to be a secondary name :/

Legend
Everything (all fields in table + page categories) verified
Everything verified, but box/master list genre mismatch
Problem with verification; cell(s) left white explains what (everything else is fine)
Unverified (either I didn't get to it yet or the page doesn't exist)

1998 1999 2000 2001 2002 2003 2004 2005-2007

1998

Romanized Name Japanese Name Publisher Genre on Master List Genre on Box Release Date Price Catalogue Page Homepage Serial on Master List Serial on Box CERO Overseas Names
July/? July 45XLV ADV ADV 1998/11/27 5,800円 [1] T-35401M No
Pen Pen TriIcelon/(check) ペンペン トライアイスロン GE RCG RCG 1998/11/27 5,800円 [2] T-17001M T-17001M Same
バーチャファイター3tb リピート版 Sega ACT 1998/11/27 5,800円 [3]
Virtua Fighter 3tb バーチャファイター3tb Sega ACT 1998/11/27 5,800円 [4]
Godzilla Generations GODZILLA GENERATIONS Sega ACT ACT 1998/11/27 5,800円 [5] HDR-0004 HDR-0004 No
?/Incoming Humanity Last Battle インカミング 人類最終決戦 Imagineer STG STG 1998/12/17 5,800円 [6] T-15001M T-15001M Incoming
Sonic Adventure SONIC ADVENTURE Sega ACT ACT 1998/12/23 5,800円 [7] HDR-000 HDR-0001 Same
Tetris 4D TETRIS 4D(テトリス フォーディー) BPS PZL PZL 1998/12/23 4,800円 [8] T-20801M T-20801M No
Seventh Cross SEVENTH CROSS NEC SRPG SRPG 1998/12/23 5,800円 [9] T-38802M US: Seventh Cross Evolution
EU: none

generator

# 27-28 oct 2011
# NOTE: PYTHON 3

import sys
import urllib.request
import io
from lxml import etree

def pageURLGen():
	"""Yield the sega.jp search-result URLs, highest start record first.

	The start record counts down from 640 to 0 in steps of 20, so 33 URLs
	are produced in total.  Every URL carries hw=10 (Dreamcast) and sort=2.
	"""
	hardware = 10     # Dreamcast
	last_start = 640  # start record according to clicking on the last page of results
	# sega seems to ignore the rpp parameter, so the paging is done here by
	# stepping the start record ourselves
	page_size = 20
	template = "http://sega.jp/cgi-bin/csgame.cgi?rpp={}&sr={}&tt=&sy=&gr=&hw={}&fw=&sort=2"
	for start in range(last_start, -1, -page_size):
		yield template.format(page_size, start, hardware)

# constants: <img src="..."> paths found in the result table and what each means
_ICON_DIR = "/shared/images/icons/"

# genre icon path -> genre abbreviation; each file is genre_<abbreviation in lower case>.gif
genreIcons = {
	"{}genre_{}.gif".format(_ICON_DIR, genre.lower()): genre
	for genre in (
		"ACT", "FTG", "STG", "TBL", "RCG", "SLG", "SPG",
		"ETC", "RPG", "ADV", "PZL", "ARPG", "SRPG", "AADV",
	)
}

# CERO rating icon path -> value written to the cero= template field
ceroIcons = {
	"{}icon_cero-{}_xsmall.gif".format(_ICON_DIR, letter): letter
	for letter in "abcdz"
}
# NOTE(review): the three "?? ..." keys are placeholders, not image paths, so
# they can never match a real src; presumably the icons for these ratings were
# not known when the script was written -- confirm before relying on them
ceroIcons["?? free"] = "free"
ceroIcons["?? 12"] = "12"
ceroIcons["?? 15"] = "15"
ceroIcons[_ICON_DIR + "icon_cero-h_xsmall.gif"] = "18"

# icons with a fixed meaning
noCEROIcon = _ICON_DIR + "icon_cero-_xsmall.gif"  # CERO cell present but no rating
partnerIcon = _ICON_DIR + "partners.gif"          # third-party flag
dreamcastIcon = _ICON_DIR + "hard_DC.gif"         # hardware icon; skipped
networkIcon = _ICON_DIR + "function_network.gif"  # emitted as network={{yes}}
# TODO other icons that may or may not be skipped

# images used as link buttons inside <a> tags
homepageLinkIcon = _ICON_DIR + "btn_official.gif"
cataloguePageLinkIcon = _ICON_DIR + "btn_product.gif"

def isComment(e):
	"""Return True when node *e* has ``etree.Comment`` as its tag.

	The tree walkers in this script call this first and skip such nodes
	before they touch ``e.tag.lower()``.
	"""
	tag = e.tag
	return tag == etree.Comment

def getImgTag(element):
	"""Return the first direct child of *element* that is an <img> tag.

	Only immediate children are examined (no descent into grandchildren);
	comment nodes are ignored and the tag name is compared case-insensitively.

	Raises:
		Exception: if no child is an <img>.
	"""
	images = (
		child for child in element
		if not isComment(child) and child.tag.lower() == "img"
	)
	first = next(images, None)
	if first is None:
		raise Exception("expected img element; found none")
	return first

def handleDivClassIcon(element):
	"""Turn one <div class="icon"> into template-field text.

	The icon is identified by the src of the <img> directly inside the div:
	the Dreamcast icon produces nothing, the partner icon a placeholder
	publisher field, the network icon a network field, and a genre icon a
	filled genrelist field followed by an empty genrebox field.

	Returns:
		The field text (each field starts with a newline), or "".

	Raises:
		Exception: if the image is not one of the known icons.
	"""
	src = getImgTag(element).attrib["src"]
	fixedOutput = {
		dreamcastIcon: "",  # skip this icon
		partnerIcon: "\n| publisher=THIRD PARTY",  # note that this is third party
		networkIcon: "\n| network={{yes}}",
	}
	if src in fixedOutput:
		return fixedOutput[src]
	if src in genreIcons:
		genreLine = "\n| genrelist={}".format(genreIcons[src])
		return genreLine + "\n| genrebox="
	raise Exception("unknown icon {}".format(src))

def handleDivClassTitleText(element):
	"""Build the three title fields for a <div class="titleText">.

	The div's text goes into jptitle; title and overseas are emitted empty.
	The text is inserted as-is, so a div with no text yields "jptitle=None".
	"""
	fields = (
		"\n| title=",
		"\n| jptitle={}".format(element.text),
		"\n| overseas=",
	)
	return "".join(fields)

def handleATag(element):
	"""Turn an image link (<a href=...><img ...></a>) into a template field.

	The kind of link is decided by the src of the <img> directly inside the
	<a>: the "official" button gives a homepage field with the href as-is,
	the "product" button gives a catalogue field with "http://sega.jp"
	prepended to the href.

	Returns:
		The field text, starting with a newline.

	Raises:
		KeyError: if the <a> has no href or the <img> has no src.
		Exception: if the <a> contains no <img>, or the image is neither of
			the two known link buttons.
	"""
	url = element.attrib["href"]
	img = getImgTag(element)
	imgsrc = img.attrib["src"]
	if imgsrc == homepageLinkIcon:
		return "\n| homepage={}".format(url)
	elif imgsrc == cataloguePageLinkIcon:
		return "\n| catalogue=http://sega.jp{}".format(url)
	else:
		# report the image actually found (this used to name an undefined
		# variable, which hid the real problem behind a NameError)
		raise Exception("unknown image link {} -> {}".format(imgsrc, url))

def handleDivRightColumn(element):
	"""Collect the template fields found inside a <div class="rightColumn">.

	Walks *element* and all of its descendants in document order.  Every <a>
	is passed to handleATag, every <div class="icon"> to handleDivClassIcon
	and every <div class="titleText"> to handleDivClassTitleText; anything
	else is ignored.

	Returns:
		The concatenated field text, in the order the tags were met.
	"""
	parts = []
	# iter() replaces the deprecated getiterator(); both walk the same nodes
	for e in element.iter():
		if isComment(e):
			continue
		tag = e.tag.lower()
		if tag == "a":
			parts.append(handleATag(e))
		elif tag == "div":
			divclass = e.attrib.get("class")
			if divclass == "icon":
				parts.append(handleDivClassIcon(e))
			elif divclass == "titleText":
				parts.append(handleDivClassTitleText(e))
	return "".join(parts)

def handleFirstColumn(element):
	"""Return the fields for the first table cell of a game row.

	Searches *element* and its descendants for the first
	<div class="rightColumn"> and hands it to handleDivRightColumn.

	Returns:
		The field text from that div, or "" if the cell has no such div.
	"""
	# iter() replaces the deprecated getiterator(); both walk the same nodes
	for e in element.iter():
		if isComment(e):
			continue
		if e.tag.lower() == "div" and e.attrib.get("class") == "rightColumn":
			return handleDivRightColumn(e)
	return ""

def handleCEROCell(element):
	"""Return the cero field for the rating cell of a game row.

	Only the first <img> found in *element* (or its descendants) is
	considered.

	Returns:
		"\\n| cero=<rating>" when the image is a known rating icon, or ""
		when the image is the "no rating" icon or the cell has no image.

	Raises:
		Exception: if the image is not a recognised CERO icon (this includes
			an <img> without a src attribute).
	"""
	# iter() replaces the deprecated getiterator(); both walk the same nodes
	for e in element.iter():
		if isComment(e):
			continue
		if e.tag.lower() == "img":
			# read src once with .get() so a missing attribute reaches the
			# descriptive error below instead of raising a bare KeyError
			src = e.attrib.get("src")
			if src in ceroIcons:
				return "\n| cero={}".format(ceroIcons[src])
			elif src == noCEROIcon:
				return ""
			else:
				raise Exception("unknown CERO icon {}".format(src))
	return ""

def handleRow(element):
	"""Convert one row of the result table into a {{wipdclist}} template call.

	*element* is indexed as a row of four cells: [0] title cell, [1] release
	date, [2] price, [3] CERO rating.  The serial fields are always emitted
	empty.

	Returns:
		The complete template text, or "" for a header row (a row whose
		first cell has a class starting with "th1").
	"""
	# a first cell without any class attribute is treated as a data cell
	# rather than crashing on None.startswith
	firstCellClass = element[0].attrib.get("class") or ""
	if firstCellClass.startswith("th1"): # skip table header
		return ""
	s = "{{wipdclist"
	s += handleFirstColumn(element[0])
	s += "\n| release={}".format(element[1].text)
	s += "\n| price={}".format(element[2].text)
	s += handleCEROCell(element[3])
	s += "\n| seriallist="
	s += "\n| serialbox="
	return s + "\n}}"

from lxml import html

# Driver: fetch each results page, convert every game row inside
# <div id="resultTable"> to a {{wipdclist}} call, and print the calls.
# TODO figure out the cleanest way to get a traceback
# (a try: wrapper around this loop was sketched but never finished)
for page in pageURLGen():
	tree = html.parse(page)  # page is a URL; it is handed straight to lxml
	glist = []
	for e in tree.iter(): # search through all tags for the one we want
		if isComment(e):
			continue
		if e.tag.lower() == "div" and e.attrib.get("id") == "resultTable":
			for tr in e[0]: # we assume this is a <table>
				game = handleRow(tr)
				if game != "":  # header rows come back empty
					glist.append(game)
			break
	glist.reverse() # pages store in reverse chronological order
	for game in glist:
		print(game)