success:
true
identity:
@context:
alsoKnownAs:
verificationMethod:
type:
"Multikey"
controller:
"did:plc:23e6dsev72e5hvxzi2c7tvyz"
publicKeyMultibase:
"zQ3shuqNcT5ACMfpbEktQunzuYgTuxgARQ5DToKP2wjH1XTNY"
service:
id:
"#atproto_pds"
type:
"AtprotoPersonalDataServer"
serviceEndpoint:
"https://morel.us-east.host.bsky.network"
cid:
"bafyreifc5h5ry5k7oa35b632msfnehzhkr3ocqsq2mgqana7wk6yqfk4gm"
value:
text:
"We find that LMs struggle to tag and verify standards linked to problems. They can also generate problems that do not fully align with standards described in prompts, and checking for generation issues w/ GPT-4 can lead to more optimistic predictions than the ones teachers make."
$type:
"app.bsky.feed.post"
embed:
$type:
"app.bsky.embed.images"
images:
alt:
"A table of common reasons for why generated problems have no or partial alignment, obtained via open coding of teachers’ explanations. Provided examples in each row are cases where GPT-4 judges a problem to be fully aligned, but teachers do not."
image:
$type:
"blob"
ref:
$link:
"bafkreigb7orukapjbreb3ndlsmrnbrw7okxi72hpqhbtuyolq4qpetyhmm"
mimeType:
"image/jpeg"
size:
772039
aspectRatio:
width:
2000
height:
965
langs:
"en"
reply:
root:
cid:
"bafyreihtmvms3x2n7rxuumbnn6duo4sz56ss6qbefjr3vfufdystr7nlua"
parent:
cid:
"bafyreibcw2f3rm7chjyp7tay65topclhammenyacnori2s5jtpgbq3hi7i"
createdAt:
"2024-08-14T03:18:38.173Z"