From 35ccc847b7b2acadc4ee96c2fd16d7fb3d9005f4 Mon Sep 17 00:00:00 2001 From: Casey Rodarmor Date: Mon, 25 Sep 2023 12:11:10 -0700 Subject: [PATCH] Refactor inscription parsing (#2461) --- src/envelope.rs | 692 +++++++++++++++++++++++ src/index.rs | 69 ++- src/index/updater.rs | 4 +- src/index/updater/inscription_updater.rs | 24 +- src/inscription.rs | 624 +------------------- src/lib.rs | 2 + src/subcommand/decode.rs | 4 +- src/test.rs | 14 - 8 files changed, 797 insertions(+), 636 deletions(-) create mode 100644 src/envelope.rs diff --git a/src/envelope.rs b/src/envelope.rs new file mode 100644 index 0000000000..b93d615228 --- /dev/null +++ b/src/envelope.rs @@ -0,0 +1,692 @@ +use { + super::*, + bitcoin::blockdata::{ + opcodes, + script::{self, Instruction, Instructions}, + }, +}; + +pub(crate) const PROTOCOL_ID: [u8; 3] = *b"ord"; + +pub(crate) const BODY_TAG: [u8; 0] = []; +pub(crate) const CONTENT_TYPE_TAG: [u8; 1] = [1]; +pub(crate) const PARENT_TAG: [u8; 1] = [3]; +pub(crate) const METAPROTOCOL_TAG: [u8; 1] = [7]; + +type Result = std::result::Result; +type RawEnvelope = Envelope>>; +pub(crate) type ParsedEnvelope = Envelope; + +#[derive(Debug, PartialEq, Clone)] +pub(crate) struct Envelope { + pub(crate) payload: T, + pub(crate) input: u32, + pub(crate) offset: u32, +} + +fn remove_field(fields: &mut BTreeMap<&[u8], Vec<&[u8]>>, field: &[u8]) -> Option> { + let value = fields.get_mut(field)?; + + if value.is_empty() { + None + } else { + Some(value.remove(0).to_vec()) + } +} + +impl From for ParsedEnvelope { + fn from(envelope: RawEnvelope) -> Self { + let body = envelope + .payload + .iter() + .enumerate() + .position(|(i, push)| i % 2 == 0 && push.is_empty()); + + let mut fields: BTreeMap<&[u8], Vec<&[u8]>> = BTreeMap::new(); + + let mut incomplete_field = false; + + for item in envelope.payload[..body.unwrap_or(envelope.payload.len())].chunks(2) { + match item { + [key, value] => fields.entry(key).or_default().push(value), + _ => incomplete_field = true, + } + } + + let duplicate_field = fields.iter().any(|(_key, values)| values.len() > 1); + + let content_type = remove_field(&mut fields, &CONTENT_TYPE_TAG); + let parent = remove_field(&mut fields, &PARENT_TAG); + let metaprotocol = remove_field(&mut fields, &METAPROTOCOL_TAG); + + let unrecognized_even_field = fields + .keys() + .any(|tag| tag.first().map(|lsb| lsb % 2 == 0).unwrap_or_default()); + + Self { + payload: Inscription { + body: body.map(|i| { + envelope.payload[i + 1..] + .iter() + .flatten() + .cloned() + .collect() + }), + content_type, + parent, + unrecognized_even_field, + duplicate_field, + incomplete_field, + metaprotocol, + }, + input: envelope.input, + offset: envelope.offset, + } + } +} + +impl ParsedEnvelope { + pub(crate) fn from_transaction(transaction: &Transaction) -> Vec { + RawEnvelope::from_transaction(transaction) + .into_iter() + .map(|envelope| envelope.into()) + .collect() + } +} + +impl RawEnvelope { + pub(crate) fn from_transaction(transaction: &Transaction) -> Vec { + let mut envelopes = Vec::new(); + + for (i, input) in transaction.input.iter().enumerate() { + if let Some(tapscript) = input.witness.tapscript() { + if let Ok(input_envelopes) = Self::from_tapscript(tapscript, i) { + envelopes.extend(input_envelopes); + } + } + } + + envelopes + } + + fn from_tapscript(tapscript: &Script, input: usize) -> Result> { + let mut envelopes = Vec::new(); + + let mut instructions = tapscript.instructions(); + + while let Some(instruction) = instructions.next() { + if instruction? == Instruction::PushBytes((&[]).into()) { + if let Some(envelope) = Self::from_instructions(&mut instructions, input, envelopes.len())? + { + envelopes.push(envelope); + } + } + } + + Ok(envelopes) + } + + fn from_instructions( + instructions: &mut Instructions, + input: usize, + offset: usize, + ) -> Result> { + if instructions.next().transpose()? != Some(Instruction::Op(opcodes::all::OP_IF)) { + return Ok(None); + } + + if instructions.next().transpose()? != Some(Instruction::PushBytes((&PROTOCOL_ID).into())) { + return Ok(None); + } + + let mut payload = Vec::new(); + + loop { + match instructions.next().transpose()? { + None => return Ok(None), + Some(Instruction::Op(opcodes::all::OP_ENDIF)) => { + return Ok(Some(Envelope { + payload, + input: input.try_into().unwrap(), + offset: offset.try_into().unwrap(), + })); + } + Some(Instruction::PushBytes(push)) => { + payload.push(push.as_bytes().to_vec()); + } + Some(_) => return Ok(None), + } + } + } +} + +#[cfg(test)] +mod tests { + use {super::*, bitcoin::absolute::LockTime}; + + fn parse(witnesses: &[Witness]) -> Vec { + ParsedEnvelope::from_transaction(&Transaction { + version: 0, + lock_time: LockTime::ZERO, + input: witnesses + .iter() + .map(|witness| TxIn { + previous_output: OutPoint::null(), + script_sig: ScriptBuf::new(), + sequence: Sequence::ENABLE_RBF_NO_LOCKTIME, + witness: witness.clone(), + }) + .collect(), + output: Vec::new(), + }) + } + + #[test] + fn empty() { + assert_eq!(parse(&[Witness::new()]), Vec::new()) + } + + #[test] + fn ignore_key_path_spends() { + assert_eq!( + parse(&[Witness::from_slice(&[bitcoin::script::Builder::new() + .push_opcode(bitcoin::opcodes::OP_FALSE) + .push_opcode(bitcoin::opcodes::all::OP_IF) + .push_slice(b"ord") + .push_opcode(bitcoin::opcodes::all::OP_ENDIF) + .into_script() + .into_bytes()])]), + Vec::new() + ); + } + + #[test] + fn ignore_key_path_spends_with_annex() { + assert_eq!( + parse(&[Witness::from_slice(&[ + bitcoin::script::Builder::new() + .push_opcode(bitcoin::opcodes::OP_FALSE) + .push_opcode(bitcoin::opcodes::all::OP_IF) + .push_slice(b"ord") + .push_opcode(bitcoin::opcodes::all::OP_ENDIF) + .into_script() + .into_bytes(), + vec![0x50] + ])]), + Vec::new() + ); + } + + #[test] + fn parse_from_tapscript() { + assert_eq!( + parse(&[Witness::from_slice(&[ + bitcoin::script::Builder::new() + .push_opcode(bitcoin::opcodes::OP_FALSE) + .push_opcode(bitcoin::opcodes::all::OP_IF) + .push_slice(b"ord") + .push_opcode(bitcoin::opcodes::all::OP_ENDIF) + .into_script() + .into_bytes(), + Vec::new() + ])]), + vec![ParsedEnvelope { + payload: Inscription::default(), + input: 0, + offset: 0, + }] + ); + } + + #[test] + fn ignore_unparsable_scripts() { + let mut script_bytes = bitcoin::script::Builder::new() + .push_opcode(bitcoin::opcodes::OP_FALSE) + .push_opcode(bitcoin::opcodes::all::OP_IF) + .push_slice(b"ord") + .push_opcode(bitcoin::opcodes::all::OP_ENDIF) + .into_script() + .into_bytes(); + script_bytes.push(0x01); + + assert_eq!( + parse(&[Witness::from_slice(&[script_bytes, Vec::new()])]), + Vec::new() + ); + } + + #[test] + fn no_inscription() { + assert_eq!( + parse(&[Witness::from_slice(&[ + ScriptBuf::new().into_bytes(), + Vec::new() + ])]), + Vec::new() + ); + } + + #[test] + fn duplicate_field() { + assert_eq!( + parse(&[envelope(&[b"ord", &[255], &[], &[255], &[]])]), + vec![ParsedEnvelope { + payload: Inscription { + duplicate_field: true, + ..Default::default() + }, + input: 0, + offset: 0, + }] + ); + } + + #[test] + fn with_content_type() { + assert_eq!( + parse(&[envelope(&[ + b"ord", + &[1], + b"text/plain;charset=utf-8", + &[], + b"ord", + ])]), + vec![ParsedEnvelope { + payload: inscription("text/plain;charset=utf-8", "ord"), + input: 0, + offset: 0, + }] + ); + } + + #[test] + fn with_unknown_tag() { + assert_eq!( + parse(&[envelope(&[ + b"ord", + &[1], + b"text/plain;charset=utf-8", + &[9], + b"bar", + &[], + b"ord", + ])]), + vec![ParsedEnvelope { + payload: inscription("text/plain;charset=utf-8", "ord"), + input: 0, + offset: 0, + }] + ); + } + + #[test] + fn no_body() { + assert_eq!( + parse(&[envelope(&[b"ord", &[1], b"text/plain;charset=utf-8"])]), + vec![ParsedEnvelope { + payload: Inscription { + content_type: Some(b"text/plain;charset=utf-8".to_vec()), + ..Default::default() + }, + input: 0, + offset: 0 + }], + ); + } + + #[test] + fn no_content_type() { + assert_eq!( + parse(&[envelope(&[b"ord", &[], b"foo"])]), + vec![ParsedEnvelope { + payload: Inscription { + body: Some(b"foo".to_vec()), + ..Default::default() + }, + input: 0, + offset: 0 + }], + ); + } + + #[test] + fn valid_body_in_multiple_pushes() { + assert_eq!( + parse(&[envelope(&[ + b"ord", + &[1], + b"text/plain;charset=utf-8", + &[], + b"foo", + b"bar" + ])]), + vec![ParsedEnvelope { + payload: inscription("text/plain;charset=utf-8", "foobar"), + input: 0, + offset: 0 + }], + ); + } + + #[test] + fn valid_body_in_zero_pushes() { + assert_eq!( + parse(&[envelope(&[b"ord", &[1], b"text/plain;charset=utf-8", &[]])]), + vec![ParsedEnvelope { + payload: inscription("text/plain;charset=utf-8", ""), + input: 0, + offset: 0 + }] + ); + } + + #[test] + fn valid_body_in_multiple_empty_pushes() { + assert_eq!( + parse(&[envelope(&[ + b"ord", + &[1], + b"text/plain;charset=utf-8", + &[], + &[], + &[], + &[], + &[], + &[], + ])]), + vec![ParsedEnvelope { + payload: inscription("text/plain;charset=utf-8", ""), + input: 0, + offset: 0 + }], + ); + } + + #[test] + fn valid_ignore_trailing() { + let script = script::Builder::new() + .push_opcode(opcodes::OP_FALSE) + .push_opcode(opcodes::all::OP_IF) + .push_slice(b"ord") + .push_slice([1]) + .push_slice(b"text/plain;charset=utf-8") + .push_slice([]) + .push_slice(b"ord") + .push_opcode(opcodes::all::OP_ENDIF) + .push_opcode(opcodes::all::OP_CHECKSIG) + .into_script(); + + assert_eq!( + parse(&[Witness::from_slice(&[script.into_bytes(), Vec::new()])]), + vec![ParsedEnvelope { + payload: inscription("text/plain;charset=utf-8", "ord"), + input: 0, + offset: 0 + }], + ); + } + + #[test] + fn valid_ignore_preceding() { + let script = script::Builder::new() + .push_opcode(opcodes::all::OP_CHECKSIG) + .push_opcode(opcodes::OP_FALSE) + .push_opcode(opcodes::all::OP_IF) + .push_slice(b"ord") + .push_slice([1]) + .push_slice(b"text/plain;charset=utf-8") + .push_slice([]) + .push_slice(b"ord") + .push_opcode(opcodes::all::OP_ENDIF) + .into_script(); + + assert_eq!( + parse(&[Witness::from_slice(&[script.into_bytes(), Vec::new()])]), + vec![ParsedEnvelope { + payload: inscription("text/plain;charset=utf-8", "ord"), + input: 0, + offset: 0 + }], + ); + } + + #[test] + fn multiple_inscriptions_in_a_single_witness() { + let script = script::Builder::new() + .push_opcode(opcodes::OP_FALSE) + .push_opcode(opcodes::all::OP_IF) + .push_slice(b"ord") + .push_slice([1]) + .push_slice(b"text/plain;charset=utf-8") + .push_slice([]) + .push_slice(b"foo") + .push_opcode(opcodes::all::OP_ENDIF) + .push_opcode(opcodes::OP_FALSE) + .push_opcode(opcodes::all::OP_IF) + .push_slice(b"ord") + .push_slice([1]) + .push_slice(b"text/plain;charset=utf-8") + .push_slice([]) + .push_slice(b"bar") + .push_opcode(opcodes::all::OP_ENDIF) + .into_script(); + + assert_eq!( + parse(&[Witness::from_slice(&[script.into_bytes(), Vec::new()])]), + vec![ + ParsedEnvelope { + payload: inscription("text/plain;charset=utf-8", "foo"), + input: 0, + offset: 0 + }, + ParsedEnvelope { + payload: inscription("text/plain;charset=utf-8", "bar"), + input: 0, + offset: 1 + }, + ], + ); + } + + #[test] + fn invalid_utf8_does_not_render_inscription_invalid() { + assert_eq!( + parse(&[envelope(&[ + b"ord", + &[1], + b"text/plain;charset=utf-8", + &[], + &[0b10000000] + ])]), + vec![ParsedEnvelope { + payload: inscription("text/plain;charset=utf-8", [0b10000000]), + input: 0, + offset: 0 + },], + ); + } + + #[test] + fn no_endif() { + let script = script::Builder::new() + .push_opcode(opcodes::OP_FALSE) + .push_opcode(opcodes::all::OP_IF) + .push_slice(b"ord") + .into_script(); + + assert_eq!( + parse(&[Witness::from_slice(&[script.into_bytes(), Vec::new()])]), + Vec::new(), + ); + } + + #[test] + fn no_op_false() { + let script = script::Builder::new() + .push_opcode(opcodes::all::OP_IF) + .push_slice(b"ord") + .push_opcode(opcodes::all::OP_ENDIF) + .into_script(); + + assert_eq!( + parse(&[Witness::from_slice(&[script.into_bytes(), Vec::new()])]), + Vec::new(), + ); + } + + #[test] + fn empty_envelope() { + assert_eq!(parse(&[envelope(&[])]), Vec::new()); + } + + #[test] + fn wrong_protocol_identifier() { + assert_eq!(parse(&[envelope(&[b"foo"])]), Vec::new()); + } + + #[test] + fn extract_from_transaction() { + assert_eq!( + parse(&[envelope(&[ + b"ord", + &[1], + b"text/plain;charset=utf-8", + &[], + b"ord" + ])]), + vec![ParsedEnvelope { + payload: inscription("text/plain;charset=utf-8", "ord"), + input: 0, + offset: 0, + }], + ); + } + + #[test] + fn extract_from_second_input() { + assert_eq!( + parse(&[Witness::new(), inscription("foo", [1; 1040]).to_witness()]), + vec![ParsedEnvelope { + payload: inscription("foo", [1; 1040]), + input: 1, + offset: 0, + }] + ); + } + + #[test] + fn extract_from_second_envelope() { + let mut builder = script::Builder::new(); + builder = inscription("foo", [1; 100]).append_reveal_script_to_builder(builder); + builder = inscription("bar", [1; 100]).append_reveal_script_to_builder(builder); + + assert_eq!( + parse(&[Witness::from_slice(&[ + builder.into_script().into_bytes(), + Vec::new() + ])]), + vec![ + ParsedEnvelope { + payload: inscription("foo", [1; 100]), + input: 0, + offset: 0, + }, + ParsedEnvelope { + payload: inscription("bar", [1; 100]), + input: 0, + offset: 1, + } + ] + ); + } + + #[test] + fn inscribe_png() { + assert_eq!( + parse(&[envelope(&[b"ord", &[1], b"image/png", &[], &[1; 100]])]), + vec![ParsedEnvelope { + payload: inscription("image/png", [1; 100]), + input: 0, + offset: 0, + }] + ); + } + + #[test] + fn chunked_data_is_parsable() { + let mut witness = Witness::new(); + + witness.push(&inscription("foo", [1; 1040]).append_reveal_script(script::Builder::new())); + + witness.push([]); + + assert_eq!( + parse(&[witness]), + vec![ParsedEnvelope { + payload: inscription("foo", [1; 1040]), + input: 0, + offset: 0, + }] + ); + } + + #[test] + fn round_trip_with_no_fields() { + let mut witness = Witness::new(); + + witness.push(Inscription::default().append_reveal_script(script::Builder::new())); + + witness.push([]); + + assert_eq!( + parse(&[witness]), + vec![ParsedEnvelope { + payload: Inscription::default(), + input: 0, + offset: 0, + }], + ); + } + + #[test] + fn unknown_odd_fields_are_ignored() { + assert_eq!( + parse(&[envelope(&[b"ord", &[9], &[0]])]), + vec![ParsedEnvelope { + payload: Inscription::default(), + input: 0, + offset: 0, + }], + ); + } + + #[test] + fn unknown_even_fields() { + assert_eq!( + parse(&[envelope(&[b"ord", &[22], &[0]])]), + vec![ParsedEnvelope { + payload: Inscription { + unrecognized_even_field: true, + ..Default::default() + }, + input: 0, + offset: 0, + }], + ); + } + + #[test] + fn incomplete_field() { + assert_eq!( + parse(&[envelope(&[b"ord", &[99]])]), + vec![ParsedEnvelope { + payload: Inscription { + incomplete_field: true, + ..Default::default() + }, + input: 0, + offset: 0, + }], + ); + } +} diff --git a/src/index.rs b/src/index.rs index 346836c82f..268ae921a3 100644 --- a/src/index.rs +++ b/src/index.rs @@ -731,9 +731,10 @@ impl Index { } Ok(self.get_transaction(inscription_id.txid)?.and_then(|tx| { - Inscription::from_transaction(&tx) - .get(inscription_id.index as usize) - .map(|transaction_inscription| transaction_inscription.inscription.clone()) + ParsedEnvelope::from_transaction(&tx) + .into_iter() + .nth(inscription_id.index as usize) + .map(|envelope| envelope.payload) })) } @@ -2722,6 +2723,68 @@ mod tests { } } + #[test] + fn duplicate_field_inscriptions_are_cursed() { + for context in Context::configurations() { + context.mine_blocks(1); + + let witness = envelope(&[ + b"ord", + &[1], + b"text/plain;charset=utf-8", + &[1], + b"text/plain;charset=utf-8", + ]); + + let txid = context.rpc_server.broadcast_tx(TransactionTemplate { + inputs: &[(1, 0, 0, witness)], + ..Default::default() + }); + + let inscription_id = InscriptionId { txid, index: 0 }; + + context.mine_blocks(1); + + assert_eq!( + context + .index + .get_inscription_entry(inscription_id) + .unwrap() + .unwrap() + .inscription_number, + -1 + ); + } + } + + #[test] + fn incomplete_field_inscriptions_are_cursed() { + for context in Context::configurations() { + context.mine_blocks(1); + + let witness = envelope(&[b"ord", &[1]]); + + let txid = context.rpc_server.broadcast_tx(TransactionTemplate { + inputs: &[(1, 0, 0, witness)], + ..Default::default() + }); + + let inscription_id = InscriptionId { txid, index: 0 }; + + context.mine_blocks(1); + + assert_eq!( + context + .index + .get_inscription_entry(inscription_id) + .unwrap() + .unwrap() + .inscription_number, + -1 + ); + } + } + #[test] // https://github.com/ordinals/ord/issues/2062 fn zero_value_transaction_inscription_not_cursed_but_unbound() { diff --git a/src/index/updater.rs b/src/index/updater.rs index 21a116e1fb..0a4a918995 100644 --- a/src/index/updater.rs +++ b/src/index/updater.rs @@ -519,7 +519,7 @@ impl<'index> Updater<'_> { } } else { for (tx, txid) in block.txdata.iter().skip(1).chain(block.txdata.first()) { - inscription_updater.index_transaction_inscriptions(tx, *txid, None)?; + inscription_updater.index_envelopes(tx, *txid, None)?; } } @@ -571,7 +571,7 @@ impl<'index> Updater<'_> { index_inscriptions: bool, ) -> Result { if index_inscriptions { - inscription_updater.index_transaction_inscriptions(tx, txid, Some(input_sat_ranges))?; + inscription_updater.index_envelopes(tx, txid, Some(input_sat_ranges))?; } for (vout, output) in tx.output.iter().enumerate() { diff --git a/src/index/updater/inscription_updater.rs b/src/index/updater/inscription_updater.rs index 687c1ce47e..70b6a34435 100644 --- a/src/index/updater/inscription_updater.rs +++ b/src/index/updater/inscription_updater.rs @@ -100,13 +100,13 @@ impl<'a, 'db, 'tx> InscriptionUpdater<'a, 'db, 'tx> { }) } - pub(super) fn index_transaction_inscriptions( + pub(super) fn index_envelopes( &mut self, tx: &Transaction, txid: Txid, input_sat_ranges: Option<&VecDeque<(u64, u64)>>, ) -> Result { - let mut new_inscriptions = Inscription::from_transaction(tx).into_iter().peekable(); + let mut envelopes = ParsedEnvelope::from_transaction(tx).into_iter().peekable(); let mut floating_inscriptions = Vec::new(); let mut inscribed_offsets = BTreeMap::new(); let mut total_input_value = 0; @@ -161,8 +161,8 @@ impl<'a, 'db, 'tx> InscriptionUpdater<'a, 'db, 'tx> { total_input_value += current_input_value; // go through all inscriptions in this input - while let Some(inscription) = new_inscriptions.peek() { - if inscription.tx_in_index != u32::try_from(input_index).unwrap() { + while let Some(inscription) = envelopes.peek() { + if inscription.input != u32::try_from(input_index).unwrap() { break; } @@ -171,11 +171,15 @@ impl<'a, 'db, 'tx> InscriptionUpdater<'a, 'db, 'tx> { index: id_counter, }; - let curse = if inscription.inscription.unrecognized_even_field { + let curse = if inscription.payload.unrecognized_even_field { Some(Curse::UnrecognizedEvenField) - } else if inscription.tx_in_index != 0 { + } else if inscription.payload.duplicate_field { + Some(Curse::DuplicateField) + } else if inscription.payload.incomplete_field { + Some(Curse::IncompleteField) + } else if inscription.input != 0 { Some(Curse::NotInFirstInput) - } else if inscription.tx_in_offset != 0 { + } else if inscription.offset != 0 { Some(Curse::NotAtOffsetZero) } else if inscribed_offsets.contains_key(&offset) { let seq_num = self.id_to_entry.len()?; @@ -220,7 +224,7 @@ impl<'a, 'db, 'tx> InscriptionUpdater<'a, 'db, 'tx> { }; let unbound = current_input_value == 0 - || inscription.tx_in_offset != 0 + || inscription.offset != 0 || curse == Some(Curse::UnrecognizedEvenField); if curse.is_some() || unbound { @@ -238,12 +242,12 @@ impl<'a, 'db, 'tx> InscriptionUpdater<'a, 'db, 'tx> { origin: Origin::New { cursed, fee: 0, - parent: inscription.inscription.parent(), + parent: inscription.payload.parent(), unbound, }, }); - new_inscriptions.next(); + envelopes.next(); id_counter += 1; } } diff --git a/src/inscription.rs b/src/inscription.rs index 3c9490dd17..4c6fb00627 100644 --- a/src/inscription.rs +++ b/src/inscription.rs @@ -3,24 +3,19 @@ use { bitcoin::{ blockdata::{ opcodes, - script::{self, Instruction, Instructions, PushBytesBuf}, + script::{self, PushBytesBuf}, }, - ScriptBuf, Witness, + ScriptBuf, }, - std::{iter::Peekable, str}, + std::str, }; -const PROTOCOL_ID: [u8; 3] = *b"ord"; - -const BODY_TAG: [u8; 0] = []; -const CONTENT_TYPE_TAG: [u8; 1] = [1]; -const PARENT_TAG: [u8; 1] = [3]; -const METAPROTOCOL_TAG: [u8; 1] = [7]; - #[derive(Debug, PartialEq, Clone)] pub(crate) enum Curse { - NotInFirstInput, + DuplicateField, + IncompleteField, NotAtOffsetZero, + NotInFirstInput, Reinscription, UnrecognizedEvenField, } @@ -32,13 +27,8 @@ pub struct Inscription { pub parent: Option>, pub metaprotocol: Option>, pub unrecognized_even_field: bool, -} - -#[derive(Debug, PartialEq, Clone)] -pub(crate) struct TransactionInscription { - pub(crate) inscription: Inscription, - pub(crate) tx_in_index: u32, - pub(crate) tx_in_offset: u32, + pub duplicate_field: bool, + pub incomplete_field: bool, } impl Inscription { @@ -51,29 +41,6 @@ impl Inscription { } } - pub(crate) fn from_transaction(tx: &Transaction) -> Vec { - let mut result = Vec::new(); - for (index, tx_in) in tx.input.iter().enumerate() { - let Ok(inscriptions) = InscriptionParser::parse(&tx_in.witness) else { - continue; - }; - - result.extend( - inscriptions - .into_iter() - .enumerate() - .map(|(offset, inscription)| TransactionInscription { - inscription, - tx_in_index: u32::try_from(index).unwrap(), - tx_in_offset: u32::try_from(offset).unwrap(), - }) - .collect::>(), - ) - } - - result - } - pub(crate) fn from_file( chain: Chain, path: impl AsRef, @@ -98,36 +65,41 @@ impl Inscription { content_type: Some(content_type.into()), parent: parent.map(|id| id.parent_value()), metaprotocol: metaprotocol.map(|metaprotocol| metaprotocol.into_bytes()), + duplicate_field: false, unrecognized_even_field: false, + incomplete_field: false, }) } - fn append_reveal_script_to_builder(&self, mut builder: script::Builder) -> script::Builder { + pub(crate) fn append_reveal_script_to_builder( + &self, + mut builder: script::Builder, + ) -> script::Builder { builder = builder .push_opcode(opcodes::OP_FALSE) .push_opcode(opcodes::all::OP_IF) - .push_slice(PROTOCOL_ID); + .push_slice(envelope::PROTOCOL_ID); if let Some(content_type) = self.content_type.clone() { builder = builder - .push_slice(CONTENT_TYPE_TAG) + .push_slice(envelope::CONTENT_TYPE_TAG) .push_slice(PushBytesBuf::try_from(content_type).unwrap()); } if let Some(protocol) = self.metaprotocol.clone() { builder = builder - .push_slice(METAPROTOCOL_TAG) + .push_slice(envelope::METAPROTOCOL_TAG) .push_slice(PushBytesBuf::try_from(protocol).unwrap()); } if let Some(parent) = self.parent.clone() { builder = builder - .push_slice(PARENT_TAG) + .push_slice(envelope::PARENT_TAG) .push_slice(PushBytesBuf::try_from(parent).unwrap()); } if let Some(body) = &self.body { - builder = builder.push_slice(BODY_TAG); + builder = builder.push_slice(envelope::BODY_TAG); for chunk in body.chunks(520) { builder = builder.push_slice(PushBytesBuf::try_from(chunk.to_vec()).unwrap()); } @@ -220,519 +192,10 @@ impl Inscription { } } -#[derive(Debug, PartialEq)] -pub(crate) enum InscriptionError { - InvalidInscription, - NoInscription, - NoTapscript, - Script(script::Error), -} - -type Result = std::result::Result; - -#[derive(Debug)] -struct InscriptionParser<'a> { - instructions: Peekable>, -} - -impl<'a> InscriptionParser<'a> { - fn parse(witness: &Witness) -> Result> { - let Some(tapscript) = witness.tapscript() else { - return Err(InscriptionError::NoTapscript); - }; - - InscriptionParser { - instructions: tapscript.instructions().peekable(), - } - .parse_inscriptions() - .into_iter() - .collect() - } - - fn parse_inscriptions(&mut self) -> Vec> { - let mut inscriptions = Vec::new(); - loop { - let current = self.parse_one_inscription(); - if current == Err(InscriptionError::NoInscription) { - break; - } - inscriptions.push(current); - } - - inscriptions - } - - fn parse_one_inscription(&mut self) -> Result { - self.advance_into_inscription_envelope()?; - let mut fields = BTreeMap::new(); - - loop { - match self.advance()? { - Instruction::PushBytes(tag) if tag.as_bytes() == BODY_TAG.as_slice() => { - let mut body = Vec::new(); - while !self.accept(&Instruction::Op(opcodes::all::OP_ENDIF))? { - body.extend_from_slice(self.expect_push()?); - } - fields.insert(BODY_TAG.as_slice(), body); - break; - } - Instruction::PushBytes(tag) => { - if fields.contains_key(tag.as_bytes()) { - return Err(InscriptionError::InvalidInscription); - } - fields.insert(tag.as_bytes(), self.expect_push()?.to_vec()); - } - Instruction::Op(opcodes::all::OP_ENDIF) => break, - _ => return Err(InscriptionError::InvalidInscription), - } - } - - let body = fields.remove(BODY_TAG.as_slice()); - let content_type = fields.remove(CONTENT_TYPE_TAG.as_slice()); - let parent = fields.remove(PARENT_TAG.as_slice()); - let metaprotocol = fields.remove(METAPROTOCOL_TAG.as_slice()); - let mut unrecognized_even_field = false; - - for tag in fields.keys() { - if let Some(lsb) = tag.first() { - if lsb % 2 == 0 { - unrecognized_even_field = true; - } - } - } - - Ok(Inscription { - body, - content_type, - parent, - unrecognized_even_field, - metaprotocol, - }) - } - - fn advance(&mut self) -> Result> { - self - .instructions - .next() - .ok_or(InscriptionError::NoInscription)? - .map_err(InscriptionError::Script) - } - - fn advance_into_inscription_envelope(&mut self) -> Result<()> { - loop { - if self.match_instructions(&[ - Instruction::PushBytes((&[]).into()), // represents an OF_FALSE - Instruction::Op(opcodes::all::OP_IF), - Instruction::PushBytes((&PROTOCOL_ID).into()), - ])? { - break; - } - } - - Ok(()) - } - - fn match_instructions(&mut self, instructions: &[Instruction]) -> Result { - for instruction in instructions { - if &self.advance()? != instruction { - return Ok(false); - } - } - - Ok(true) - } - - fn expect_push(&mut self) -> Result<&'a [u8]> { - match self.advance()? { - Instruction::PushBytes(bytes) => Ok(bytes.as_bytes()), - _ => Err(InscriptionError::InvalidInscription), - } - } - - fn accept(&mut self, instruction: &Instruction) -> Result { - match self.instructions.peek() { - Some(Ok(next)) => { - if next == instruction { - self.advance()?; - Ok(true) - } else { - Ok(false) - } - } - Some(Err(err)) => Err(InscriptionError::Script(*err)), - None => Ok(false), - } - } -} - #[cfg(test)] mod tests { use super::*; - #[test] - fn empty() { - assert_eq!( - InscriptionParser::parse(&Witness::new()), - Err(InscriptionError::NoTapscript) - ); - } - - #[test] - fn ignore_key_path_spends() { - assert_eq!( - InscriptionParser::parse(&Witness::from_slice(&[Vec::new()])), - Err(InscriptionError::NoTapscript), - ); - } - - #[test] - fn ignore_key_path_spends_with_annex() { - assert_eq!( - InscriptionParser::parse(&Witness::from_slice(&[Vec::new(), vec![0x50]])), - Err(InscriptionError::NoTapscript), - ); - } - - #[test] - fn ignore_unparsable_scripts() { - assert_eq!( - InscriptionParser::parse(&Witness::from_slice(&[vec![0x01], Vec::new()])), - Err(InscriptionError::Script(script::Error::EarlyEndOfScript)), - ); - } - - #[test] - fn no_inscription() { - assert_eq!( - InscriptionParser::parse(&Witness::from_slice(&[ - ScriptBuf::new().into_bytes(), - Vec::new() - ])), - Ok(vec![]) - ); - } - - #[test] - fn duplicate_field() { - assert_eq!( - InscriptionParser::parse(&envelope(&[ - b"ord", - &[1], - b"text/plain;charset=utf-8", - &[1], - b"text/plain;charset=utf-8", - &[], - b"ord", - ])), - Err(InscriptionError::InvalidInscription), - ); - } - - #[test] - fn valid() { - assert_eq!( - InscriptionParser::parse(&envelope(&[ - b"ord", - &[1], - b"text/plain;charset=utf-8", - &[], - b"ord", - ])), - Ok(vec![inscription("text/plain;charset=utf-8", "ord")]), - ); - } - - #[test] - fn valid_with_unknown_tag() { - assert_eq!( - InscriptionParser::parse(&envelope(&[ - b"ord", - &[1], - b"text/plain;charset=utf-8", - &[9], - b"bar", - &[], - b"ord", - ])), - Ok(vec![inscription("text/plain;charset=utf-8", "ord")]), - ); - } - - #[test] - fn no_content_tag() { - assert_eq!( - InscriptionParser::parse(&envelope(&[b"ord", &[1], b"text/plain;charset=utf-8"])), - Ok(vec![Inscription { - content_type: Some(b"text/plain;charset=utf-8".to_vec()), - ..Default::default() - }]), - ); - } - - #[test] - fn no_content_type() { - assert_eq!( - InscriptionParser::parse(&envelope(&[b"ord", &[], b"foo"])), - Ok(vec![Inscription { - body: Some(b"foo".to_vec()), - ..Default::default() - }]), - ); - } - - #[test] - fn valid_body_in_multiple_pushes() { - assert_eq!( - InscriptionParser::parse(&envelope(&[ - b"ord", - &[1], - b"text/plain;charset=utf-8", - &[], - b"foo", - b"bar" - ])), - Ok(vec![inscription("text/plain;charset=utf-8", "foobar")]), - ); - } - - #[test] - fn valid_body_in_zero_pushes() { - assert_eq!( - InscriptionParser::parse(&envelope(&[b"ord", &[1], b"text/plain;charset=utf-8", &[]])), - Ok(vec![inscription("text/plain;charset=utf-8", "")]), - ); - } - - #[test] - fn valid_body_in_multiple_empty_pushes() { - assert_eq!( - InscriptionParser::parse(&envelope(&[ - b"ord", - &[1], - b"text/plain;charset=utf-8", - &[], - &[], - &[], - &[], - &[], - &[], - ])), - Ok(vec![inscription("text/plain;charset=utf-8", "")]), - ); - } - - #[test] - fn valid_ignore_trailing() { - let script = script::Builder::new() - .push_opcode(opcodes::OP_FALSE) - .push_opcode(opcodes::all::OP_IF) - .push_slice(b"ord") - .push_slice([1]) - .push_slice(b"text/plain;charset=utf-8") - .push_slice([]) - .push_slice(b"ord") - .push_opcode(opcodes::all::OP_ENDIF) - .push_opcode(opcodes::all::OP_CHECKSIG) - .into_script(); - - assert_eq!( - InscriptionParser::parse(&Witness::from_slice(&[script.into_bytes(), Vec::new()])), - Ok(vec![inscription("text/plain;charset=utf-8", "ord")]), - ); - } - - #[test] - fn valid_ignore_preceding() { - let script = script::Builder::new() - .push_opcode(opcodes::all::OP_CHECKSIG) - .push_opcode(opcodes::OP_FALSE) - .push_opcode(opcodes::all::OP_IF) - .push_slice(b"ord") - .push_slice([1]) - .push_slice(b"text/plain;charset=utf-8") - .push_slice([]) - .push_slice(b"ord") - .push_opcode(opcodes::all::OP_ENDIF) - .into_script(); - - assert_eq!( - InscriptionParser::parse(&Witness::from_slice(&[script.into_bytes(), Vec::new()])), - Ok(vec![inscription("text/plain;charset=utf-8", "ord")]), - ); - } - - #[test] - fn do_not_ignore_inscriptions_after_first() { - let script = script::Builder::new() - .push_opcode(opcodes::OP_FALSE) - .push_opcode(opcodes::all::OP_IF) - .push_slice(b"ord") - .push_slice([1]) - .push_slice(b"text/plain;charset=utf-8") - .push_slice([]) - .push_slice(b"foo") - .push_opcode(opcodes::all::OP_ENDIF) - .push_opcode(opcodes::OP_FALSE) - .push_opcode(opcodes::all::OP_IF) - .push_slice(b"ord") - .push_slice([1]) - .push_slice(b"text/plain;charset=utf-8") - .push_slice([]) - .push_slice(b"bar") - .push_opcode(opcodes::all::OP_ENDIF) - .into_script(); - - assert_eq!( - InscriptionParser::parse(&Witness::from_slice(&[script.into_bytes(), Vec::new()])), - Ok(vec![ - inscription("text/plain;charset=utf-8", "foo"), - inscription("text/plain;charset=utf-8", "bar") - ]), - ); - } - - #[test] - fn invalid_utf8_does_not_render_inscription_invalid() { - assert_eq!( - InscriptionParser::parse(&envelope(&[ - b"ord", - &[1], - b"text/plain;charset=utf-8", - &[], - &[0b10000000] - ])), - Ok(vec![inscription("text/plain;charset=utf-8", [0b10000000])]), - ); - } - - #[test] - fn no_endif() { - let script = script::Builder::new() - .push_opcode(opcodes::OP_FALSE) - .push_opcode(opcodes::all::OP_IF) - .push_slice(b"ord") - .into_script(); - - assert_eq!( - InscriptionParser::parse(&Witness::from_slice(&[script.into_bytes(), Vec::new()])), - Ok(vec![]) - ); - } - - #[test] - fn no_op_false() { - let script = script::Builder::new() - .push_opcode(opcodes::all::OP_IF) - .push_slice(b"ord") - .push_opcode(opcodes::all::OP_ENDIF) - .into_script(); - - assert_eq!( - InscriptionParser::parse(&Witness::from_slice(&[script.into_bytes(), Vec::new()])), - Ok(vec![]) - ); - } - - #[test] - fn empty_envelope() { - assert_eq!(InscriptionParser::parse(&envelope(&[])), Ok(vec![])); - } - - #[test] - fn wrong_magic_number() { - assert_eq!(InscriptionParser::parse(&envelope(&[b"foo"])), Ok(vec![])); - } - - #[test] - fn extract_from_transaction() { - let tx = Transaction { - version: 0, - lock_time: bitcoin::locktime::absolute::LockTime::ZERO, - input: vec![TxIn { - previous_output: OutPoint::null(), - script_sig: ScriptBuf::new(), - sequence: Sequence(0), - witness: envelope(&[b"ord", &[1], b"text/plain;charset=utf-8", &[], b"ord"]), - }], - output: Vec::new(), - }; - - assert_eq!( - Inscription::from_transaction(&tx), - vec![transaction_inscription( - "text/plain;charset=utf-8", - "ord", - 0, - 0 - )], - ); - } - - #[test] - fn extract_from_second_input() { - let tx = Transaction { - version: 0, - lock_time: bitcoin::locktime::absolute::LockTime::ZERO, - input: vec![ - TxIn { - previous_output: OutPoint::null(), - script_sig: ScriptBuf::new(), - sequence: Sequence(0), - witness: Witness::new(), - }, - TxIn { - previous_output: OutPoint::null(), - script_sig: ScriptBuf::new(), - sequence: Sequence(0), - witness: inscription("foo", [1; 1040]).to_witness(), - }, - ], - output: Vec::new(), - }; - - assert_eq!( - Inscription::from_transaction(&tx), - vec![transaction_inscription("foo", [1; 1040], 1, 0)] - ); - } - - #[test] - fn extract_from_second_envelope() { - let mut builder = script::Builder::new(); - builder = inscription("foo", [1; 100]).append_reveal_script_to_builder(builder); - builder = inscription("bar", [1; 100]).append_reveal_script_to_builder(builder); - - let witness = Witness::from_slice(&[builder.into_script().into_bytes(), Vec::new()]); - - let tx = Transaction { - version: 0, - lock_time: bitcoin::locktime::absolute::LockTime::ZERO, - input: vec![TxIn { - previous_output: OutPoint::null(), - script_sig: ScriptBuf::new(), - sequence: Sequence(0), - witness, - }], - output: Vec::new(), - }; - - assert_eq!( - Inscription::from_transaction(&tx), - vec![ - transaction_inscription("foo", [1; 100], 0, 0), - transaction_inscription("bar", [1; 100], 0, 1) - ] - ); - } - - #[test] - fn inscribe_png() { - assert_eq!( - InscriptionParser::parse(&envelope(&[b"ord", &[1], b"image/png", &[], &[1; 100]])), - Ok(vec![inscription("image/png", [1; 100])]), - ); - } - #[test] fn reveal_script_chunks_data() { assert_eq!( @@ -784,55 +247,6 @@ mod tests { ); } - #[test] - fn chunked_data_is_parsable() { - let mut witness = Witness::new(); - - witness.push(&inscription("foo", [1; 1040]).append_reveal_script(script::Builder::new())); - - witness.push([]); - - assert_eq!( - InscriptionParser::parse(&witness).unwrap(), - vec![inscription("foo", [1; 1040])], - ); - } - - #[test] - fn round_trip_with_no_fields() { - let mut witness = Witness::new(); - - witness.push(Inscription::default().append_reveal_script(script::Builder::new())); - - witness.push([]); - - assert_eq!( - InscriptionParser::parse(&witness).unwrap(), - vec![Inscription::default()] - ); - } - - #[test] - fn unknown_odd_fields_are_ignored() { - assert_eq!( - InscriptionParser::parse(&envelope(&[b"ord", &[9], &[0]])), - Ok(vec![Inscription { - ..Default::default() - }]), - ); - } - - #[test] - fn unknown_even_fields() { - assert_eq!( - InscriptionParser::parse(&envelope(&[b"ord", &[2], &[0]])), - Ok(vec![Inscription { - unrecognized_even_field: true, - ..Default::default() - }]), - ); - } - #[test] fn inscription_with_no_parent_field_has_no_parent() { assert!(Inscription { diff --git a/src/lib.rs b/src/lib.rs index fd114770ad..cfeeff6164 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -18,6 +18,7 @@ use { decimal::Decimal, degree::Degree, deserialize_from_str::DeserializeFromStr, + envelope::ParsedEnvelope, epoch::Epoch, height::Height, index::{Index, List}, @@ -102,6 +103,7 @@ mod config; mod decimal; mod degree; mod deserialize_from_str; +mod envelope; mod epoch; mod fee_rate; mod height; diff --git a/src/subcommand/decode.rs b/src/subcommand/decode.rs index 9523657bb7..61258778c5 100644 --- a/src/subcommand/decode.rs +++ b/src/subcommand/decode.rs @@ -18,12 +18,12 @@ impl Decode { Transaction::consensus_decode(&mut io::stdin())? }; - let inscriptions = Inscription::from_transaction(&transaction); + let inscriptions = ParsedEnvelope::from_transaction(&transaction); Ok(Box::new(Output { inscriptions: inscriptions .into_iter() - .map(|inscription| inscription.inscription) + .map(|inscription| inscription.payload) .collect(), })) } diff --git a/src/test.rs b/src/test.rs index cdc70671ce..1c7be2f9bc 100644 --- a/src/test.rs +++ b/src/test.rs @@ -1,6 +1,5 @@ pub(crate) use { super::*, - crate::inscription::TransactionInscription, bitcoin::{ blockdata::{opcodes, script, script::PushBytesBuf}, ScriptBuf, Witness, @@ -116,19 +115,6 @@ pub(crate) fn inscription(content_type: &str, body: impl AsRef<[u8]>) -> Inscrip Inscription::new(Some(content_type.into()), Some(body.as_ref().into())) } -pub(crate) fn transaction_inscription( - content_type: &str, - body: impl AsRef<[u8]>, - tx_in_index: u32, - tx_in_offset: u32, -) -> TransactionInscription { - TransactionInscription { - inscription: inscription(content_type, body), - tx_in_index, - tx_in_offset, - } -} - pub(crate) fn inscription_id(n: u32) -> InscriptionId { let hex = format!("{n:x}");