fix(NIP-12): correctly search for mixed-case hex-like tags

Only lowercase and even-length tag values are stored as binary BLOBs.
Previously there was an error which search results from being returned
if the tag value was mixed-case and could be interpreted as hex.

A new database migration has been created to repair the `tag` table
for existing relays.

fixes: https://todo.sr.ht/~gheartsfield/nostr-rs-relay/37
This commit is contained in:
Greg Heartsfield 2022-08-17 16:34:11 -07:00
parent be8170342e
commit 035cf34673
No known key found for this signature in database
GPG Key ID: C7F4AA6B95F11E3A
6 changed files with 132 additions and 53 deletions

View File

@ -2,14 +2,14 @@
use crate::config::SETTINGS; use crate::config::SETTINGS;
use crate::error::Error; use crate::error::Error;
use crate::error::Result; use crate::error::Result;
use crate::event::Event; use crate::event::{single_char_tagname, Event};
use crate::hexrange::hex_range; use crate::hexrange::hex_range;
use crate::hexrange::HexSearch; use crate::hexrange::HexSearch;
use crate::nip05; use crate::nip05;
use crate::schema::{upgrade_db, STARTUP_SQL}; use crate::schema::{upgrade_db, STARTUP_SQL};
use crate::subscription::ReqFilter; use crate::subscription::ReqFilter;
use crate::subscription::Subscription; use crate::subscription::Subscription;
use crate::utils::is_hex; use crate::utils::{is_hex, is_lower_hex};
use governor::clock::Clock; use governor::clock::Clock;
use governor::{Quota, RateLimiter}; use governor::{Quota, RateLimiter};
use hex; use hex;
@ -300,17 +300,24 @@ pub fn write_event(conn: &mut PooledConnection, e: &Event) -> Result<usize> {
if tag.len() >= 2 { if tag.len() >= 2 {
let tagname = &tag[0]; let tagname = &tag[0];
let tagval = &tag[1]; let tagval = &tag[1];
// if tagvalue is hex; // only single-char tags are searchable
if is_hex(tagval) { let tagchar_opt = single_char_tagname(tagname);
tx.execute( match &tagchar_opt {
"INSERT OR IGNORE INTO tag (event_id, name, value_hex) VALUES (?1, ?2, ?3)", Some(_) => {
params![ev_id, &tagname, hex::decode(&tagval).ok()], // if tagvalue is lowercase hex;
)?; if is_lower_hex(&tagval) && (tagval.len() % 2 == 0) {
} else { tx.execute(
tx.execute( "INSERT OR IGNORE INTO tag (event_id, name, value_hex) VALUES (?1, ?2, ?3)",
"INSERT OR IGNORE INTO tag (event_id, name, value) VALUES (?1, ?2, ?3)", params![ev_id, &tagname, hex::decode(&tagval).ok()],
params![ev_id, &tagname, &tagval], )?;
)?; } else {
tx.execute(
"INSERT OR IGNORE INTO tag (event_id, name, value) VALUES (?1, ?2, ?3)",
params![ev_id, &tagname, &tagval],
)?;
}
}
None => {}
} }
} }
} }
@ -388,16 +395,13 @@ fn query_from_filter(f: &ReqFilter) -> (String, Vec<Box<dyn ToSql>>) {
// if the filter is malformed, don't return anything. // if the filter is malformed, don't return anything.
if f.force_no_match { if f.force_no_match {
let empty_query = let empty_query =
"SELECT DISTINCT(e.content), e.created_at FROM event e WHERE 1=0" "SELECT DISTINCT(e.content), e.created_at FROM event e WHERE 1=0".to_owned();
.to_owned();
// query parameters for SQLite // query parameters for SQLite
let empty_params: Vec<Box<dyn ToSql>> = vec![]; let empty_params: Vec<Box<dyn ToSql>> = vec![];
return (empty_query, empty_params); return (empty_query, empty_params);
} }
let mut query = let mut query = "SELECT DISTINCT(e.content), e.created_at FROM event e ".to_owned();
"SELECT DISTINCT(e.content), e.created_at FROM event e "
.to_owned();
// query parameters for SQLite // query parameters for SQLite
let mut params: Vec<Box<dyn ToSql>> = vec![]; let mut params: Vec<Box<dyn ToSql>> = vec![];
@ -470,7 +474,7 @@ fn query_from_filter(f: &ReqFilter) -> (String, Vec<Box<dyn ToSql>>) {
let mut str_vals: Vec<Box<dyn ToSql>> = vec![]; let mut str_vals: Vec<Box<dyn ToSql>> = vec![];
let mut blob_vals: Vec<Box<dyn ToSql>> = vec![]; let mut blob_vals: Vec<Box<dyn ToSql>> = vec![];
for v in val { for v in val {
if is_hex(v) { if (v.len()%2==0) && is_lower_hex(v) {
if let Ok(h) = hex::decode(&v) { if let Ok(h) = hex::decode(&v) {
blob_vals.push(Box::new(h)); blob_vals.push(Box::new(h));
} }
@ -481,7 +485,7 @@ fn query_from_filter(f: &ReqFilter) -> (String, Vec<Box<dyn ToSql>>) {
// create clauses with "?" params for each tag value being searched // create clauses with "?" params for each tag value being searched
let str_clause = format!("value IN ({})", repeat_vars(str_vals.len())); let str_clause = format!("value IN ({})", repeat_vars(str_vals.len()));
let blob_clause = format!("value_hex IN ({})", repeat_vars(blob_vals.len())); let blob_clause = format!("value_hex IN ({})", repeat_vars(blob_vals.len()));
// find evidence of the target tag name/value existing for this event. // find evidence of the target tag name/value existing for this event.
let tag_clause = format!("e.id IN (SELECT e.id FROM event e LEFT JOIN tag t on e.id=t.event_id WHERE hidden!=TRUE and (name=? AND ({} OR {})))", str_clause, blob_clause); let tag_clause = format!("e.id IN (SELECT e.id FROM event e LEFT JOIN tag t on e.id=t.event_id WHERE hidden!=TRUE and (name=? AND ({} OR {})))", str_clause, blob_clause);
// add the tag name as the first parameter // add the tag name as the first parameter
params.push(Box::new(key.to_string())); params.push(Box::new(key.to_string()));
@ -552,6 +556,7 @@ fn query_from_sub(sub: &Subscription) -> (String, Vec<Box<dyn ToSql>>) {
/// query is immediately aborted. /// query is immediately aborted.
pub async fn db_query( pub async fn db_query(
sub: Subscription, sub: Subscription,
client_id: String,
pool: SqlitePool, pool: SqlitePool,
query_tx: tokio::sync::mpsc::Sender<QueryResult>, query_tx: tokio::sync::mpsc::Sender<QueryResult>,
mut abandon_query_rx: tokio::sync::oneshot::Receiver<()>, mut abandon_query_rx: tokio::sync::oneshot::Receiver<()>,
@ -573,13 +578,18 @@ pub async fn db_query(
let mut first_result = true; let mut first_result = true;
while let Some(row) = event_rows.next()? { while let Some(row) = event_rows.next()? {
if first_result { if first_result {
debug!("time to first result: {:?}", start.elapsed()); debug!(
"time to first result: {:?} (cid={}, sub={:?})",
start.elapsed(),
client_id,
sub.id
);
first_result = false; first_result = false;
} }
// check if this is still active // check if this is still active
// TODO: check every N rows // TODO: check every N rows
if abandon_query_rx.try_recv().is_ok() { if abandon_query_rx.try_recv().is_ok() {
debug!("query aborted"); debug!("query aborted (sub={:?})", sub.id);
return Ok(()); return Ok(());
} }
row_count += 1; row_count += 1;
@ -598,9 +608,11 @@ pub async fn db_query(
}) })
.ok(); .ok();
debug!( debug!(
"query completed ({} rows) in {:?}", "query completed ({} rows) in {:?} (cid={}, sub={:?})",
row_count, row_count,
start.elapsed() start.elapsed(),
client_id,
sub.id
); );
} else { } else {
warn!("Could not get a database connection for querying"); warn!("Could not get a database connection for querying");

View File

@ -57,7 +57,7 @@ where
} }
/// Attempt to form a single-char tag name. /// Attempt to form a single-char tag name.
fn single_char_tagname(tagname: &str) -> Option<char> { pub fn single_char_tagname(tagname: &str) -> Option<char> {
// We return the tag character if and only if the tagname consists // We return the tag character if and only if the tagname consists
// of a single char. // of a single char.
let mut tagnamechars = tagname.chars(); let mut tagnamechars = tagname.chars();
@ -301,7 +301,7 @@ mod tests {
fn empty_event_tag_match() -> Result<()> { fn empty_event_tag_match() -> Result<()> {
let event = simple_event(); let event = simple_event();
assert!(!event assert!(!event
.generic_tag_val_intersect("e", &HashSet::from(["foo".to_owned(), "bar".to_owned()]))); .generic_tag_val_intersect('e', &HashSet::from(["foo".to_owned(), "bar".to_owned()])));
Ok(()) Ok(())
} }
@ -312,7 +312,7 @@ mod tests {
event.build_index(); event.build_index();
assert_eq!( assert_eq!(
event.generic_tag_val_intersect( event.generic_tag_val_intersect(
"e", 'e',
&HashSet::from(["foo".to_owned(), "bar".to_owned()]) &HashSet::from(["foo".to_owned(), "bar".to_owned()])
), ),
true true

View File

@ -1,4 +1,3 @@
//! Server process //! Server process
use futures::SinkExt; use futures::SinkExt;
use futures::StreamExt; use futures::StreamExt;
@ -133,19 +132,19 @@ async fn handle_web_request(
let rinfo = RelayInfo::from(config.info.clone()); let rinfo = RelayInfo::from(config.info.clone());
let b = Body::from(serde_json::to_string_pretty(&rinfo).unwrap()); let b = Body::from(serde_json::to_string_pretty(&rinfo).unwrap());
return Ok(Response::builder() return Ok(Response::builder()
.status(200) .status(200)
.header("Content-Type", "application/nostr+json") .header("Content-Type", "application/nostr+json")
.header("Access-Control-Allow-Origin", "*") .header("Access-Control-Allow-Origin", "*")
.body(b) .body(b)
.unwrap()); .unwrap());
} }
} }
} }
Ok(Response::builder() Ok(Response::builder()
.status(200) .status(200)
.header("Content-Type", "text/plain") .header("Content-Type", "text/plain")
.body(Body::from("Please use a Nostr client to connect.")).unwrap() .body(Body::from("Please use a Nostr client to connect."))
) .unwrap())
} }
(_, _) => { (_, _) => {
//handle any other url //handle any other url
@ -533,7 +532,7 @@ async fn nostr_server(
previous_query.send(()).ok(); previous_query.send(()).ok();
} }
// start a database query // start a database query
db::db_query(s, pool.clone(), query_tx.clone(), abandon_query_rx).await; db::db_query(s, cid.to_owned(), pool.clone(), query_tx.clone(), abandon_query_rx).await;
}, },
Err(e) => { Err(e) => {
info!("Subscription error: {}", e); info!("Subscription error: {}", e);

View File

@ -1,13 +1,13 @@
//! Database schema and migrations //! Database schema and migrations
use crate::db::PooledConnection; use crate::db::PooledConnection;
use crate::error::Result; use crate::error::Result;
use crate::utils::is_hex; use crate::event::{single_char_tagname, Event};
use crate::utils::is_lower_hex;
use log::*; use log::*;
use rusqlite::limits::Limit; use rusqlite::limits::Limit;
use rusqlite::params; use rusqlite::params;
use rusqlite::Connection; use rusqlite::Connection;
use std::time::Instant;
// TODO: drop the pubkey_ref and event_ref tables
/// Startup DB Pragmas /// Startup DB Pragmas
pub const STARTUP_SQL: &str = r##" pub const STARTUP_SQL: &str = r##"
@ -24,7 +24,7 @@ PRAGMA journal_mode=WAL;
PRAGMA main.synchronous=NORMAL; PRAGMA main.synchronous=NORMAL;
PRAGMA foreign_keys = ON; PRAGMA foreign_keys = ON;
PRAGMA application_id = 1654008667; PRAGMA application_id = 1654008667;
PRAGMA user_version = 5; PRAGMA user_version = 6;
-- Event Table -- Event Table
CREATE TABLE IF NOT EXISTS event ( CREATE TABLE IF NOT EXISTS event (
@ -53,7 +53,7 @@ id INTEGER PRIMARY KEY,
event_id INTEGER NOT NULL, -- an event ID that contains a tag. event_id INTEGER NOT NULL, -- an event ID that contains a tag.
name TEXT, -- the tag name ("p", "e", whatever) name TEXT, -- the tag name ("p", "e", whatever)
value TEXT, -- the tag value, if not hex. value TEXT, -- the tag value, if not hex.
value_hex BLOB, -- the tag value, if it can be interpreted as a hex string. value_hex BLOB, -- the tag value, if it can be interpreted as a lowercase hex string.
FOREIGN KEY(event_id) REFERENCES event(id) ON UPDATE CASCADE ON DELETE CASCADE FOREIGN KEY(event_id) REFERENCES event(id) ON UPDATE CASCADE ON DELETE CASCADE
); );
CREATE INDEX IF NOT EXISTS tag_val_index ON tag(value); CREATE INDEX IF NOT EXISTS tag_val_index ON tag(value);
@ -103,7 +103,7 @@ pub fn upgrade_db(conn: &mut PooledConnection) -> Result<()> {
if curr_version == 0 { if curr_version == 0 {
match conn.execute_batch(INIT_SQL) { match conn.execute_batch(INIT_SQL) {
Ok(()) => { Ok(()) => {
info!("database pragma/schema initialized to v4, and ready"); info!("database pragma/schema initialized to v6, and ready");
} }
Err(err) => { Err(err) => {
error!("update failed: {}", err); error!("update failed: {}", err);
@ -154,7 +154,6 @@ PRAGMA user_version = 3;
panic!("database could not be upgraded"); panic!("database could not be upgraded");
} }
} }
info!("Starting transaction");
// iterate over every event/pubkey tag // iterate over every event/pubkey tag
let tx = conn.transaction()?; let tx = conn.transaction()?;
{ {
@ -166,7 +165,7 @@ PRAGMA user_version = 3;
let tag_name: String = row.get(1)?; let tag_name: String = row.get(1)?;
let tag_value: String = row.get(2)?; let tag_value: String = row.get(2)?;
// this will leave behind p/e tags that were non-hex, but they are invalid anyways. // this will leave behind p/e tags that were non-hex, but they are invalid anyways.
if is_hex(&tag_value) { if is_lower_hex(&tag_value) {
tx.execute( tx.execute(
"INSERT INTO tag (event_id, name, value_hex) VALUES (?1, ?2, ?3);", "INSERT INTO tag (event_id, name, value_hex) VALUES (?1, ?2, ?3);",
params![event_id, tag_name, hex::decode(&tag_value).ok()], params![event_id, tag_name, hex::decode(&tag_value).ok()],
@ -225,9 +224,63 @@ PRAGMA user_version=5;
} }
} }
} else if curr_version == 5 { } else if curr_version == 5 {
debug!("Database version was already current"); info!("database schema needs update from 5->6");
} else if curr_version > 5 { // We need to rebuild the tags table. iterate through the
panic!("Database version is newer than supported by this executable"); // event table. build event from json, insert tags into a
// fresh tag table. This was needed due to a logic error in
// how hex-like tags got indexed.
let start = Instant::now();
let tx = conn.transaction()?;
{
// Clear out table
tx.execute("DELETE FROM tag;", [])?;
let mut stmt = tx.prepare("select id, content from event order by id;")?;
let mut tag_rows = stmt.query([])?;
while let Some(row) = tag_rows.next()? {
// we want to capture the event_id that had the tag, the tag name, and the tag hex value.
let event_id: u64 = row.get(0)?;
let event_json: String = row.get(1)?;
let event: Event = serde_json::from_str(&event_json)?;
// look at each event, and each tag, creating new tag entries if appropriate.
for t in event.tags.iter().filter(|x| x.len() > 1) {
let tagname = t.get(0).unwrap();
let tagnamechar_opt = single_char_tagname(tagname);
if tagnamechar_opt.is_none() {
continue;
}
// safe because len was > 1
let tagval = t.get(1).unwrap();
// insert as BLOB if we can restore it losslessly.
// this means it needs to be even length and lowercase.
if (tagval.len() % 2 == 0) && is_lower_hex(&tagval) {
tx.execute(
"INSERT INTO tag (event_id, name, value_hex) VALUES (?1, ?2, ?3);",
params![event_id, tagname, hex::decode(&tagval).ok()],
)?;
} else {
// otherwise, insert as text
tx.execute(
"INSERT INTO tag (event_id, name, value) VALUES (?1, ?2, ?3);",
params![event_id, tagname, &tagval],
)?;
}
}
}
tx.execute("PRAGMA user_version = 6;", [])?;
}
tx.commit()?;
info!("database schema upgraded v5 -> v6 in {:?}", start.elapsed());
// vacuum after large table modification
let start = Instant::now();
conn.execute("VACUUM;", [])?;
info!("vacuumed DB after tags rebuild in {:?}", start.elapsed());
} else if curr_version == 6 {
debug!("Database version was already current (v6)");
} else if curr_version > 7 {
panic!(
"Database version is newer than supported by this executable (v{})",
curr_version
);
} }
// Setup PRAGMA // Setup PRAGMA

View File

@ -1,7 +1,6 @@
//! Subscription and filter parsing //! Subscription and filter parsing
use crate::error::Result; use crate::error::Result;
use crate::event::Event; use crate::event::Event;
use log::*;
use serde::de::Unexpected; use serde::de::Unexpected;
use serde::{Deserialize, Deserializer, Serialize}; use serde::{Deserialize, Deserializer, Serialize};
use serde_json::Value; use serde_json::Value;
@ -80,9 +79,7 @@ impl<'de> Deserialize<'de> for ReqFilter {
} else if key == "authors" { } else if key == "authors" {
rf.authors = Deserialize::deserialize(val).ok(); rf.authors = Deserialize::deserialize(val).ok();
} else if key.starts_with('#') && key.len() > 1 && val.is_array() { } else if key.starts_with('#') && key.len() > 1 && val.is_array() {
info!("testing tag search char: {}", key);
if let Some(tag_search) = tag_search_char_from_filter(key) { if let Some(tag_search) = tag_search_char_from_filter(key) {
info!("found a character from the tag search: {}", tag_search);
if ts.is_none() { if ts.is_none() {
// Initialize the tag if necessary // Initialize the tag if necessary
ts = Some(HashMap::new()); ts = Some(HashMap::new());

View File

@ -13,3 +13,21 @@ pub fn unix_time() -> u64 {
pub fn is_hex(s: &str) -> bool { pub fn is_hex(s: &str) -> bool {
s.chars().all(|x| char::is_ascii_hexdigit(&x)) s.chars().all(|x| char::is_ascii_hexdigit(&x))
} }
/// Check if a string contains only lower-case hex chars.
pub fn is_lower_hex(s: &str) -> bool {
s.chars().all(|x| {
(char::is_ascii_lowercase(&x) || char::is_ascii_digit(&x)) && char::is_ascii_hexdigit(&x)
})
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn lower_hex() {
let hexstr = "abcd0123";
assert_eq!(is_lower_hex(hexstr), true);
}
}